if_igb.c revision 270252
1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/e1000/if_igb.c 270252 2014-08-20 23:34:36Z luigi $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38
39#ifdef HAVE_KERNEL_OPTION_HEADERS
40#include "opt_device_polling.h"
41#include "opt_altq.h"
42#endif
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#ifndef IGB_LEGACY_TX
47#include <sys/buf_ring.h>
48#endif
49#include <sys/bus.h>
50#include <sys/endian.h>
51#include <sys/kernel.h>
52#include <sys/kthread.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55#include <sys/module.h>
56#include <sys/rman.h>
57#include <sys/socket.h>
58#include <sys/sockio.h>
59#include <sys/sysctl.h>
60#include <sys/taskqueue.h>
61#include <sys/eventhandler.h>
62#include <sys/pcpu.h>
63#include <sys/smp.h>
64#include <machine/smp.h>
65#include <machine/bus.h>
66#include <machine/resource.h>
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_dl.h>
73#include <net/if_media.h>
74
75#include <net/if_types.h>
76#include <net/if_vlan_var.h>
77
78#include <netinet/in_systm.h>
79#include <netinet/in.h>
80#include <netinet/if_ether.h>
81#include <netinet/ip.h>
82#include <netinet/ip6.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_lro.h>
85#include <netinet/udp.h>
86
87#include <machine/in_cksum.h>
88#include <dev/led/led.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 2.4.0";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
110 *  Used by probe to select the devices to load on.
111 *  Last field stores an index into igb_strings.
112 *  Last entry must be all 0s
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
134						PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
143						PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
148						PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
156	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
157						PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_COPPER_FLASHLESS,
159						PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I210_SERDES_FLASHLESS,
161						PCI_ANY_ID, PCI_ANY_ID, 0},
162	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
163	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
164	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
165	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
166	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_1GBPS,
167						PCI_ANY_ID, PCI_ANY_ID, 0},
168	{ 0x8086, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS,
169						PCI_ANY_ID, PCI_ANY_ID, 0},
170	{ 0x8086, E1000_DEV_ID_I354_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
171	/* required last entry */
172	{ 0, 0, 0, 0, 0}
173};
174
175/*********************************************************************
176 *  Table of branding strings for all supported NICs.
177 *********************************************************************/
178
179static char *igb_strings[] = {
180	"Intel(R) PRO/1000 Network Connection"
181};
182
183/*********************************************************************
184 *  Function prototypes
185 *********************************************************************/
186static int	igb_probe(device_t);
187static int	igb_attach(device_t);
188static int	igb_detach(device_t);
189static int	igb_shutdown(device_t);
190static int	igb_suspend(device_t);
191static int	igb_resume(device_t);
192#ifndef IGB_LEGACY_TX
193static int	igb_mq_start(struct ifnet *, struct mbuf *);
194static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
195static void	igb_qflush(struct ifnet *);
196static void	igb_deferred_mq_start(void *, int);
197#else
198static void	igb_start(struct ifnet *);
199static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
200#endif
201static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
202static void	igb_init(void *);
203static void	igb_init_locked(struct adapter *);
204static void	igb_stop(void *);
205static void	igb_media_status(struct ifnet *, struct ifmediareq *);
206static int	igb_media_change(struct ifnet *);
207static void	igb_identify_hardware(struct adapter *);
208static int	igb_allocate_pci_resources(struct adapter *);
209static int	igb_allocate_msix(struct adapter *);
210static int	igb_allocate_legacy(struct adapter *);
211static int	igb_setup_msix(struct adapter *);
212static void	igb_free_pci_resources(struct adapter *);
213static void	igb_local_timer(void *);
214static void	igb_reset(struct adapter *);
215static int	igb_setup_interface(device_t, struct adapter *);
216static int	igb_allocate_queues(struct adapter *);
217static void	igb_configure_queues(struct adapter *);
218
219static int	igb_allocate_transmit_buffers(struct tx_ring *);
220static void	igb_setup_transmit_structures(struct adapter *);
221static void	igb_setup_transmit_ring(struct tx_ring *);
222static void	igb_initialize_transmit_units(struct adapter *);
223static void	igb_free_transmit_structures(struct adapter *);
224static void	igb_free_transmit_buffers(struct tx_ring *);
225
226static int	igb_allocate_receive_buffers(struct rx_ring *);
227static int	igb_setup_receive_structures(struct adapter *);
228static int	igb_setup_receive_ring(struct rx_ring *);
229static void	igb_initialize_receive_units(struct adapter *);
230static void	igb_free_receive_structures(struct adapter *);
231static void	igb_free_receive_buffers(struct rx_ring *);
232static void	igb_free_receive_ring(struct rx_ring *);
233
234static void	igb_enable_intr(struct adapter *);
235static void	igb_disable_intr(struct adapter *);
236static void	igb_update_stats_counters(struct adapter *);
237static bool	igb_txeof(struct tx_ring *);
238
239static __inline	void igb_rx_discard(struct rx_ring *, int);
240static __inline void igb_rx_input(struct rx_ring *,
241		    struct ifnet *, struct mbuf *, u32);
242
243static bool	igb_rxeof(struct igb_queue *, int, int *);
244static void	igb_rx_checksum(u32, struct mbuf *, u32);
245static int	igb_tx_ctx_setup(struct tx_ring *,
246		    struct mbuf *, u32 *, u32 *);
247static int	igb_tso_setup(struct tx_ring *,
248		    struct mbuf *, u32 *, u32 *);
249static void	igb_set_promisc(struct adapter *);
250static void	igb_disable_promisc(struct adapter *);
251static void	igb_set_multi(struct adapter *);
252static void	igb_update_link_status(struct adapter *);
253static void	igb_refresh_mbufs(struct rx_ring *, int);
254
255static void	igb_register_vlan(void *, struct ifnet *, u16);
256static void	igb_unregister_vlan(void *, struct ifnet *, u16);
257static void	igb_setup_vlan_hw_support(struct adapter *);
258
259static int	igb_xmit(struct tx_ring *, struct mbuf **);
260static int	igb_dma_malloc(struct adapter *, bus_size_t,
261		    struct igb_dma_alloc *, int);
262static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
263static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
264static void	igb_print_nvm_info(struct adapter *);
265static int 	igb_is_valid_ether_addr(u8 *);
266static void     igb_add_hw_stats(struct adapter *);
267
268static void	igb_vf_init_stats(struct adapter *);
269static void	igb_update_vf_stats_counters(struct adapter *);
270
271/* Management and WOL Support */
272static void	igb_init_manageability(struct adapter *);
273static void	igb_release_manageability(struct adapter *);
274static void     igb_get_hw_control(struct adapter *);
275static void     igb_release_hw_control(struct adapter *);
276static void     igb_enable_wakeup(device_t);
277static void     igb_led_func(void *, int);
278
279static int	igb_irq_fast(void *);
280static void	igb_msix_que(void *);
281static void	igb_msix_link(void *);
282static void	igb_handle_que(void *context, int pending);
283static void	igb_handle_link(void *context, int pending);
284static void	igb_handle_link_locked(struct adapter *);
285
286static void	igb_set_sysctl_value(struct adapter *, const char *,
287		    const char *, int *, int);
288static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
289static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
290static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
291
292#ifdef DEVICE_POLLING
293static poll_handler_t igb_poll;
294#endif /* DEVICE_POLLING */
295
296/*********************************************************************
297 *  FreeBSD Device Interface Entry Points
298 *********************************************************************/
299
300static device_method_t igb_methods[] = {
301	/* Device interface */
302	DEVMETHOD(device_probe, igb_probe),
303	DEVMETHOD(device_attach, igb_attach),
304	DEVMETHOD(device_detach, igb_detach),
305	DEVMETHOD(device_shutdown, igb_shutdown),
306	DEVMETHOD(device_suspend, igb_suspend),
307	DEVMETHOD(device_resume, igb_resume),
308	DEVMETHOD_END
309};
310
311static driver_t igb_driver = {
312	"igb", igb_methods, sizeof(struct adapter),
313};
314
315static devclass_t igb_devclass;
316DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
317MODULE_DEPEND(igb, pci, 1, 1, 1);
318MODULE_DEPEND(igb, ether, 1, 1, 1);
319
320/*********************************************************************
321 *  Tunable default values.
322 *********************************************************************/
323
324static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
325
326/* Descriptor defaults */
327static int igb_rxd = IGB_DEFAULT_RXD;
328static int igb_txd = IGB_DEFAULT_TXD;
329TUNABLE_INT("hw.igb.rxd", &igb_rxd);
330TUNABLE_INT("hw.igb.txd", &igb_txd);
331SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
332    "Number of receive descriptors per queue");
333SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
334    "Number of transmit descriptors per queue");
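/*
** Example (hypothetical values): rxd/txd are boot-time tunables
** (TUNABLE_INT/CTLFLAG_RDTUN), so they would normally be set from
** /boot/loader.conf, e.g.:
**	hw.igb.rxd="2048"
**	hw.igb.txd="2048"
** The values are later validated against the IGB_MIN/IGB_MAX limits
** and the IGB_DBA_ALIGN requirement in igb_attach().
*/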
335
336/*
337** AIM: Adaptive Interrupt Moderation
338** which means that the interrupt rate
339** is varied over time based on the
340** traffic for that interrupt vector
341*/
342static int igb_enable_aim = TRUE;
343TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
344SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
345    "Enable adaptive interrupt moderation");
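/*
** Note: enable_aim is CTLFLAG_RW, so it can also be toggled at
** runtime, e.g. (illustrative): sysctl hw.igb.enable_aim=0
*/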
346
347/*
348 * MSIX should be the default for best performance,
349 * but this allows it to be forced off for testing.
350 */
351static int igb_enable_msix = 1;
352TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
353SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
354    "Enable MSI-X interrupts");
355
356/*
357** Tunable interrupt rate
358*/
359static int igb_max_interrupt_rate = 8000;
360TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
361SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
362    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
363
364#ifndef IGB_LEGACY_TX
365/*
366** Tunable number of buffers in the buf-ring (drbr_xxx)
367*/
368static int igb_buf_ring_size = IGB_BR_SIZE;
369TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
370SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
371    &igb_buf_ring_size, 0, "Size of the bufring");
372#endif
373
374/*
375** Header split causes the packet header to
376** be DMA'd to a separate mbuf from the payload.
377** This can have memory alignment benefits, and
378** another plus is that small packets often fit
379** into the header mbuf and thus use no cluster.
380** It is a very workload-dependent feature.
381*/
382static int igb_header_split = FALSE;
383TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
384SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
385    "Enable receive mbuf header split");
386
387/*
388** If left at 0, the number of queues will be
389** autoconfigured based on the number of CPUs and
390** the maximum supported MSI-X messages.
391*/
392static int igb_num_queues = 0;
393TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
394SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
395    "Number of queues to configure, 0 indicates autoconfigure");
396
397/*
398** Global variable to store last used CPU when binding queues
399** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
400** queue is bound to a cpu.
401*/
402static int igb_last_bind_cpu = -1;
403
404/* How many packets rxeof tries to clean at a time */
405static int igb_rx_process_limit = 100;
406TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
407SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
408    &igb_rx_process_limit, 0,
409    "Maximum number of received packets to process at a time, -1 means unlimited");
410
411#ifdef DEV_NETMAP	/* see ixgbe.c for details */
412#include <dev/netmap/if_igb_netmap.h>
413#endif /* DEV_NETMAP */
414/*********************************************************************
415 *  Device identification routine
416 *
417 *  igb_probe determines if the driver should be loaded on an
418 *  adapter based on the PCI vendor/device ID of that adapter.
419 *
420 *  return BUS_PROBE_DEFAULT on success, positive on failure
421 *********************************************************************/
422
423static int
424igb_probe(device_t dev)
425{
426	char		adapter_name[60];
427	uint16_t	pci_vendor_id = 0;
428	uint16_t	pci_device_id = 0;
429	uint16_t	pci_subvendor_id = 0;
430	uint16_t	pci_subdevice_id = 0;
431	igb_vendor_info_t *ent;
432
433	INIT_DEBUGOUT("igb_probe: begin");
434
435	pci_vendor_id = pci_get_vendor(dev);
436	if (pci_vendor_id != IGB_VENDOR_ID)
437		return (ENXIO);
438
439	pci_device_id = pci_get_device(dev);
440	pci_subvendor_id = pci_get_subvendor(dev);
441	pci_subdevice_id = pci_get_subdevice(dev);
442
443	ent = igb_vendor_info_array;
444	while (ent->vendor_id != 0) {
445		if ((pci_vendor_id == ent->vendor_id) &&
446		    (pci_device_id == ent->device_id) &&
447
448		    ((pci_subvendor_id == ent->subvendor_id) ||
449		    (ent->subvendor_id == PCI_ANY_ID)) &&
450
451		    ((pci_subdevice_id == ent->subdevice_id) ||
452		    (ent->subdevice_id == PCI_ANY_ID))) {
453			sprintf(adapter_name, "%s %s",
454				igb_strings[ent->index],
455				igb_driver_version);
456			device_set_desc_copy(dev, adapter_name);
457			return (BUS_PROBE_DEFAULT);
458		}
459		ent++;
460	}
461
462	return (ENXIO);
463}
464
465/*********************************************************************
466 *  Device initialization routine
467 *
468 *  The attach entry point is called when the driver is being loaded.
469 *  This routine identifies the type of hardware, allocates all resources
470 *  and initializes the hardware.
471 *
472 *  return 0 on success, positive on failure
473 *********************************************************************/
474
475static int
476igb_attach(device_t dev)
477{
478	struct adapter	*adapter;
479	int		error = 0;
480	u16		eeprom_data;
481
482	INIT_DEBUGOUT("igb_attach: begin");
483
484	if (resource_disabled("igb", device_get_unit(dev))) {
485		device_printf(dev, "Disabled by device hint\n");
486		return (ENXIO);
487	}
488
489	adapter = device_get_softc(dev);
490	adapter->dev = adapter->osdep.dev = dev;
491	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
492
493	/* SYSCTL stuff */
494	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
495	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
496	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
497	    igb_sysctl_nvm_info, "I", "NVM Information");
498
499	igb_set_sysctl_value(adapter, "enable_aim",
500	    "Interrupt Moderation", &adapter->enable_aim,
501	    igb_enable_aim);
502
503	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
504	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
505	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
506	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
507
508	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
509
510	/* Determine hardware and mac info */
511	igb_identify_hardware(adapter);
512
513	/* Setup PCI resources */
514	if (igb_allocate_pci_resources(adapter)) {
515		device_printf(dev, "Allocation of PCI resources failed\n");
516		error = ENXIO;
517		goto err_pci;
518	}
519
520	/* Do Shared Code initialization */
521	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
522		device_printf(dev, "Setup of Shared code failed\n");
523		error = ENXIO;
524		goto err_pci;
525	}
526
527	e1000_get_bus_info(&adapter->hw);
528
529	/* Sysctl for limiting the amount of work done in the taskqueue */
530	igb_set_sysctl_value(adapter, "rx_processing_limit",
531	    "max number of rx packets to process",
532	    &adapter->rx_process_limit, igb_rx_process_limit);
533
534	/*
535	 * Validate number of transmit and receive descriptors. It
536	 * must not exceed hardware maximum, and must be a multiple
537	 * of IGB_DBA_ALIGN.
538	 */
539	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
540	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
541		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
542		    IGB_DEFAULT_TXD, igb_txd);
543		adapter->num_tx_desc = IGB_DEFAULT_TXD;
544	} else
545		adapter->num_tx_desc = igb_txd;
546	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
547	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
548		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
549		    IGB_DEFAULT_RXD, igb_rxd);
550		adapter->num_rx_desc = IGB_DEFAULT_RXD;
551	} else
552		adapter->num_rx_desc = igb_rxd;
553
554	adapter->hw.mac.autoneg = DO_AUTO_NEG;
555	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
556	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
557
558	/* Copper options */
559	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
560		adapter->hw.phy.mdix = AUTO_ALL_MODES;
561		adapter->hw.phy.disable_polarity_correction = FALSE;
562		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
563	}
564
565	/*
566	 * Set the frame limits assuming
567	 * standard ethernet sized frames.
568	 */
569	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
570
571	/*
572	** Allocate and Setup Queues
573	*/
574	if (igb_allocate_queues(adapter)) {
575		error = ENOMEM;
576		goto err_pci;
577	}
578
579	/* Allocate the appropriate stats memory */
580	if (adapter->vf_ifp) {
581		adapter->stats =
582		    (struct e1000_vf_stats *)malloc(sizeof(struct e1000_vf_stats),
583		    M_DEVBUF, M_NOWAIT | M_ZERO);
584		igb_vf_init_stats(adapter);
585	} else
586		adapter->stats =
587		    (struct e1000_hw_stats *)malloc(sizeof(struct e1000_hw_stats),
588		    M_DEVBUF, M_NOWAIT | M_ZERO);
589	if (adapter->stats == NULL) {
590		device_printf(dev, "Can not allocate stats memory\n");
591		error = ENOMEM;
592		goto err_late;
593	}
594
595	/* Allocate multicast array memory. */
596	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
597	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
598	if (adapter->mta == NULL) {
599		device_printf(dev, "Can not allocate multicast setup array\n");
600		error = ENOMEM;
601		goto err_late;
602	}
603
604	/* Some adapter-specific advanced features */
605	if (adapter->hw.mac.type >= e1000_i350) {
606		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
607		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
608		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
609		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
610		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
611		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
612		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
613		    adapter, 0, igb_sysctl_eee, "I",
614		    "Disable Energy Efficient Ethernet");
615		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
616			if (adapter->hw.mac.type == e1000_i354)
617				e1000_set_eee_i354(&adapter->hw);
618			else
619				e1000_set_eee_i350(&adapter->hw);
620		}
621	}
622
623	/*
624	** Start from a known state; this is
625	** important for reading the NVM and
626	** MAC address from it.
627	*/
628	e1000_reset_hw(&adapter->hw);
629
630	/* Make sure we have a good EEPROM before we read from it */
631	if (((adapter->hw.mac.type != e1000_i210) &&
632	    (adapter->hw.mac.type != e1000_i211)) &&
633	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
634		/*
635		** Some PCI-E parts fail the first check due to
636		** the link being in a sleep state; call it again,
637		** and if it fails a second time it's a real issue.
638		*/
639		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
640			device_printf(dev,
641			    "The EEPROM Checksum Is Not Valid\n");
642			error = EIO;
643			goto err_late;
644		}
645	}
646
647	/*
648	** Copy the permanent MAC address out of the EEPROM
649	*/
650	if (e1000_read_mac_addr(&adapter->hw) < 0) {
651		device_printf(dev, "EEPROM read error while reading MAC"
652		    " address\n");
653		error = EIO;
654		goto err_late;
655	}
656	/* Check its sanity */
657	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
658		device_printf(dev, "Invalid MAC address\n");
659		error = EIO;
660		goto err_late;
661	}
662
663	/* Setup OS specific network interface */
664	if (igb_setup_interface(dev, adapter) != 0)
665		goto err_late;
666
667	/* Now get a good starting state */
668	igb_reset(adapter);
669
670	/* Initialize statistics */
671	igb_update_stats_counters(adapter);
672
673	adapter->hw.mac.get_link_status = 1;
674	igb_update_link_status(adapter);
675
676	/* Indicate SOL/IDER usage */
677	if (e1000_check_reset_block(&adapter->hw))
678		device_printf(dev,
679		    "PHY reset is blocked due to SOL/IDER session.\n");
680
681	/* Determine if we have to control management hardware */
682	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
683
684	/*
685	 * Setup Wake-on-Lan
686	 */
687	/* APME bit in EEPROM is mapped to WUC.APME */
688	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
689	if (eeprom_data)
690		adapter->wol = E1000_WUFC_MAG;
691
692	/* Register for VLAN events */
693	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
694	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
695	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
696	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
697
698	igb_add_hw_stats(adapter);
699
700	/* Tell the stack that the interface is not active */
701	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
702	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
703
704	adapter->led_dev = led_create(igb_led_func, adapter,
705	    device_get_nameunit(dev));
706
707	/*
708	** Configure Interrupts
709	*/
710	if ((adapter->msix > 1) && (igb_enable_msix))
711		error = igb_allocate_msix(adapter);
712	else /* MSI or Legacy */
713		error = igb_allocate_legacy(adapter);
714	if (error)
715		goto err_late;
716
717#ifdef DEV_NETMAP
718	igb_netmap_attach(adapter);
719#endif /* DEV_NETMAP */
720	INIT_DEBUGOUT("igb_attach: end");
721
722	return (0);
723
724err_late:
725	igb_detach(dev);
726	igb_free_transmit_structures(adapter);
727	igb_free_receive_structures(adapter);
728	igb_release_hw_control(adapter);
729err_pci:
730	igb_free_pci_resources(adapter);
731	if (adapter->ifp != NULL)
732		if_free(adapter->ifp);
733	free(adapter->mta, M_DEVBUF);
734	IGB_CORE_LOCK_DESTROY(adapter);
735
736	return (error);
737}
738
739/*********************************************************************
740 *  Device removal routine
741 *
742 *  The detach entry point is called when the driver is being removed.
743 *  This routine stops the adapter and deallocates all the resources
744 *  that were allocated for driver operation.
745 *
746 *  return 0 on success, positive on failure
747 *********************************************************************/
748
749static int
750igb_detach(device_t dev)
751{
752	struct adapter	*adapter = device_get_softc(dev);
753	struct ifnet	*ifp = adapter->ifp;
754
755	INIT_DEBUGOUT("igb_detach: begin");
756
757	/* Make sure VLANs are not using the driver */
758	if (adapter->ifp->if_vlantrunk != NULL) {
759		device_printf(dev, "VLAN in use, detach first\n");
760		return (EBUSY);
761	}
762
763	ether_ifdetach(adapter->ifp);
764
765	if (adapter->led_dev != NULL)
766		led_destroy(adapter->led_dev);
767
768#ifdef DEVICE_POLLING
769	if (ifp->if_capenable & IFCAP_POLLING)
770		ether_poll_deregister(ifp);
771#endif
772
773	IGB_CORE_LOCK(adapter);
774	adapter->in_detach = 1;
775	igb_stop(adapter);
776	IGB_CORE_UNLOCK(adapter);
777
778	e1000_phy_hw_reset(&adapter->hw);
779
780	/* Give control back to firmware */
781	igb_release_manageability(adapter);
782	igb_release_hw_control(adapter);
783
784	if (adapter->wol) {
785		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
786		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
787		igb_enable_wakeup(dev);
788	}
789
790	/* Unregister VLAN events */
791	if (adapter->vlan_attach != NULL)
792		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
793	if (adapter->vlan_detach != NULL)
794		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
795
796	callout_drain(&adapter->timer);
797
798#ifdef DEV_NETMAP
799	netmap_detach(adapter->ifp);
800#endif /* DEV_NETMAP */
801	igb_free_pci_resources(adapter);
802	bus_generic_detach(dev);
803	if_free(ifp);
804
805	igb_free_transmit_structures(adapter);
806	igb_free_receive_structures(adapter);
807	if (adapter->mta != NULL)
808		free(adapter->mta, M_DEVBUF);
809
810	IGB_CORE_LOCK_DESTROY(adapter);
811
812	return (0);
813}
814
815/*********************************************************************
816 *
817 *  Shutdown entry point
818 *
819 **********************************************************************/
820
821static int
822igb_shutdown(device_t dev)
823{
824	return igb_suspend(dev);
825}
826
827/*
828 * Suspend/resume device methods.
829 */
830static int
831igb_suspend(device_t dev)
832{
833	struct adapter *adapter = device_get_softc(dev);
834
835	IGB_CORE_LOCK(adapter);
836
837	igb_stop(adapter);
838
839	igb_release_manageability(adapter);
840	igb_release_hw_control(adapter);
841
842	if (adapter->wol) {
843		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
844		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
845		igb_enable_wakeup(dev);
846	}
847
848	IGB_CORE_UNLOCK(adapter);
849
850	return bus_generic_suspend(dev);
851}
852
853static int
854igb_resume(device_t dev)
855{
856	struct adapter *adapter = device_get_softc(dev);
857	struct tx_ring	*txr = adapter->tx_rings;
858	struct ifnet *ifp = adapter->ifp;
859
860	IGB_CORE_LOCK(adapter);
861	igb_init_locked(adapter);
862	igb_init_manageability(adapter);
863
864	if ((ifp->if_flags & IFF_UP) &&
865	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
866		for (int i = 0; i < adapter->num_queues; i++, txr++) {
867			IGB_TX_LOCK(txr);
868#ifndef IGB_LEGACY_TX
869			/* Process the stack queue only if not depleted */
870			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
871			    !drbr_empty(ifp, txr->br))
872				igb_mq_start_locked(ifp, txr);
873#else
874			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
875				igb_start_locked(txr, ifp);
876#endif
877			IGB_TX_UNLOCK(txr);
878		}
879	}
880	IGB_CORE_UNLOCK(adapter);
881
882	return bus_generic_resume(dev);
883}
884
885
886#ifdef IGB_LEGACY_TX
887
888/*********************************************************************
889 *  Transmit entry point
890 *
891 *  igb_start is called by the stack to initiate a transmit.
892 *  The driver will remain in this routine as long as there are
893 *  packets to transmit and transmit resources are available.
894 *  If resources are not available, the stack is notified and
895 *  the packet is requeued.
896 **********************************************************************/
897
898static void
899igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
900{
901	struct adapter	*adapter = ifp->if_softc;
902	struct mbuf	*m_head;
903
904	IGB_TX_LOCK_ASSERT(txr);
905
906	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
907	    IFF_DRV_RUNNING)
908		return;
909	if (!adapter->link_active)
910		return;
911
912	/* Call cleanup if number of TX descriptors low */
913	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
914		igb_txeof(txr);
915
916	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
917		if (txr->tx_avail <= IGB_MAX_SCATTER) {
918			txr->queue_status |= IGB_QUEUE_DEPLETED;
919			break;
920		}
921		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
922		if (m_head == NULL)
923			break;
924		/*
925		 *  Encapsulation can modify our pointer, and/or make it
926		 *  NULL on failure.  In that event, we can't requeue.
927		 */
928		if (igb_xmit(txr, &m_head)) {
929			if (m_head != NULL)
930				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
931			if (txr->tx_avail <= IGB_MAX_SCATTER)
932				txr->queue_status |= IGB_QUEUE_DEPLETED;
933			break;
934		}
935
936		/* Send a copy of the frame to the BPF listener */
937		ETHER_BPF_MTAP(ifp, m_head);
938
939		/* Set watchdog on */
940		txr->watchdog_time = ticks;
941		txr->queue_status |= IGB_QUEUE_WORKING;
942	}
943}
944
945/*
946 * Legacy TX driver routine, called from the
947 * stack, always uses tx[0], and spins for it.
948 * Should not be used with multiqueue tx
949 */
950static void
951igb_start(struct ifnet *ifp)
952{
953	struct adapter	*adapter = ifp->if_softc;
954	struct tx_ring	*txr = adapter->tx_rings;
955
956	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
957		IGB_TX_LOCK(txr);
958		igb_start_locked(txr, ifp);
959		IGB_TX_UNLOCK(txr);
960	}
961	return;
962}
963
964#else /* ~IGB_LEGACY_TX */
965
966/*
967** Multiqueue Transmit Entry:
968**  enqueue the mbuf and return to the stack quickly; the actual
969**  transmit happens under the TX lock or from a deferred task
970*/
971static int
972igb_mq_start(struct ifnet *ifp, struct mbuf *m)
973{
974	struct adapter		*adapter = ifp->if_softc;
975	struct igb_queue	*que;
976	struct tx_ring		*txr;
977	int 			i, err = 0;
978
979	/* Which queue to use: prefer the mbuf's flow id, else the current CPU */
980	if ((m->m_flags & M_FLOWID) != 0)
981		i = m->m_pkthdr.flowid % adapter->num_queues;
982	else
983		i = curcpu % adapter->num_queues;
984	txr = &adapter->tx_rings[i];
985	que = &adapter->queues[i];
986
987	err = drbr_enqueue(ifp, txr->br, m);
988	if (err)
989		return (err);
990	if (IGB_TX_TRYLOCK(txr)) {
991		igb_mq_start_locked(ifp, txr);
992		IGB_TX_UNLOCK(txr);
993	} else
994		taskqueue_enqueue(que->tq, &txr->txq_task);
995
996	return (0);
997}
998
999static int
1000igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
1001{
1002	struct adapter  *adapter = txr->adapter;
1003        struct mbuf     *next;
1004        int             err = 0, enq = 0;
1005
1006	IGB_TX_LOCK_ASSERT(txr);
1007
1008	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
1009	    adapter->link_active == 0)
1010		return (ENETDOWN);
1011
1012
1013	/* Process the queue */
1014	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
1015		if ((err = igb_xmit(txr, &next)) != 0) {
1016			if (next == NULL) {
1017				/* It was freed, move forward */
1018				drbr_advance(ifp, txr->br);
1019			} else {
1020				/*
1021				 * Still have one left, it may not be
1022				 * the same since the transmit function
1023				 * may have changed it.
1024				 */
1025				drbr_putback(ifp, txr->br, next);
1026			}
1027			break;
1028		}
1029		drbr_advance(ifp, txr->br);
1030		enq++;
1031		ifp->if_obytes += next->m_pkthdr.len;
1032		if (next->m_flags & M_MCAST)
1033			ifp->if_omcasts++;
1034		ETHER_BPF_MTAP(ifp, next);
1035		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1036			break;
1037	}
1038	if (enq > 0) {
1039		/* Set the watchdog */
1040		txr->queue_status |= IGB_QUEUE_WORKING;
1041		txr->watchdog_time = ticks;
1042	}
1043	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1044		igb_txeof(txr);
1045	if (txr->tx_avail <= IGB_MAX_SCATTER)
1046		txr->queue_status |= IGB_QUEUE_DEPLETED;
1047	return (err);
1048}
1049
1050/*
1051 * Called from a taskqueue to drain queued transmit packets.
1052 */
1053static void
1054igb_deferred_mq_start(void *arg, int pending)
1055{
1056	struct tx_ring *txr = arg;
1057	struct adapter *adapter = txr->adapter;
1058	struct ifnet *ifp = adapter->ifp;
1059
1060	IGB_TX_LOCK(txr);
1061	if (!drbr_empty(ifp, txr->br))
1062		igb_mq_start_locked(ifp, txr);
1063	IGB_TX_UNLOCK(txr);
1064}
1065
1066/*
1067** Flush all ring buffers
1068*/
1069static void
1070igb_qflush(struct ifnet *ifp)
1071{
1072	struct adapter	*adapter = ifp->if_softc;
1073	struct tx_ring	*txr = adapter->tx_rings;
1074	struct mbuf	*m;
1075
1076	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1077		IGB_TX_LOCK(txr);
1078		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1079			m_freem(m);
1080		IGB_TX_UNLOCK(txr);
1081	}
1082	if_qflush(ifp);
1083}
1084#endif /* ~IGB_LEGACY_TX */
1085
1086/*********************************************************************
1087 *  Ioctl entry point
1088 *
1089 *  igb_ioctl is called when the user wants to configure the
1090 *  interface.
1091 *
1092 *  return 0 on success, positive on failure
1093 **********************************************************************/
1094
1095static int
1096igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1097{
1098	struct adapter	*adapter = ifp->if_softc;
1099	struct ifreq	*ifr = (struct ifreq *)data;
1100#if defined(INET) || defined(INET6)
1101	struct ifaddr	*ifa = (struct ifaddr *)data;
1102#endif
1103	bool		avoid_reset = FALSE;
1104	int		error = 0;
1105
1106	if (adapter->in_detach)
1107		return (error);
1108
1109	switch (command) {
1110	case SIOCSIFADDR:
1111#ifdef INET
1112		if (ifa->ifa_addr->sa_family == AF_INET)
1113			avoid_reset = TRUE;
1114#endif
1115#ifdef INET6
1116		if (ifa->ifa_addr->sa_family == AF_INET6)
1117			avoid_reset = TRUE;
1118#endif
1119		/*
1120		** Calling init results in link renegotiation,
1121		** so we avoid doing it when possible.
1122		*/
1123		if (avoid_reset) {
1124			ifp->if_flags |= IFF_UP;
1125			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1126				igb_init(adapter);
1127#ifdef INET
1128			if (!(ifp->if_flags & IFF_NOARP))
1129				arp_ifinit(ifp, ifa);
1130#endif
1131		} else
1132			error = ether_ioctl(ifp, command, data);
1133		break;
1134	case SIOCSIFMTU:
1135	    {
1136		int max_frame_size;
1137
1138		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1139
1140		IGB_CORE_LOCK(adapter);
1141		max_frame_size = 9234;
1142		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1143		    ETHER_CRC_LEN) {
1144			IGB_CORE_UNLOCK(adapter);
1145			error = EINVAL;
1146			break;
1147		}
1148
1149		ifp->if_mtu = ifr->ifr_mtu;
1150		adapter->max_frame_size =
1151		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1152		igb_init_locked(adapter);
1153		IGB_CORE_UNLOCK(adapter);
1154		break;
1155	    }
1156	case SIOCSIFFLAGS:
1157		IOCTL_DEBUGOUT(
1158		    "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1159		IGB_CORE_LOCK(adapter);
1160		if (ifp->if_flags & IFF_UP) {
1161			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1162				if ((ifp->if_flags ^ adapter->if_flags) &
1163				    (IFF_PROMISC | IFF_ALLMULTI)) {
1164					igb_disable_promisc(adapter);
1165					igb_set_promisc(adapter);
1166				}
1167			} else
1168				igb_init_locked(adapter);
1169		} else
1170			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1171				igb_stop(adapter);
1172		adapter->if_flags = ifp->if_flags;
1173		IGB_CORE_UNLOCK(adapter);
1174		break;
1175	case SIOCADDMULTI:
1176	case SIOCDELMULTI:
1177		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1178		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1179			IGB_CORE_LOCK(adapter);
1180			igb_disable_intr(adapter);
1181			igb_set_multi(adapter);
1182#ifdef DEVICE_POLLING
1183			if (!(ifp->if_capenable & IFCAP_POLLING))
1184#endif
1185				igb_enable_intr(adapter);
1186			IGB_CORE_UNLOCK(adapter);
1187		}
1188		break;
1189	case SIOCSIFMEDIA:
1190		/* Check SOL/IDER usage */
1191		IGB_CORE_LOCK(adapter);
1192		if (e1000_check_reset_block(&adapter->hw)) {
1193			IGB_CORE_UNLOCK(adapter);
1194			device_printf(adapter->dev, "Media change is"
1195			    " blocked due to SOL/IDER session.\n");
1196			break;
1197		}
1198		IGB_CORE_UNLOCK(adapter);
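		/* FALLTHROUGH - share the media get/set handling below */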
1199	case SIOCGIFMEDIA:
1200		IOCTL_DEBUGOUT(
1201		    "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1202		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1203		break;
1204	case SIOCSIFCAP:
1205	    {
1206		int mask, reinit;
1207
1208		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1209		reinit = 0;
1210		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1211#ifdef DEVICE_POLLING
1212		if (mask & IFCAP_POLLING) {
1213			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1214				error = ether_poll_register(igb_poll, ifp);
1215				if (error)
1216					return (error);
1217				IGB_CORE_LOCK(adapter);
1218				igb_disable_intr(adapter);
1219				ifp->if_capenable |= IFCAP_POLLING;
1220				IGB_CORE_UNLOCK(adapter);
1221			} else {
1222				error = ether_poll_deregister(ifp);
1223				/* Enable interrupt even in error case */
1224				IGB_CORE_LOCK(adapter);
1225				igb_enable_intr(adapter);
1226				ifp->if_capenable &= ~IFCAP_POLLING;
1227				IGB_CORE_UNLOCK(adapter);
1228			}
1229		}
1230#endif
1231		if (mask & IFCAP_HWCSUM) {
1232			ifp->if_capenable ^= IFCAP_HWCSUM;
1233			reinit = 1;
1234		}
1235		if (mask & IFCAP_TSO4) {
1236			ifp->if_capenable ^= IFCAP_TSO4;
1237			reinit = 1;
1238		}
1239		if (mask & IFCAP_TSO6) {
1240			ifp->if_capenable ^= IFCAP_TSO6;
1241			reinit = 1;
1242		}
1243		if (mask & IFCAP_VLAN_HWTAGGING) {
1244			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1245			reinit = 1;
1246		}
1247		if (mask & IFCAP_VLAN_HWFILTER) {
1248			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1249			reinit = 1;
1250		}
1251		if (mask & IFCAP_VLAN_HWTSO) {
1252			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1253			reinit = 1;
1254		}
1255		if (mask & IFCAP_LRO) {
1256			ifp->if_capenable ^= IFCAP_LRO;
1257			reinit = 1;
1258		}
1259		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1260			igb_init(adapter);
1261		VLAN_CAPABILITIES(ifp);
1262		break;
1263	    }
1264
1265	default:
1266		error = ether_ioctl(ifp, command, data);
1267		break;
1268	}
1269
1270	return (error);
1271}
1272
1273
1274/*********************************************************************
1275 *  Init entry point
1276 *
1277 *  This routine is used in two ways. It is used by the stack as the
1278 *  init entry point in the network interface structure. It is also used
1279 *  by the driver as a hw/sw initialization routine to get to a
1280 *  consistent state.
1281 *
1284
1285static void
1286igb_init_locked(struct adapter *adapter)
1287{
1288	struct ifnet	*ifp = adapter->ifp;
1289	device_t	dev = adapter->dev;
1290
1291	INIT_DEBUGOUT("igb_init: begin");
1292
1293	IGB_CORE_LOCK_ASSERT(adapter);
1294
1295	igb_disable_intr(adapter);
1296	callout_stop(&adapter->timer);
1297
1298	/* Get the latest mac address, User can use a LAA */
1299        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1300              ETHER_ADDR_LEN);
1301
1302	/* Put the address into the Receive Address Array */
1303	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1304
1305	igb_reset(adapter);
1306	igb_update_link_status(adapter);
1307
1308	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1309
1310	/* Set hardware offload abilities */
1311	ifp->if_hwassist = 0;
1312	if (ifp->if_capenable & IFCAP_TXCSUM) {
1313		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1314#if __FreeBSD_version >= 800000
1315		if (adapter->hw.mac.type == e1000_82576)
1316			ifp->if_hwassist |= CSUM_SCTP;
1317#endif
1318	}
1319
1320	if (ifp->if_capenable & IFCAP_TSO)
1321		ifp->if_hwassist |= CSUM_TSO;
1322
1323	/* Configure for OS presence */
1324	igb_init_manageability(adapter);
1325
1326	/* Prepare transmit descriptors and buffers */
1327	igb_setup_transmit_structures(adapter);
1328	igb_initialize_transmit_units(adapter);
1329
1330	/* Setup Multicast table */
1331	igb_set_multi(adapter);
1332
1333	/*
1334	** Figure out the desired mbuf pool
1335	** for doing jumbo/packetsplit
1336	*/
1337	if (adapter->max_frame_size <= 2048)
1338		adapter->rx_mbuf_sz = MCLBYTES;
1339	else if (adapter->max_frame_size <= 4096)
1340		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1341	else
1342		adapter->rx_mbuf_sz = MJUM9BYTES;
1343
1344	/* Prepare receive descriptors and buffers */
1345	if (igb_setup_receive_structures(adapter)) {
1346		device_printf(dev, "Could not setup receive structures\n");
1347		return;
1348	}
1349	igb_initialize_receive_units(adapter);
1350
1351        /* Enable VLAN support */
1352	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1353		igb_setup_vlan_hw_support(adapter);
1354
1355	/* Don't lose promiscuous settings */
1356	igb_set_promisc(adapter);
1357
1358	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1359	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1360
1361	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1362	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1363
1364	if (adapter->msix > 1) /* Set up queue routing */
1365		igb_configure_queues(adapter);
1366
1367	/* this clears any pending interrupts */
1368	E1000_READ_REG(&adapter->hw, E1000_ICR);
1369#ifdef DEVICE_POLLING
1370	/*
1371	 * Only enable interrupts if we are not polling, make sure
1372	 * they are off otherwise.
1373	 */
1374	if (ifp->if_capenable & IFCAP_POLLING)
1375		igb_disable_intr(adapter);
1376	else
1377#endif /* DEVICE_POLLING */
1378	{
1379		igb_enable_intr(adapter);
1380		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1381	}
1382
1383	/* Set Energy Efficient Ethernet */
1384	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1385		if (adapter->hw.mac.type == e1000_i354)
1386			e1000_set_eee_i354(&adapter->hw);
1387		else
1388			e1000_set_eee_i350(&adapter->hw);
1389	}
1390}
1391
1392static void
1393igb_init(void *arg)
1394{
1395	struct adapter *adapter = arg;
1396
1397	IGB_CORE_LOCK(adapter);
1398	igb_init_locked(adapter);
1399	IGB_CORE_UNLOCK(adapter);
1400}
1401
1402
1403static void
1404igb_handle_que(void *context, int pending)
1405{
1406	struct igb_queue *que = context;
1407	struct adapter *adapter = que->adapter;
1408	struct tx_ring *txr = que->txr;
1409	struct ifnet	*ifp = adapter->ifp;
1410
1411	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1412		bool	more;
1413
1414		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1415
1416		IGB_TX_LOCK(txr);
1417		igb_txeof(txr);
1418#ifndef IGB_LEGACY_TX
1419		/* Process the stack queue only if not depleted */
1420		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1421		    !drbr_empty(ifp, txr->br))
1422			igb_mq_start_locked(ifp, txr);
1423#else
1424		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1425			igb_start_locked(txr, ifp);
1426#endif
1427		IGB_TX_UNLOCK(txr);
1428		/* Do we need another? */
1429		if (more) {
1430			taskqueue_enqueue(que->tq, &que->que_task);
1431			return;
1432		}
1433	}
1434
1435#ifdef DEVICE_POLLING
1436	if (ifp->if_capenable & IFCAP_POLLING)
1437		return;
1438#endif
1439	/* Reenable this interrupt */
1440	if (que->eims)
1441		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1442	else
1443		igb_enable_intr(adapter);
1444}
1445
1446/* Deal with link in a sleepable context */
1447static void
1448igb_handle_link(void *context, int pending)
1449{
1450	struct adapter *adapter = context;
1451
1452	IGB_CORE_LOCK(adapter);
1453	igb_handle_link_locked(adapter);
1454	IGB_CORE_UNLOCK(adapter);
1455}
1456
1457static void
1458igb_handle_link_locked(struct adapter *adapter)
1459{
1460	struct tx_ring	*txr = adapter->tx_rings;
1461	struct ifnet *ifp = adapter->ifp;
1462
1463	IGB_CORE_LOCK_ASSERT(adapter);
1464	adapter->hw.mac.get_link_status = 1;
1465	igb_update_link_status(adapter);
1466	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1467		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1468			IGB_TX_LOCK(txr);
1469#ifndef IGB_LEGACY_TX
1470			/* Process the stack queue only if not depleted */
1471			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1472			    !drbr_empty(ifp, txr->br))
1473				igb_mq_start_locked(ifp, txr);
1474#else
1475			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1476				igb_start_locked(txr, ifp);
1477#endif
1478			IGB_TX_UNLOCK(txr);
1479		}
1480	}
1481}
1482
1483/*********************************************************************
1484 *
1485 *  MSI/Legacy Deferred
1486 *  Interrupt Service routine
1487 *
1488 *********************************************************************/
1489static int
1490igb_irq_fast(void *arg)
1491{
1492	struct adapter		*adapter = arg;
1493	struct igb_queue	*que = adapter->queues;
1494	u32			reg_icr;
1495
1496
1497	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1498
1499	/* Hot eject?  */
1500	if (reg_icr == 0xffffffff)
1501		return FILTER_STRAY;
1502
1503	/* Definitely not our interrupt.  */
1504	if (reg_icr == 0x0)
1505		return FILTER_STRAY;
1506
1507	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1508		return FILTER_STRAY;
1509
1510	/*
1511	 * Mask interrupts until the taskqueue is finished running.  This is
1512	 * cheap, just assume that it is needed.  This also works around the
1513	 * MSI message reordering errata on certain systems.
1514	 */
1515	igb_disable_intr(adapter);
1516	taskqueue_enqueue(que->tq, &que->que_task);
1517
1518	/* Link status change */
1519	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1520		taskqueue_enqueue(que->tq, &adapter->link_task);
1521
1522	if (reg_icr & E1000_ICR_RXO)
1523		adapter->rx_overruns++;
1524	return FILTER_HANDLED;
1525}
1526
1527#ifdef DEVICE_POLLING
1528#if __FreeBSD_version >= 800000
1529#define POLL_RETURN_COUNT(a) (a)
1530static int
1531#else
1532#define POLL_RETURN_COUNT(a)
1533static void
1534#endif
1535igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1536{
1537	struct adapter		*adapter = ifp->if_softc;
1538	struct igb_queue	*que;
1539	struct tx_ring		*txr;
1540	u32			reg_icr, rx_done = 0;
1541	u32			loop = IGB_MAX_LOOP;
1542	bool			more;
1543
1544	IGB_CORE_LOCK(adapter);
1545	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1546		IGB_CORE_UNLOCK(adapter);
1547		return POLL_RETURN_COUNT(rx_done);
1548	}
1549
1550	if (cmd == POLL_AND_CHECK_STATUS) {
1551		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1552		/* Link status change */
1553		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1554			igb_handle_link_locked(adapter);
1555
1556		if (reg_icr & E1000_ICR_RXO)
1557			adapter->rx_overruns++;
1558	}
1559	IGB_CORE_UNLOCK(adapter);
1560
1561	for (int i = 0; i < adapter->num_queues; i++) {
1562		que = &adapter->queues[i];
1563		txr = que->txr;
1564
1565		igb_rxeof(que, count, &rx_done);
1566
1567		IGB_TX_LOCK(txr);
1568		do {
1569			more = igb_txeof(txr);
1570		} while (loop-- && more);
1571#ifndef IGB_LEGACY_TX
1572		if (!drbr_empty(ifp, txr->br))
1573			igb_mq_start_locked(ifp, txr);
1574#else
1575		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1576			igb_start_locked(txr, ifp);
1577#endif
1578		IGB_TX_UNLOCK(txr);
1579	}
1580
1581	return POLL_RETURN_COUNT(rx_done);
1582}
1583#endif /* DEVICE_POLLING */
1584
1585/*********************************************************************
1586 *
1587 *  MSIX Que Interrupt Service routine
1588 *
1589 **********************************************************************/
1590static void
1591igb_msix_que(void *arg)
1592{
1593	struct igb_queue *que = arg;
1594	struct adapter *adapter = que->adapter;
1595	struct ifnet   *ifp = adapter->ifp;
1596	struct tx_ring *txr = que->txr;
1597	struct rx_ring *rxr = que->rxr;
1598	u32		newitr = 0;
1599	bool		more_rx;
1600
1601	/* Ignore spurious interrupts */
1602	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1603		return;
1604
1605	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1606	++que->irqs;
1607
1608	IGB_TX_LOCK(txr);
1609	igb_txeof(txr);
1610#ifndef IGB_LEGACY_TX
1611	/* Process the stack queue only if not depleted */
1612	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1613	    !drbr_empty(ifp, txr->br))
1614		igb_mq_start_locked(ifp, txr);
1615#else
1616	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1617		igb_start_locked(txr, ifp);
1618#endif
1619	IGB_TX_UNLOCK(txr);
1620
1621	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1622
1623	if (adapter->enable_aim == FALSE)
1624		goto no_calc;
1625	/*
1626	** Do Adaptive Interrupt Moderation:
1627	**  - Write out last calculated setting
1628	**  - Calculate based on average size over
1629	**    the last interval.
1630	*/
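	/*
	** The interval computed below is roughly the average packet
	** size (bytes/packets) seen since the last interrupt; it is
	** saved in eitr_setting and written to EITR at the top of
	** the next interrupt.
	*/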
1631        if (que->eitr_setting)
1632                E1000_WRITE_REG(&adapter->hw,
1633                    E1000_EITR(que->msix), que->eitr_setting);
1634
1635        que->eitr_setting = 0;
1636
1637        /* Idle, do nothing */
1638        if ((txr->bytes == 0) && (rxr->bytes == 0))
1639                goto no_calc;
1640
1641        /* Use half the default if sub-gig */
1642        if (adapter->link_speed != 1000)
1643                newitr = IGB_DEFAULT_ITR / 2;
1644        else {
1645		if ((txr->bytes) && (txr->packets))
1646                	newitr = txr->bytes/txr->packets;
1647		if ((rxr->bytes) && (rxr->packets))
1648			newitr = max(newitr,
1649			    (rxr->bytes / rxr->packets));
1650                newitr += 24; /* account for hardware frame, crc */
1651		/* set an upper boundary */
1652		newitr = min(newitr, 3000);
1653		/* Be nice to the mid range */
1654                if ((newitr > 300) && (newitr < 1200))
1655                        newitr = (newitr / 3);
1656                else
1657                        newitr = (newitr / 2);
1658        }
1659        newitr &= 0x7FFC;  /* Mask invalid bits */
1660        if (adapter->hw.mac.type == e1000_82575)
1661                newitr |= newitr << 16;
1662        else
1663                newitr |= E1000_EITR_CNT_IGNR;
1664
1665        /* save for next interrupt */
1666        que->eitr_setting = newitr;
1667
1668        /* Reset state */
1669        txr->bytes = 0;
1670        txr->packets = 0;
1671        rxr->bytes = 0;
1672        rxr->packets = 0;
1673
1674no_calc:
1675	/* Schedule a clean task if needed */
1676	if (more_rx)
1677		taskqueue_enqueue(que->tq, &que->que_task);
1678	else
1679		/* Reenable this interrupt */
1680		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1681	return;
1682}
1683
1684
1685/*********************************************************************
1686 *
1687 *  MSIX Link Interrupt Service routine
1688 *
1689 **********************************************************************/
1690
1691static void
1692igb_msix_link(void *arg)
1693{
1694	struct adapter	*adapter = arg;
1695	u32       	icr;
1696
1697	++adapter->link_irq;
1698	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1699	if (!(icr & E1000_ICR_LSC))
1700		goto spurious;
1701	igb_handle_link(adapter, 0);
1702
1703spurious:
1704	/* Rearm */
1705	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1706	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1707	return;
1708}
1709
1710
1711/*********************************************************************
1712 *
1713 *  Media Ioctl callback
1714 *
1715 *  This routine is called whenever the user queries the status of
1716 *  the interface using ifconfig.
1717 *
1718 **********************************************************************/
1719static void
1720igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1721{
1722	struct adapter *adapter = ifp->if_softc;
1723
1724	INIT_DEBUGOUT("igb_media_status: begin");
1725
1726	IGB_CORE_LOCK(adapter);
1727	igb_update_link_status(adapter);
1728
1729	ifmr->ifm_status = IFM_AVALID;
1730	ifmr->ifm_active = IFM_ETHER;
1731
1732	if (!adapter->link_active) {
1733		IGB_CORE_UNLOCK(adapter);
1734		return;
1735	}
1736
1737	ifmr->ifm_status |= IFM_ACTIVE;
1738
1739	switch (adapter->link_speed) {
1740	case 10:
1741		ifmr->ifm_active |= IFM_10_T;
1742		break;
1743	case 100:
1744		/*
1745		** Support for 100Mb SFP - these are Fiber
1746		** but the media type appears as serdes
1747		*/
1748		if (adapter->hw.phy.media_type ==
1749		    e1000_media_type_internal_serdes)
1750			ifmr->ifm_active |= IFM_100_FX;
1751		else
1752			ifmr->ifm_active |= IFM_100_TX;
1753		break;
1754	case 1000:
1755		ifmr->ifm_active |= IFM_1000_T;
1756		break;
1757	case 2500:
1758		ifmr->ifm_active |= IFM_2500_SX;
1759		break;
1760	}
1761
1762	if (adapter->link_duplex == FULL_DUPLEX)
1763		ifmr->ifm_active |= IFM_FDX;
1764	else
1765		ifmr->ifm_active |= IFM_HDX;
1766
1767	IGB_CORE_UNLOCK(adapter);
1768}
1769
1770/*********************************************************************
1771 *
1772 *  Media Ioctl callback
1773 *
1774 *  This routine is called when the user changes speed/duplex using
1775 *  media/mediaopt options with ifconfig.
1776 *
1777 **********************************************************************/
1778static int
1779igb_media_change(struct ifnet *ifp)
1780{
1781	struct adapter *adapter = ifp->if_softc;
1782	struct ifmedia  *ifm = &adapter->media;
1783
1784	INIT_DEBUGOUT("igb_media_change: begin");
1785
1786	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1787		return (EINVAL);
1788
1789	IGB_CORE_LOCK(adapter);
1790	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1791	case IFM_AUTO:
1792		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1793		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1794		break;
1795	case IFM_1000_LX:
1796	case IFM_1000_SX:
1797	case IFM_1000_T:
1798		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1799		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1800		break;
1801	case IFM_100_TX:
1802		adapter->hw.mac.autoneg = FALSE;
1803		adapter->hw.phy.autoneg_advertised = 0;
1804		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1805			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1806		else
1807			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1808		break;
1809	case IFM_10_T:
1810		adapter->hw.mac.autoneg = FALSE;
1811		adapter->hw.phy.autoneg_advertised = 0;
1812		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1813			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1814		else
1815			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1816		break;
1817	default:
1818		device_printf(adapter->dev, "Unsupported media type\n");
1819	}
1820
1821	igb_init_locked(adapter);
1822	IGB_CORE_UNLOCK(adapter);
1823
1824	return (0);
1825}
1826
1827
1828/*********************************************************************
1829 *
1830 *  This routine maps the mbufs to Advanced TX descriptors.
1831 *
1832 **********************************************************************/
1833static int
1834igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1835{
1836	struct adapter  *adapter = txr->adapter;
1837	u32		olinfo_status = 0, cmd_type_len;
1838	int             i, j, error, nsegs;
1839	int		first;
1840	bool		remap = TRUE;
1841	struct mbuf	*m_head;
1842	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1843	bus_dmamap_t	map;
1844	struct igb_tx_buf *txbuf;
1845	union e1000_adv_tx_desc *txd = NULL;
1846
1847	m_head = *m_headp;
1848
1849	/* Basic descriptor defines */
1850        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1851	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1852
1853	if (m_head->m_flags & M_VLANTAG)
1854        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1855
1856        /*
1857         * Important to capture the first descriptor
1858         * used, because its buffer entry will later record
1859         * the EOP descriptor the hardware reports back on.
1860         */
1861        first = txr->next_avail_desc;
1862	txbuf = &txr->tx_buffers[first];
1863	map = txbuf->map;
1864
1865	/*
1866	 * Map the packet for DMA.
1867	 */
1868retry:
1869	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1870	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1871
1872	if (__predict_false(error)) {
1873		struct mbuf *m;
1874
1875		switch (error) {
1876		case EFBIG:
1877			/* Try it again? - one try */
1878			if (remap == TRUE) {
1879				remap = FALSE;
1880				m = m_defrag(*m_headp, M_NOWAIT);
1881				if (m == NULL) {
1882					adapter->mbuf_defrag_failed++;
1883					m_freem(*m_headp);
1884					*m_headp = NULL;
1885					return (ENOBUFS);
1886				}
1887				*m_headp = m;
1888				goto retry;
1889			} else
1890				return (error);
1891		case ENOMEM:
1892			txr->no_tx_dma_setup++;
1893			return (error);
1894		default:
1895			txr->no_tx_dma_setup++;
1896			m_freem(*m_headp);
1897			*m_headp = NULL;
1898			return (error);
1899		}
1900	}
1901
1902	/* Make certain there are enough descriptors */
1903	if (nsegs > txr->tx_avail - 2) {
1904		txr->no_desc_avail++;
1905		bus_dmamap_unload(txr->txtag, map);
1906		return (ENOBUFS);
1907	}
1908	m_head = *m_headp;
1909
1910	/*
1911	** Set up the appropriate offload context
1912	** this will consume the first descriptor
1913	*/
1914	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1915	if (__predict_false(error)) {
1916		m_freem(*m_headp);
1917		*m_headp = NULL;
1918		return (error);
1919	}
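	/*
	 * Illustrative example (assumption, not from the original source):
	 * a checksum- or TSO-offloaded packet that maps to three DMA
	 * segments consumes one context descriptor (set up just above)
	 * plus three data descriptors, i.e. four ring entries in total.
	 * The "tx_avail - 2" headroom checked earlier leaves room for the
	 * possible context descriptor and keeps the ring from filling
	 * completely.
	 */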
1920
1921	/* 82575 needs the queue index added */
1922	if (adapter->hw.mac.type == e1000_82575)
1923		olinfo_status |= txr->me << 4;
1924
1925	i = txr->next_avail_desc;
1926	for (j = 0; j < nsegs; j++) {
1927		bus_size_t seglen;
1928		bus_addr_t segaddr;
1929
1930		txbuf = &txr->tx_buffers[i];
1931		txd = &txr->tx_base[i];
1932		seglen = segs[j].ds_len;
1933		segaddr = htole64(segs[j].ds_addr);
1934
1935		txd->read.buffer_addr = segaddr;
1936		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1937		    cmd_type_len | seglen);
1938		txd->read.olinfo_status = htole32(olinfo_status);
1939
1940		if (++i == txr->num_desc)
1941			i = 0;
1942	}
1943
1944	txd->read.cmd_type_len |=
1945	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1946	txr->tx_avail -= nsegs;
1947	txr->next_avail_desc = i;
1948
1949	txbuf->m_head = m_head;
1950	/*
1951	** Here we swap the maps so the buffer entry of the last
1952	** descriptor, which gets the completion interrupt, holds
1953	** the real (loaded) map, and the first descriptor's entry
1954	** gets the unused map from this packet.
1955	*/
1956	txr->tx_buffers[first].map = txbuf->map;
1957	txbuf->map = map;
1958	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1959
1960        /* Set the EOP descriptor that will be marked done */
1961        txbuf = &txr->tx_buffers[first];
1962	txbuf->eop = txd;
1963
1964        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1965            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966	/*
1967	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1968	 * hardware that this frame is available to transmit.
1969	 */
1970	++txr->total_packets;
1971	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1972
1973	return (0);
1974}
1975static void
1976igb_set_promisc(struct adapter *adapter)
1977{
1978	struct ifnet	*ifp = adapter->ifp;
1979	struct e1000_hw *hw = &adapter->hw;
1980	u32		reg;
1981
1982	if (adapter->vf_ifp) {
1983		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1984		return;
1985	}
1986
1987	reg = E1000_READ_REG(hw, E1000_RCTL);
1988	if (ifp->if_flags & IFF_PROMISC) {
1989		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1990		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1991	} else if (ifp->if_flags & IFF_ALLMULTI) {
1992		reg |= E1000_RCTL_MPE;
1993		reg &= ~E1000_RCTL_UPE;
1994		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1995	}
1996}
1997
1998static void
1999igb_disable_promisc(struct adapter *adapter)
2000{
2001	struct e1000_hw *hw = &adapter->hw;
2002	struct ifnet	*ifp = adapter->ifp;
2003	u32		reg;
2004	int		mcnt = 0;
2005
2006	if (adapter->vf_ifp) {
2007		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2008		return;
2009	}
2010	reg = E1000_READ_REG(hw, E1000_RCTL);
2011	reg &=  (~E1000_RCTL_UPE);
2012	if (ifp->if_flags & IFF_ALLMULTI)
2013		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2014	else {
2015		struct  ifmultiaddr *ifma;
2016#if __FreeBSD_version < 800000
2017		IF_ADDR_LOCK(ifp);
2018#else
2019		if_maddr_rlock(ifp);
2020#endif
2021		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2022			if (ifma->ifma_addr->sa_family != AF_LINK)
2023				continue;
2024			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2025				break;
2026			mcnt++;
2027		}
2028#if __FreeBSD_version < 800000
2029		IF_ADDR_UNLOCK(ifp);
2030#else
2031		if_maddr_runlock(ifp);
2032#endif
2033	}
2034	/* Don't disable if in MAX groups */
2035	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2036		reg &=  (~E1000_RCTL_MPE);
2037	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2038}
2039
2040
2041/*********************************************************************
2042 *  Multicast Update
2043 *
2044 *  This routine is called whenever multicast address list is updated.
2045 *
2046 **********************************************************************/
2047
2048static void
2049igb_set_multi(struct adapter *adapter)
2050{
2051	struct ifnet	*ifp = adapter->ifp;
2052	struct ifmultiaddr *ifma;
2053	u32 reg_rctl = 0;
2054	u8  *mta;
2055
2056	int mcnt = 0;
2057
2058	IOCTL_DEBUGOUT("igb_set_multi: begin");
2059
2060	mta = adapter->mta;
2061	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2062	    MAX_NUM_MULTICAST_ADDRESSES);
2063
2064#if __FreeBSD_version < 800000
2065	IF_ADDR_LOCK(ifp);
2066#else
2067	if_maddr_rlock(ifp);
2068#endif
2069	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2070		if (ifma->ifma_addr->sa_family != AF_LINK)
2071			continue;
2072
2073		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2074			break;
2075
2076		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2077		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2078		mcnt++;
2079	}
2080#if __FreeBSD_version < 800000
2081	IF_ADDR_UNLOCK(ifp);
2082#else
2083	if_maddr_runlock(ifp);
2084#endif
2085
2086	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2087		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2088		reg_rctl |= E1000_RCTL_MPE;
2089		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2090	} else
2091		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2092}
2093
2094
2095/*********************************************************************
2096 *  Timer routine:
2097 *  	This routine checks for link status,
2098 *	updates statistics, and does the watchdog.
2099 *
2100 **********************************************************************/
2101
2102static void
2103igb_local_timer(void *arg)
2104{
2105	struct adapter		*adapter = arg;
2106	device_t		dev = adapter->dev;
2107	struct ifnet		*ifp = adapter->ifp;
2108	struct tx_ring		*txr = adapter->tx_rings;
2109	struct igb_queue	*que = adapter->queues;
2110	int			hung = 0, busy = 0;
2111
2112
2113	IGB_CORE_LOCK_ASSERT(adapter);
2114
2115	igb_update_link_status(adapter);
2116	igb_update_stats_counters(adapter);
2117
2118        /*
2119        ** Check the TX queues status
2120	**	- central locked handling of OACTIVE
2121	**	- watchdog only if all queues show hung
2122        */
2123	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2124		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2125		    (adapter->pause_frames == 0))
2126			++hung;
2127		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2128			++busy;
2129		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2130			taskqueue_enqueue(que->tq, &que->que_task);
2131	}
2132	if (hung == adapter->num_queues)
2133		goto timeout;
2134	if (busy == adapter->num_queues)
2135		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2136	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2137	    (busy < adapter->num_queues))
2138		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
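	/*
	 * Illustrative note (not from the original source): with, say, four
	 * queues, the watchdog below fires only if all four report
	 * IGB_QUEUE_HUNG while no pause frames were seen, and OACTIVE is set
	 * only when every queue is DEPLETED; a single stuck queue just gets
	 * its clean task rescheduled.
	 */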
2139
2140	adapter->pause_frames = 0;
2141	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2142#ifndef DEVICE_POLLING
2143	/* Schedule all queue interrupts - deadlock protection */
2144	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2145#endif
2146	return;
2147
2148timeout:
2149	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2150	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2151            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2152            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2153	device_printf(dev,"TX(%d) desc avail = %d, "
2154            "Next TX to Clean = %d\n",
2155            txr->me, txr->tx_avail, txr->next_to_clean);
2156	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2157	adapter->watchdog_events++;
2158	igb_init_locked(adapter);
2159}
2160
2161static void
2162igb_update_link_status(struct adapter *adapter)
2163{
2164	struct e1000_hw		*hw = &adapter->hw;
2165	struct e1000_fc_info	*fc = &hw->fc;
2166	struct ifnet		*ifp = adapter->ifp;
2167	device_t		dev = adapter->dev;
2168	struct tx_ring		*txr = adapter->tx_rings;
2169	u32			link_check, thstat, ctrl;
2170	char			*flowctl = NULL;
2171
2172	link_check = thstat = ctrl = 0;
2173
2174	/* Get the cached link value or read for real */
2175        switch (hw->phy.media_type) {
2176        case e1000_media_type_copper:
2177                if (hw->mac.get_link_status) {
2178			/* Do the work to read phy */
2179                        e1000_check_for_link(hw);
2180                        link_check = !hw->mac.get_link_status;
2181                } else
2182                        link_check = TRUE;
2183                break;
2184        case e1000_media_type_fiber:
2185                e1000_check_for_link(hw);
2186                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2187                                 E1000_STATUS_LU);
2188                break;
2189        case e1000_media_type_internal_serdes:
2190                e1000_check_for_link(hw);
2191                link_check = adapter->hw.mac.serdes_has_link;
2192                break;
2193	/* VF device is type_unknown */
2194        case e1000_media_type_unknown:
2195                e1000_check_for_link(hw);
2196		link_check = !hw->mac.get_link_status;
2197		/* Fall thru */
2198        default:
2199                break;
2200        }
2201
2202	/* Check for thermal downshift or shutdown */
2203	if (hw->mac.type == e1000_i350) {
2204		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2205		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2206	}
2207
2208	/* Get the flow control for display */
2209	switch (fc->current_mode) {
2210	case e1000_fc_rx_pause:
2211		flowctl = "RX";
2212		break;
2213	case e1000_fc_tx_pause:
2214		flowctl = "TX";
2215		break;
2216	case e1000_fc_full:
2217		flowctl = "Full";
2218		break;
2219	case e1000_fc_none:
2220	default:
2221		flowctl = "None";
2222		break;
2223	}
2224
2225	/* Now we check if a transition has happened */
2226	if (link_check && (adapter->link_active == 0)) {
2227		e1000_get_speed_and_duplex(&adapter->hw,
2228		    &adapter->link_speed, &adapter->link_duplex);
2229		if (bootverbose)
2230			device_printf(dev, "Link is up %d Mbps %s,"
2231			    " Flow Control: %s\n",
2232			    adapter->link_speed,
2233			    ((adapter->link_duplex == FULL_DUPLEX) ?
2234			    "Full Duplex" : "Half Duplex"), flowctl);
2235		adapter->link_active = 1;
2236		ifp->if_baudrate = adapter->link_speed * 1000000;
2237		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2238		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2239			device_printf(dev, "Link: thermal downshift\n");
2240		/* Delay Link Up for Phy update */
2241		if (((hw->mac.type == e1000_i210) ||
2242		    (hw->mac.type == e1000_i211)) &&
2243		    (hw->phy.id == I210_I_PHY_ID))
2244			msec_delay(I210_LINK_DELAY);
2245		/* Reset if the media type changed. */
2246		if (hw->dev_spec._82575.media_changed) {
2247			hw->dev_spec._82575.media_changed = false;
2248			adapter->flags |= IGB_MEDIA_RESET;
2249			igb_reset(adapter);
2250		}
2251		/* This can sleep */
2252		if_link_state_change(ifp, LINK_STATE_UP);
2253	} else if (!link_check && (adapter->link_active == 1)) {
2254		ifp->if_baudrate = adapter->link_speed = 0;
2255		adapter->link_duplex = 0;
2256		if (bootverbose)
2257			device_printf(dev, "Link is Down\n");
2258		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2259		    (thstat & E1000_THSTAT_PWR_DOWN))
2260			device_printf(dev, "Link: thermal shutdown\n");
2261		adapter->link_active = 0;
2262		/* This can sleep */
2263		if_link_state_change(ifp, LINK_STATE_DOWN);
2264		/* Reset queue state */
2265		for (int i = 0; i < adapter->num_queues; i++, txr++)
2266			txr->queue_status = IGB_QUEUE_IDLE;
2267	}
2268}
2269
2270/*********************************************************************
2271 *
2272 *  This routine disables all traffic on the adapter by issuing a
2273 *  global reset on the MAC and deallocates TX/RX buffers.
2274 *
2275 **********************************************************************/
2276
2277static void
2278igb_stop(void *arg)
2279{
2280	struct adapter	*adapter = arg;
2281	struct ifnet	*ifp = adapter->ifp;
2282	struct tx_ring *txr = adapter->tx_rings;
2283
2284	IGB_CORE_LOCK_ASSERT(adapter);
2285
2286	INIT_DEBUGOUT("igb_stop: begin");
2287
2288	igb_disable_intr(adapter);
2289
2290	callout_stop(&adapter->timer);
2291
2292	/* Tell the stack that the interface is no longer active */
2293	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2294	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2295
2296	/* Disarm watchdog timer. */
2297	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2298		IGB_TX_LOCK(txr);
2299		txr->queue_status = IGB_QUEUE_IDLE;
2300		IGB_TX_UNLOCK(txr);
2301	}
2302
2303	e1000_reset_hw(&adapter->hw);
2304	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2305
2306	e1000_led_off(&adapter->hw);
2307	e1000_cleanup_led(&adapter->hw);
2308}
2309
2310
2311/*********************************************************************
2312 *
2313 *  Determine hardware revision.
2314 *
2315 **********************************************************************/
2316static void
2317igb_identify_hardware(struct adapter *adapter)
2318{
2319	device_t dev = adapter->dev;
2320
2321	/* Make sure our PCI config space has the necessary stuff set */
2322	pci_enable_busmaster(dev);
2323	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2324
2325	/* Save off the information about this board */
2326	adapter->hw.vendor_id = pci_get_vendor(dev);
2327	adapter->hw.device_id = pci_get_device(dev);
2328	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2329	adapter->hw.subsystem_vendor_id =
2330	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2331	adapter->hw.subsystem_device_id =
2332	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2333
2334	/* Set MAC type early for PCI setup */
2335	e1000_set_mac_type(&adapter->hw);
2336
2337	/* Are we a VF device? */
2338	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2339	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2340		adapter->vf_ifp = 1;
2341	else
2342		adapter->vf_ifp = 0;
2343}
2344
2345static int
2346igb_allocate_pci_resources(struct adapter *adapter)
2347{
2348	device_t	dev = adapter->dev;
2349	int		rid;
2350
2351	rid = PCIR_BAR(0);
2352	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2353	    &rid, RF_ACTIVE);
2354	if (adapter->pci_mem == NULL) {
2355		device_printf(dev, "Unable to allocate bus resource: memory\n");
2356		return (ENXIO);
2357	}
2358	adapter->osdep.mem_bus_space_tag =
2359	    rman_get_bustag(adapter->pci_mem);
2360	adapter->osdep.mem_bus_space_handle =
2361	    rman_get_bushandle(adapter->pci_mem);
2362	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2363
2364	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2365
2366	/* This will setup either MSI/X or MSI */
2367	adapter->msix = igb_setup_msix(adapter);
2368	adapter->hw.back = &adapter->osdep;
2369
2370	return (0);
2371}
2372
2373/*********************************************************************
2374 *
2375 *  Setup the Legacy or MSI Interrupt handler
2376 *
2377 **********************************************************************/
2378static int
2379igb_allocate_legacy(struct adapter *adapter)
2380{
2381	device_t		dev = adapter->dev;
2382	struct igb_queue	*que = adapter->queues;
2383#ifndef IGB_LEGACY_TX
2384	struct tx_ring		*txr = adapter->tx_rings;
2385#endif
2386	int			error, rid = 0;
2387
2388	/* Turn off all interrupts */
2389	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2390
2391	/* MSI RID is 1 */
2392	if (adapter->msix == 1)
2393		rid = 1;
2394
2395	/* We allocate a single interrupt resource */
2396	adapter->res = bus_alloc_resource_any(dev,
2397	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2398	if (adapter->res == NULL) {
2399		device_printf(dev, "Unable to allocate bus resource: "
2400		    "interrupt\n");
2401		return (ENXIO);
2402	}
2403
2404#ifndef IGB_LEGACY_TX
2405	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2406#endif
2407
2408	/*
2409	 * Try allocating a fast interrupt and the associated deferred
2410	 * processing contexts.
2411	 */
2412	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2413	/* Make tasklet for deferred link handling */
2414	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2415	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2416	    taskqueue_thread_enqueue, &que->tq);
2417	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2418	    device_get_nameunit(adapter->dev));
2419	if ((error = bus_setup_intr(dev, adapter->res,
2420	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2421	    adapter, &adapter->tag)) != 0) {
2422		device_printf(dev, "Failed to register fast interrupt "
2423			    "handler: %d\n", error);
2424		taskqueue_free(que->tq);
2425		que->tq = NULL;
2426		return (error);
2427	}
2428
2429	return (0);
2430}
2431
2432
2433/*********************************************************************
2434 *
2435 *  Setup the MSIX Queue Interrupt handlers:
2436 *
2437 **********************************************************************/
2438static int
2439igb_allocate_msix(struct adapter *adapter)
2440{
2441	device_t		dev = adapter->dev;
2442	struct igb_queue	*que = adapter->queues;
2443	int			error, rid, vector = 0;
2444
2445	/* Be sure to start with all interrupts disabled */
2446	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2447	E1000_WRITE_FLUSH(&adapter->hw);
2448
2449	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2450		rid = vector +1;
2451		que->res = bus_alloc_resource_any(dev,
2452		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2453		if (que->res == NULL) {
2454			device_printf(dev,
2455			    "Unable to allocate bus resource: "
2456			    "MSIX Queue Interrupt\n");
2457			return (ENXIO);
2458		}
2459		error = bus_setup_intr(dev, que->res,
2460	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2461		    igb_msix_que, que, &que->tag);
2462		if (error) {
2463			que->res = NULL;
2464			device_printf(dev, "Failed to register Queue handler");
2465			return (error);
2466		}
2467#if __FreeBSD_version >= 800504
2468		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2469#endif
2470		que->msix = vector;
2471		if (adapter->hw.mac.type == e1000_82575)
2472			que->eims = E1000_EICR_TX_QUEUE0 << i;
2473		else
2474			que->eims = 1 << vector;
2475		/*
2476		** Bind the msix vector, and thus the
2477		** rings to the corresponding cpu.
2478		*/
2479		if (adapter->num_queues > 1) {
2480			if (igb_last_bind_cpu < 0)
2481				igb_last_bind_cpu = CPU_FIRST();
2482			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2483			device_printf(dev,
2484				"Bound queue %d to cpu %d\n",
2485				i,igb_last_bind_cpu);
2486			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2487		}
2488#ifndef IGB_LEGACY_TX
2489		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2490		    que->txr);
2491#endif
2492		/* Make tasklet for deferred handling */
2493		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2494		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2495		    taskqueue_thread_enqueue, &que->tq);
2496		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2497		    device_get_nameunit(adapter->dev));
2498	}
2499
2500	/* And Link */
2501	rid = vector + 1;
2502	adapter->res = bus_alloc_resource_any(dev,
2503	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2504	if (adapter->res == NULL) {
2505		device_printf(dev,
2506		    "Unable to allocate bus resource: "
2507		    "MSIX Link Interrupt\n");
2508		return (ENXIO);
2509	}
2510	if ((error = bus_setup_intr(dev, adapter->res,
2511	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2512	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2513		device_printf(dev, "Failed to register Link handler");
2514		return (error);
2515	}
2516#if __FreeBSD_version >= 800504
2517	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2518#endif
2519	adapter->linkvec = vector;
2520
2521	return (0);
2522}
2523
2524
2525static void
2526igb_configure_queues(struct adapter *adapter)
2527{
2528	struct	e1000_hw	*hw = &adapter->hw;
2529	struct	igb_queue	*que;
2530	u32			tmp, ivar = 0, newitr = 0;
2531
2532	/* First turn on RSS capability */
2533	if (adapter->hw.mac.type != e1000_82575)
2534		E1000_WRITE_REG(hw, E1000_GPIE,
2535		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2536		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2537
2538	/* Turn on MSIX */
2539	switch (adapter->hw.mac.type) {
2540	case e1000_82580:
2541	case e1000_i350:
2542	case e1000_i354:
2543	case e1000_i210:
2544	case e1000_i211:
2545	case e1000_vfadapt:
2546	case e1000_vfadapt_i350:
2547		/* RX entries */
2548		for (int i = 0; i < adapter->num_queues; i++) {
2549			u32 index = i >> 1;
2550			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2551			que = &adapter->queues[i];
2552			if (i & 1) {
2553				ivar &= 0xFF00FFFF;
2554				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2555			} else {
2556				ivar &= 0xFFFFFF00;
2557				ivar |= que->msix | E1000_IVAR_VALID;
2558			}
2559			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2560		}
2561		/* TX entries */
2562		for (int i = 0; i < adapter->num_queues; i++) {
2563			u32 index = i >> 1;
2564			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2565			que = &adapter->queues[i];
2566			if (i & 1) {
2567				ivar &= 0x00FFFFFF;
2568				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2569			} else {
2570				ivar &= 0xFFFF00FF;
2571				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2572			}
2573			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2574			adapter->que_mask |= que->eims;
2575		}
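		/*
		 * Illustrative note (not from the original source): on these
		 * MACs each 32-bit IVAR register carries the vectors for two
		 * queues, one byte per cause.  For queue i the register index
		 * is i >> 1; RX uses byte 0 (even i) or byte 2 (odd i), and
		 * TX uses byte 1 (even i) or byte 3 (odd i), each byte holding
		 * the MSI-X vector number plus E1000_IVAR_VALID.  Queue 3, for
		 * example, lands in IVAR[1] with its RX vector in bits 23:16
		 * and its TX vector in bits 31:24, matching the masks above.
		 */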
2576
2577		/* And for the link interrupt */
2578		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2579		adapter->link_mask = 1 << adapter->linkvec;
2580		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2581		break;
2582	case e1000_82576:
2583		/* RX entries */
2584		for (int i = 0; i < adapter->num_queues; i++) {
2585			u32 index = i & 0x7; /* Each IVAR has two entries */
2586			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2587			que = &adapter->queues[i];
2588			if (i < 8) {
2589				ivar &= 0xFFFFFF00;
2590				ivar |= que->msix | E1000_IVAR_VALID;
2591			} else {
2592				ivar &= 0xFF00FFFF;
2593				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2594			}
2595			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2596			adapter->que_mask |= que->eims;
2597		}
2598		/* TX entries */
2599		for (int i = 0; i < adapter->num_queues; i++) {
2600			u32 index = i & 0x7; /* Each IVAR has two entries */
2601			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2602			que = &adapter->queues[i];
2603			if (i < 8) {
2604				ivar &= 0xFFFF00FF;
2605				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2606			} else {
2607				ivar &= 0x00FFFFFF;
2608				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2609			}
2610			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2611			adapter->que_mask |= que->eims;
2612		}
2613
2614		/* And for the link interrupt */
2615		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2616		adapter->link_mask = 1 << adapter->linkvec;
2617		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2618		break;
2619
2620	case e1000_82575:
2621                /* enable MSI-X support*/
2622		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2623                tmp |= E1000_CTRL_EXT_PBA_CLR;
2624                /* Auto-Mask interrupts upon ICR read. */
2625                tmp |= E1000_CTRL_EXT_EIAME;
2626                tmp |= E1000_CTRL_EXT_IRCA;
2627                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2628
2629		/* Queues */
2630		for (int i = 0; i < adapter->num_queues; i++) {
2631			que = &adapter->queues[i];
2632			tmp = E1000_EICR_RX_QUEUE0 << i;
2633			tmp |= E1000_EICR_TX_QUEUE0 << i;
2634			que->eims = tmp;
2635			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2636			    i, que->eims);
2637			adapter->que_mask |= que->eims;
2638		}
2639
2640		/* Link */
2641		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2642		    E1000_EIMS_OTHER);
2643		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
2644	default:
2645		break;
2646	}
2647
2648	/* Set the starting interrupt rate */
2649	if (igb_max_interrupt_rate > 0)
2650		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
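	/*
	 * Illustrative example (not from the original source): EITR counts
	 * in roughly 250 ns units, hence the 4000000/rate conversion.  With
	 * a rate limit of 8000, for example, newitr = 4000000 / 8000 = 500,
	 * i.e. a 125 usec minimum interval between interrupts per vector;
	 * the 0x7FFC mask clears the reserved low bits.
	 */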
2651
2652        if (hw->mac.type == e1000_82575)
2653                newitr |= newitr << 16;
2654        else
2655                newitr |= E1000_EITR_CNT_IGNR;
2656
2657	for (int i = 0; i < adapter->num_queues; i++) {
2658		que = &adapter->queues[i];
2659		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2660	}
2661
2662	return;
2663}
2664
2665
2666static void
2667igb_free_pci_resources(struct adapter *adapter)
2668{
2669	struct		igb_queue *que = adapter->queues;
2670	device_t	dev = adapter->dev;
2671	int		rid;
2672
2673	/*
2674	** There is a slight possibility of a failure mode
2675	** in attach that will result in entering this function
2676	** before interrupt resources have been initialized, and
2677	** in that case we do not want to execute the loops below.
2678	** We can detect this reliably by the state of the adapter
2679	** res pointer.
2680	*/
2681	if (adapter->res == NULL)
2682		goto mem;
2683
2684	/*
2685	 * First release all the interrupt resources:
2686	 */
2687	for (int i = 0; i < adapter->num_queues; i++, que++) {
2688		rid = que->msix + 1;
2689		if (que->tag != NULL) {
2690			bus_teardown_intr(dev, que->res, que->tag);
2691			que->tag = NULL;
2692		}
2693		if (que->res != NULL)
2694			bus_release_resource(dev,
2695			    SYS_RES_IRQ, rid, que->res);
2696	}
2697
2698	/* Clean the Legacy or Link interrupt last */
2699	if (adapter->linkvec) /* we are doing MSIX */
2700		rid = adapter->linkvec + 1;
2701	else
2702		rid = (adapter->msix != 0) ? 1 : 0;
2703
2704	que = adapter->queues;
2705	if (adapter->tag != NULL) {
2706		taskqueue_drain(que->tq, &adapter->link_task);
2707		bus_teardown_intr(dev, adapter->res, adapter->tag);
2708		adapter->tag = NULL;
2709	}
2710	if (adapter->res != NULL)
2711		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2712
2713	for (int i = 0; i < adapter->num_queues; i++, que++) {
2714		if (que->tq != NULL) {
2715#ifndef IGB_LEGACY_TX
2716			taskqueue_drain(que->tq, &que->txr->txq_task);
2717#endif
2718			taskqueue_drain(que->tq, &que->que_task);
2719			taskqueue_free(que->tq);
2720		}
2721	}
2722mem:
2723	if (adapter->msix)
2724		pci_release_msi(dev);
2725
2726	if (adapter->msix_mem != NULL)
2727		bus_release_resource(dev, SYS_RES_MEMORY,
2728		    adapter->memrid, adapter->msix_mem);
2729
2730	if (adapter->pci_mem != NULL)
2731		bus_release_resource(dev, SYS_RES_MEMORY,
2732		    PCIR_BAR(0), adapter->pci_mem);
2733
2734}
2735
2736/*
2737 * Setup Either MSI/X or MSI
2738 */
2739static int
2740igb_setup_msix(struct adapter *adapter)
2741{
2742	device_t	dev = adapter->dev;
2743	int		bar, want, queues, msgs, maxqueues;
2744
2745	/* tuneable override */
2746	if (igb_enable_msix == 0)
2747		goto msi;
2748
2749	/* First try MSI/X */
2750	msgs = pci_msix_count(dev);
2751	if (msgs == 0)
2752		goto msi;
2753	/*
2754	** Some new devices, as with ixgbe, now may
2755	** use a different BAR, so we need to keep
2756	** track of which is used.
2757	*/
2758	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2759	bar = pci_read_config(dev, adapter->memrid, 4);
2760	if (bar == 0) /* use next bar */
2761		adapter->memrid += 4;
2762	adapter->msix_mem = bus_alloc_resource_any(dev,
2763	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2764       	if (adapter->msix_mem == NULL) {
2765		/* May not be enabled */
2766		device_printf(adapter->dev,
2767		    "Unable to map MSIX table \n");
2768		goto msi;
2769	}
2770
2771	/* Figure out a reasonable auto config value */
2772	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2773
2774	/* Manual override */
2775	if (igb_num_queues != 0)
2776		queues = igb_num_queues;
2777
2778	/* Sanity check based on HW */
2779	switch (adapter->hw.mac.type) {
2780		case e1000_82575:
2781			maxqueues = 4;
2782			break;
2783		case e1000_82576:
2784		case e1000_82580:
2785		case e1000_i350:
2786		case e1000_i354:
2787			maxqueues = 8;
2788			break;
2789		case e1000_i210:
2790			maxqueues = 4;
2791			break;
2792		case e1000_i211:
2793			maxqueues = 2;
2794			break;
2795		default:  /* VF interfaces */
2796			maxqueues = 1;
2797			break;
2798	}
2799	if (queues > maxqueues)
2800		queues = maxqueues;
2801
2802	/* Manual override */
2803	if (igb_num_queues != 0)
2804		queues = igb_num_queues;
2805
2806	/*
2807	** One vector (RX/TX pair) per queue
2808	** plus an additional for Link interrupt
2809	*/
2810	want = queues + 1;
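	/*
	 * Illustrative example (assumption, not from the original source):
	 * on a 4-core system where the device advertises 10 MSI-X messages,
	 * queues = min(4, 9) = 4 and want = 5, i.e. one vector per RX/TX
	 * queue pair plus one for the link interrupt.  If fewer than 5
	 * messages were available, the code below falls back to MSI.
	 */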
2811	if (msgs >= want)
2812		msgs = want;
2813	else {
2814               	device_printf(adapter->dev,
2815		    "MSIX Configuration Problem, "
2816		    "%d vectors configured, but %d queues wanted!\n",
2817		    msgs, want);
2818		goto msi;
2819	}
2820	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2821               	device_printf(adapter->dev,
2822		    "Using MSIX interrupts with %d vectors\n", msgs);
2823		adapter->num_queues = queues;
2824		return (msgs);
2825	}
2826	/*
2827	** If MSIX alloc failed or provided us with
2828	** less than needed, free and fall through to MSI
2829	*/
2830	pci_release_msi(dev);
2831
2832msi:
2833       	if (adapter->msix_mem != NULL) {
2834		bus_release_resource(dev, SYS_RES_MEMORY,
2835		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2836		adapter->msix_mem = NULL;
2837	}
2838       	msgs = 1;
2839	if (pci_alloc_msi(dev, &msgs) == 0) {
2840		device_printf(adapter->dev," Using an MSI interrupt\n");
2841		return (msgs);
2842	}
2843	device_printf(adapter->dev," Using a Legacy interrupt\n");
2844	return (0);
2845}
2846
2847/*********************************************************************
2848 *
2849 *  Initialize the DMA Coalescing feature
2850 *
2851 **********************************************************************/
2852static void
2853igb_init_dmac(struct adapter *adapter, u32 pba)
2854{
2855	device_t	dev = adapter->dev;
2856	struct e1000_hw *hw = &adapter->hw;
2857	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2858	u16		hwm;
2859
2860	if (hw->mac.type == e1000_i211)
2861		return;
2862
2863	if (hw->mac.type > e1000_82580) {
2864
2865		if (adapter->dmac == 0) { /* Disabling it */
2866			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2867			return;
2868		} else
2869			device_printf(dev, "DMA Coalescing enabled\n");
2870
2871		/* Set starting threshold */
2872		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2873
2874		hwm = 64 * pba - adapter->max_frame_size / 16;
2875		if (hwm < 64 * (pba - 6))
2876			hwm = 64 * (pba - 6);
2877		reg = E1000_READ_REG(hw, E1000_FCRTC);
2878		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2879		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2880		    & E1000_FCRTC_RTH_COAL_MASK);
2881		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2882
2883
2884		dmac = pba - adapter->max_frame_size / 512;
2885		if (dmac < pba - 10)
2886			dmac = pba - 10;
2887		reg = E1000_READ_REG(hw, E1000_DMACR);
2888		reg &= ~E1000_DMACR_DMACTHR_MASK;
2889		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2890		    & E1000_DMACR_DMACTHR_MASK);
2891
2892		/* Transition to L0s or L1 if available */
2893		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2894
2895		/* Check for a 2.5Gb backplane connection before
2896		* configuring the DMA coalescing watchdog timer:
2897		* on a 2.5Gb link the timer counts in 12.8 usec
2898		* intervals, otherwise in 32 usec intervals, so the
2899		* requested value is scaled accordingly below.
2900		*/
2901		if (hw->mac.type == e1000_i354) {
2902			int status = E1000_READ_REG(hw, E1000_STATUS);
2903			if ((status & E1000_STATUS_2P5_SKU) &&
2904			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2905				reg |= ((adapter->dmac * 5) >> 6);
2906			else
2907				reg |= (adapter->dmac >> 5);
2908		} else {
2909			reg |= (adapter->dmac >> 5);
2910		}
2911
2912		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2913
2914#ifdef I210_OBFF_SUPPORT
2915		/*
2916		 * Set the OBFF Rx threshold to DMA Coalescing Rx
2917		 * threshold - 2KB and enable the feature in the
2918		 * hardware for I210.
2919		 */
2920		if (hw->mac.type == e1000_i210) {
2921			int obff = dmac - 2;
2922			reg = E1000_READ_REG(hw, E1000_DOBFFCTL);
2923			reg &= ~E1000_DOBFFCTL_OBFFTHR_MASK;
2924			reg |= (obff & E1000_DOBFFCTL_OBFFTHR_MASK)
2925			    | E1000_DOBFFCTL_EXIT_ACT_MASK;
2926			E1000_WRITE_REG(hw, E1000_DOBFFCTL, reg);
2927		}
2928#endif
2929		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2930
2931		/* Set the interval before transition */
2932		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2933		if (hw->mac.type == e1000_i350)
2934			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2935		/*
2936		** On a 2.5Gb connection the TTLX unit is 0.4 usec, not 1 usec,
2937		** so the value is scaled by 2.5 (0x4 -> 0xA) for the same ~4 usec delay.
2938		*/
2939		if (hw->mac.type == e1000_i354) {
2940			int status = E1000_READ_REG(hw, E1000_STATUS);
2941			if ((status & E1000_STATUS_2P5_SKU) &&
2942			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2943				reg |= 0xA;
2944			else
2945				reg |= 0x4;
2946		} else {
2947			reg |= 0x4;
2948		}
2949
2950		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2951
2952		/* free space in tx packet buffer to wake from DMA coal */
2953		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2954		    (2 * adapter->max_frame_size)) >> 6);
2955
2956		/* make low power state decision controlled by DMA coal */
2957		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2958		reg &= ~E1000_PCIEMISC_LX_DECISION;
2959		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2960
2961	} else if (hw->mac.type == e1000_82580) {
2962		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2963		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2964		    reg & ~E1000_PCIEMISC_LX_DECISION);
2965		E1000_WRITE_REG(hw, E1000_DMACR, 0);
2966	}
2967}
2968
2969
2970/*********************************************************************
2971 *
2972 *  Set up an fresh starting state
2973 *  Set up a fresh starting state
2974 **********************************************************************/
2975static void
2976igb_reset(struct adapter *adapter)
2977{
2978	device_t	dev = adapter->dev;
2979	struct e1000_hw *hw = &adapter->hw;
2980	struct e1000_fc_info *fc = &hw->fc;
2981	struct ifnet	*ifp = adapter->ifp;
2982	u32		pba = 0;
2983	u16		hwm;
2984
2985	INIT_DEBUGOUT("igb_reset: begin");
2986
2987	/* Let the firmware know the OS is in control */
2988	igb_get_hw_control(adapter);
2989
2990	/*
2991	 * Packet Buffer Allocation (PBA)
2992	 * Writing PBA sets the receive portion of the buffer;
2993	 * the remainder is used for the transmit buffer.
2994	 */
2995	switch (hw->mac.type) {
2996	case e1000_82575:
2997		pba = E1000_PBA_32K;
2998		break;
2999	case e1000_82576:
3000	case e1000_vfadapt:
3001		pba = E1000_READ_REG(hw, E1000_RXPBS);
3002		pba &= E1000_RXPBS_SIZE_MASK_82576;
3003		break;
3004	case e1000_82580:
3005	case e1000_i350:
3006	case e1000_i354:
3007	case e1000_vfadapt_i350:
3008		pba = E1000_READ_REG(hw, E1000_RXPBS);
3009		pba = e1000_rxpbs_adjust_82580(pba);
3010		break;
3011	case e1000_i210:
3012	case e1000_i211:
3013		pba = E1000_PBA_34K;
		break;
3014	default:
3015		break;
3016	}
3017
3018	/* Special needs in case of Jumbo frames */
3019	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3020		u32 tx_space, min_tx, min_rx;
3021		pba = E1000_READ_REG(hw, E1000_PBA);
3022		tx_space = pba >> 16;
3023		pba &= 0xffff;
3024		min_tx = (adapter->max_frame_size +
3025		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3026		min_tx = roundup2(min_tx, 1024);
3027		min_tx >>= 10;
3028                min_rx = adapter->max_frame_size;
3029                min_rx = roundup2(min_rx, 1024);
3030                min_rx >>= 10;
3031		if (tx_space < min_tx &&
3032		    ((min_tx - tx_space) < pba)) {
3033			pba = pba - (min_tx - tx_space);
3034			/*
3035                         * if short on rx space, rx wins
3036                         * and must trump tx adjustment
3037			 */
3038                        if (pba < min_rx)
3039                                pba = min_rx;
3040		}
3041		E1000_WRITE_REG(hw, E1000_PBA, pba);
3042	}
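	/*
	 * Illustrative example (assumption, not from the original source):
	 * with a 9000-byte MTU, max_frame_size is about 9018 bytes, so in
	 * the block above min_tx = (9018 + 16 - 4) * 2 rounded up to 1K
	 * = 18 KB and min_rx rounds up to 9 KB.  If the TX portion of the
	 * packet buffer were only 14 KB, the 4 KB shortfall would be taken
	 * from the RX share, but never below min_rx.
	 */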
3043
3044	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3045
3046	/*
3047	 * These parameters control the automatic generation (Tx) and
3048	 * response (Rx) to Ethernet PAUSE frames.
3049	 * - High water mark should allow for at least two frames to be
3050	 *   received after sending an XOFF.
3051	 * - Low water mark works best when it is very near the high water mark.
3052	 *   This allows the receiver to restart by sending XON when it has
3053	 *   drained a bit.
3054	 */
3055	hwm = min(((pba << 10) * 9 / 10),
3056	    ((pba << 10) - 2 * adapter->max_frame_size));
3057
3058	if (hw->mac.type < e1000_82576) {
3059		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3060		fc->low_water = fc->high_water - 8;
3061	} else {
3062		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3063		fc->low_water = fc->high_water - 16;
3064	}
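	/*
	 * Illustrative example (assumption, not from the original source):
	 * with pba = 34 (KB) and a ~1522 byte max frame, (pba << 10) * 9 / 10
	 * = 31334 and (pba << 10) - 2 * 1522 = 31772, so hwm = 31334.  On
	 * 82576 and later that is rounded down to 16-byte granularity:
	 * high_water = 31328 and low_water = 31312 bytes.
	 */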
3065
3066	fc->pause_time = IGB_FC_PAUSE_TIME;
3067	fc->send_xon = TRUE;
3068	if (adapter->fc)
3069		fc->requested_mode = adapter->fc;
3070	else
3071		fc->requested_mode = e1000_fc_default;
3072
3073	/* Issue a global reset */
3074	e1000_reset_hw(hw);
3075	E1000_WRITE_REG(hw, E1000_WUC, 0);
3076
3077	/* Reset for AutoMediaDetect */
3078	if (adapter->flags & IGB_MEDIA_RESET) {
3079		e1000_setup_init_funcs(hw, TRUE);
3080		e1000_get_bus_info(hw);
3081		adapter->flags &= ~IGB_MEDIA_RESET;
3082	}
3083
3084	if (e1000_init_hw(hw) < 0)
3085		device_printf(dev, "Hardware Initialization Failed\n");
3086
3087	/* Setup DMA Coalescing */
3088	igb_init_dmac(adapter, pba);
3089
3090	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3091	e1000_get_phy_info(hw);
3092	e1000_check_for_link(hw);
3093	return;
3094}
3095
3096/*********************************************************************
3097 *
3098 *  Setup networking device structure and register an interface.
3099 *
3100 **********************************************************************/
3101static int
3102igb_setup_interface(device_t dev, struct adapter *adapter)
3103{
3104	struct ifnet   *ifp;
3105
3106	INIT_DEBUGOUT("igb_setup_interface: begin");
3107
3108	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3109	if (ifp == NULL) {
3110		device_printf(dev, "can not allocate ifnet structure\n");
3111		return (-1);
3112	}
3113	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3114	ifp->if_init =  igb_init;
3115	ifp->if_softc = adapter;
3116	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3117	ifp->if_ioctl = igb_ioctl;
3118#ifndef IGB_LEGACY_TX
3119	ifp->if_transmit = igb_mq_start;
3120	ifp->if_qflush = igb_qflush;
3121#else
3122	ifp->if_start = igb_start;
3123	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3124	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3125	IFQ_SET_READY(&ifp->if_snd);
3126#endif
3127
3128	ether_ifattach(ifp, adapter->hw.mac.addr);
3129
3130	ifp->if_capabilities = ifp->if_capenable = 0;
3131
3132	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3133	ifp->if_capabilities |= IFCAP_TSO;
3134	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3135	ifp->if_capenable = ifp->if_capabilities;
3136
3137	/* Advertise LRO capability, but don't enable it by default */
3138	ifp->if_capabilities |= IFCAP_LRO;
3139
3140#ifdef DEVICE_POLLING
3141	ifp->if_capabilities |= IFCAP_POLLING;
3142#endif
3143
3144	/*
3145	 * Tell the upper layer(s) we
3146	 * support full VLAN capability.
3147	 */
3148	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3149	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3150			     |  IFCAP_VLAN_HWTSO
3151			     |  IFCAP_VLAN_MTU;
3152	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3153			  |  IFCAP_VLAN_HWTSO
3154			  |  IFCAP_VLAN_MTU;
3155
3156	/*
3157	** Don't turn this on by default: if VLANs are
3158	** created on another pseudo device (e.g. lagg),
3159	** VLAN events are not passed through and filtering
3160	** breaks, but with HW FILTER off it works. If you
3161	** use VLANs directly on the igb interface you can
3162	** enable this and get full hardware tag filtering.
3163	*/
3164	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
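	/*
	 * Usage sketch (not from the original source; interface names are
	 * examples only): when VLANs are configured directly on igb, the
	 * filter can be enabled from userland, e.g.
	 *	ifconfig igb0 vlanhwfilter
	 *	ifconfig igb0.100 create
	 */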
3165
3166	/*
3167	 * Specify the media types supported by this adapter and register
3168	 * callbacks to update media and link information
3169	 */
3170	ifmedia_init(&adapter->media, IFM_IMASK,
3171	    igb_media_change, igb_media_status);
3172	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3173	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3174		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3175			    0, NULL);
3176		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3177	} else {
3178		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3179		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3180			    0, NULL);
3181		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3182			    0, NULL);
3183		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3184			    0, NULL);
3185		if (adapter->hw.phy.type != e1000_phy_ife) {
3186			ifmedia_add(&adapter->media,
3187				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3188			ifmedia_add(&adapter->media,
3189				IFM_ETHER | IFM_1000_T, 0, NULL);
3190		}
3191	}
3192	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3193	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3194	return (0);
3195}
3196
3197
3198/*
3199 * Manage DMA'able memory.
3200 */
3201static void
3202igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3203{
3204	if (error)
3205		return;
3206	*(bus_addr_t *) arg = segs[0].ds_addr;
3207}
3208
3209static int
3210igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3211        struct igb_dma_alloc *dma, int mapflags)
3212{
3213	int error;
3214
3215	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3216				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3217				BUS_SPACE_MAXADDR,	/* lowaddr */
3218				BUS_SPACE_MAXADDR,	/* highaddr */
3219				NULL, NULL,		/* filter, filterarg */
3220				size,			/* maxsize */
3221				1,			/* nsegments */
3222				size,			/* maxsegsize */
3223				0,			/* flags */
3224				NULL,			/* lockfunc */
3225				NULL,			/* lockarg */
3226				&dma->dma_tag);
3227	if (error) {
3228		device_printf(adapter->dev,
3229		    "%s: bus_dma_tag_create failed: %d\n",
3230		    __func__, error);
3231		goto fail_0;
3232	}
3233
3234	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3235	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3236	if (error) {
3237		device_printf(adapter->dev,
3238		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3239		    __func__, (uintmax_t)size, error);
3240		goto fail_2;
3241	}
3242
3243	dma->dma_paddr = 0;
3244	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3245	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3246	if (error || dma->dma_paddr == 0) {
3247		device_printf(adapter->dev,
3248		    "%s: bus_dmamap_load failed: %d\n",
3249		    __func__, error);
3250		goto fail_3;
3251	}
3252
3253	return (0);
3254
3255fail_3:
3256	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3257fail_2:
3258	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3259	bus_dma_tag_destroy(dma->dma_tag);
3260fail_0:
3261	dma->dma_map = NULL;
3262	dma->dma_tag = NULL;
3263
3264	return (error);
3265}
3266
3267static void
3268igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3269{
3270	if (dma->dma_tag == NULL)
3271		return;
3272	if (dma->dma_map != NULL) {
3273		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3274		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3275		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3276		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3277		dma->dma_map = NULL;
3278	}
3279	bus_dma_tag_destroy(dma->dma_tag);
3280	dma->dma_tag = NULL;
3281}
3282
3283
3284/*********************************************************************
3285 *
3286 *  Allocate memory for the transmit and receive rings, and then
3287 *  the descriptors associated with each, called only once at attach.
3288 *
3289 **********************************************************************/
3290static int
3291igb_allocate_queues(struct adapter *adapter)
3292{
3293	device_t dev = adapter->dev;
3294	struct igb_queue	*que = NULL;
3295	struct tx_ring		*txr = NULL;
3296	struct rx_ring		*rxr = NULL;
3297	int rsize, tsize, error = E1000_SUCCESS;
3298	int txconf = 0, rxconf = 0;
3299
3300	/* First allocate the top level queue structs */
3301	if (!(adapter->queues =
3302	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3303	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3304		device_printf(dev, "Unable to allocate queue memory\n");
3305		error = ENOMEM;
3306		goto fail;
3307	}
3308
3309	/* Next allocate the TX ring struct memory */
3310	if (!(adapter->tx_rings =
3311	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3312	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3313		device_printf(dev, "Unable to allocate TX ring memory\n");
3314		error = ENOMEM;
3315		goto tx_fail;
3316	}
3317
3318	/* Now allocate the RX */
3319	if (!(adapter->rx_rings =
3320	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3321	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3322		device_printf(dev, "Unable to allocate RX ring memory\n");
3323		error = ENOMEM;
3324		goto rx_fail;
3325	}
3326
3327	tsize = roundup2(adapter->num_tx_desc *
3328	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
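	/*
	 * Illustrative example (assumption, not from the original source):
	 * with 1024 TX descriptors of 16 bytes each (union e1000_adv_tx_desc),
	 * tsize is 16384 bytes, already a multiple of the 128-byte
	 * IGB_DBA_ALIGN, so one 16 KB DMA allocation backs each ring.
	 */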
3329	/*
3330	 * Now set up the TX queues; txconf is needed to handle the
3331	 * possibility that things fail midcourse, in which case we
3332	 * need to undo the memory allocations gracefully.
3333	 */
3334	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3335		/* Set up some basics */
3336		txr = &adapter->tx_rings[i];
3337		txr->adapter = adapter;
3338		txr->me = i;
3339		txr->num_desc = adapter->num_tx_desc;
3340
3341		/* Initialize the TX lock */
3342		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3343		    device_get_nameunit(dev), txr->me);
3344		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3345
3346		if (igb_dma_malloc(adapter, tsize,
3347			&txr->txdma, BUS_DMA_NOWAIT)) {
3348			device_printf(dev,
3349			    "Unable to allocate TX Descriptor memory\n");
3350			error = ENOMEM;
3351			goto err_tx_desc;
3352		}
3353		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3354		bzero((void *)txr->tx_base, tsize);
3355
3356        	/* Now allocate transmit buffers for the ring */
3357        	if (igb_allocate_transmit_buffers(txr)) {
3358			device_printf(dev,
3359			    "Critical Failure setting up transmit buffers\n");
3360			error = ENOMEM;
3361			goto err_tx_desc;
3362        	}
3363#ifndef IGB_LEGACY_TX
3364		/* Allocate a buf ring */
3365		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3366		    M_WAITOK, &txr->tx_mtx);
3367#endif
3368	}
3369
3370	/*
3371	 * Next the RX queues...
3372	 */
3373	rsize = roundup2(adapter->num_rx_desc *
3374	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3375	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3376		rxr = &adapter->rx_rings[i];
3377		rxr->adapter = adapter;
3378		rxr->me = i;
3379
3380		/* Initialize the RX lock */
3381		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3382		    device_get_nameunit(dev), rxr->me);
3383		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3384
3385		if (igb_dma_malloc(adapter, rsize,
3386			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3387			device_printf(dev,
3388			    "Unable to allocate RxDescriptor memory\n");
3389			error = ENOMEM;
3390			goto err_rx_desc;
3391		}
3392		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3393		bzero((void *)rxr->rx_base, rsize);
3394
3395        	/* Allocate receive buffers for the ring*/
3396		if (igb_allocate_receive_buffers(rxr)) {
3397			device_printf(dev,
3398			    "Critical Failure setting up receive buffers\n");
3399			error = ENOMEM;
3400			goto err_rx_desc;
3401		}
3402	}
3403
3404	/*
3405	** Finally set up the queue holding structs
3406	*/
3407	for (int i = 0; i < adapter->num_queues; i++) {
3408		que = &adapter->queues[i];
3409		que->adapter = adapter;
3410		que->txr = &adapter->tx_rings[i];
3411		que->rxr = &adapter->rx_rings[i];
3412	}
3413
3414	return (0);
3415
3416err_rx_desc:
3417	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3418		igb_dma_free(adapter, &rxr->rxdma);
3419err_tx_desc:
3420	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3421		igb_dma_free(adapter, &txr->txdma);
3422	free(adapter->rx_rings, M_DEVBUF);
3423rx_fail:
3424#ifndef IGB_LEGACY_TX
3425	buf_ring_free(txr->br, M_DEVBUF);
3426#endif
3427	free(adapter->tx_rings, M_DEVBUF);
3428tx_fail:
3429	free(adapter->queues, M_DEVBUF);
3430fail:
3431	return (error);
3432}
3433
3434/*********************************************************************
3435 *
3436 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3437 *  the information needed to transmit a packet on the wire. This is
3438 *  called only once at attach, setup is done every reset.
3439 *
3440 **********************************************************************/
3441static int
3442igb_allocate_transmit_buffers(struct tx_ring *txr)
3443{
3444	struct adapter *adapter = txr->adapter;
3445	device_t dev = adapter->dev;
3446	struct igb_tx_buf *txbuf;
3447	int error, i;
3448
3449	/*
3450	 * Setup DMA descriptor areas.
3451	 */
3452	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3453			       1, 0,			/* alignment, bounds */
3454			       BUS_SPACE_MAXADDR,	/* lowaddr */
3455			       BUS_SPACE_MAXADDR,	/* highaddr */
3456			       NULL, NULL,		/* filter, filterarg */
3457			       IGB_TSO_SIZE,		/* maxsize */
3458			       IGB_MAX_SCATTER,		/* nsegments */
3459			       PAGE_SIZE,		/* maxsegsize */
3460			       0,			/* flags */
3461			       NULL,			/* lockfunc */
3462			       NULL,			/* lockfuncarg */
3463			       &txr->txtag))) {
3464		device_printf(dev,"Unable to allocate TX DMA tag\n");
3465		goto fail;
3466	}
3467
3468	if (!(txr->tx_buffers =
3469	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3470	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3471		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3472		error = ENOMEM;
3473		goto fail;
3474	}
3475
3476        /* Create the descriptor buffer dma maps */
3477	txbuf = txr->tx_buffers;
3478	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3479		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3480		if (error != 0) {
3481			device_printf(dev, "Unable to create TX DMA map\n");
3482			goto fail;
3483		}
3484	}
3485
3486	return 0;
3487fail:
3488	/* Free everything; this handles the case where setup failed partway */
3489	igb_free_transmit_structures(adapter);
3490	return (error);
3491}
3492
3493/*********************************************************************
3494 *
3495 *  Initialize a transmit ring.
3496 *
3497 **********************************************************************/
3498static void
3499igb_setup_transmit_ring(struct tx_ring *txr)
3500{
3501	struct adapter *adapter = txr->adapter;
3502	struct igb_tx_buf *txbuf;
3503	int i;
3504#ifdef DEV_NETMAP
3505	struct netmap_adapter *na = NA(adapter->ifp);
3506	struct netmap_slot *slot;
3507#endif /* DEV_NETMAP */
3508
3509	/* Clear the old descriptor contents */
3510	IGB_TX_LOCK(txr);
3511#ifdef DEV_NETMAP
3512	slot = netmap_reset(na, NR_TX, txr->me, 0);
3513#endif /* DEV_NETMAP */
3514	bzero((void *)txr->tx_base,
3515	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3516	/* Reset indices */
3517	txr->next_avail_desc = 0;
3518	txr->next_to_clean = 0;
3519
3520	/* Free any existing tx buffers. */
3521        txbuf = txr->tx_buffers;
3522	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3523		if (txbuf->m_head != NULL) {
3524			bus_dmamap_sync(txr->txtag, txbuf->map,
3525			    BUS_DMASYNC_POSTWRITE);
3526			bus_dmamap_unload(txr->txtag, txbuf->map);
3527			m_freem(txbuf->m_head);
3528			txbuf->m_head = NULL;
3529		}
3530#ifdef DEV_NETMAP
3531		if (slot) {
3532			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3533			/* no need to set the address */
3534			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3535		}
3536#endif /* DEV_NETMAP */
3537		/* clear the watch index */
3538		txbuf->eop = NULL;
3539        }
3540
3541	/* Set number of descriptors available */
3542	txr->tx_avail = adapter->num_tx_desc;
3543
3544	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3545	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3546	IGB_TX_UNLOCK(txr);
3547}
3548
3549/*********************************************************************
3550 *
3551 *  Initialize all transmit rings.
3552 *
3553 **********************************************************************/
3554static void
3555igb_setup_transmit_structures(struct adapter *adapter)
3556{
3557	struct tx_ring *txr = adapter->tx_rings;
3558
3559	for (int i = 0; i < adapter->num_queues; i++, txr++)
3560		igb_setup_transmit_ring(txr);
3561
3562	return;
3563}
3564
3565/*********************************************************************
3566 *
3567 *  Enable transmit unit.
3568 *
3569 **********************************************************************/
3570static void
3571igb_initialize_transmit_units(struct adapter *adapter)
3572{
3573	struct tx_ring	*txr = adapter->tx_rings;
3574	struct e1000_hw *hw = &adapter->hw;
3575	u32		tctl, txdctl;
3576
3577	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3578	tctl = txdctl = 0;
3579
3580	/* Setup the Tx Descriptor Rings */
3581	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3582		u64 bus_addr = txr->txdma.dma_paddr;
3583
3584		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3585		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3586		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3587		    (uint32_t)(bus_addr >> 32));
3588		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3589		    (uint32_t)bus_addr);
3590
3591		/* Setup the HW Tx Head and Tail descriptor pointers */
3592		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3593		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3594
3595		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3596		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3597		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3598
3599		txr->queue_status = IGB_QUEUE_IDLE;
3600
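		/*
		** TXDCTL holds the prefetch, host and write-back
		** thresholds in separate fields (shifted by 0, 8 and
		** 16 bits) along with the queue enable bit.
		*/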
3601		txdctl |= IGB_TX_PTHRESH;
3602		txdctl |= IGB_TX_HTHRESH << 8;
3603		txdctl |= IGB_TX_WTHRESH << 16;
3604		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3605		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3606	}
3607
3608	if (adapter->vf_ifp)
3609		return;
3610
3611	e1000_config_collision_dist(hw);
3612
3613	/* Program the Transmit Control Register */
3614	tctl = E1000_READ_REG(hw, E1000_TCTL);
3615	tctl &= ~E1000_TCTL_CT;
3616	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3617		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3618
3619	/* This write will effectively turn on the transmit unit. */
3620	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3621}
3622
3623/*********************************************************************
3624 *
3625 *  Free all transmit rings.
3626 *
3627 **********************************************************************/
3628static void
3629igb_free_transmit_structures(struct adapter *adapter)
3630{
3631	struct tx_ring *txr = adapter->tx_rings;
3632
3633	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3634		IGB_TX_LOCK(txr);
3635		igb_free_transmit_buffers(txr);
3636		igb_dma_free(adapter, &txr->txdma);
3637		IGB_TX_UNLOCK(txr);
3638		IGB_TX_LOCK_DESTROY(txr);
3639	}
3640	free(adapter->tx_rings, M_DEVBUF);
3641}
3642
3643/*********************************************************************
3644 *
3645 *  Free transmit ring related data structures.
3646 *
3647 **********************************************************************/
3648static void
3649igb_free_transmit_buffers(struct tx_ring *txr)
3650{
3651	struct adapter *adapter = txr->adapter;
3652	struct igb_tx_buf *tx_buffer;
3653	int             i;
3654
3655	INIT_DEBUGOUT("free_transmit_ring: begin");
3656
3657	if (txr->tx_buffers == NULL)
3658		return;
3659
3660	tx_buffer = txr->tx_buffers;
3661	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3662		if (tx_buffer->m_head != NULL) {
3663			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3664			    BUS_DMASYNC_POSTWRITE);
3665			bus_dmamap_unload(txr->txtag,
3666			    tx_buffer->map);
3667			m_freem(tx_buffer->m_head);
3668			tx_buffer->m_head = NULL;
3669			if (tx_buffer->map != NULL) {
3670				bus_dmamap_destroy(txr->txtag,
3671				    tx_buffer->map);
3672				tx_buffer->map = NULL;
3673			}
3674		} else if (tx_buffer->map != NULL) {
3675			bus_dmamap_unload(txr->txtag,
3676			    tx_buffer->map);
3677			bus_dmamap_destroy(txr->txtag,
3678			    tx_buffer->map);
3679			tx_buffer->map = NULL;
3680		}
3681	}
3682#ifndef IGB_LEGACY_TX
3683	if (txr->br != NULL)
3684		buf_ring_free(txr->br, M_DEVBUF);
3685#endif
3686	if (txr->tx_buffers != NULL) {
3687		free(txr->tx_buffers, M_DEVBUF);
3688		txr->tx_buffers = NULL;
3689	}
3690	if (txr->txtag != NULL) {
3691		bus_dma_tag_destroy(txr->txtag);
3692		txr->txtag = NULL;
3693	}
3694	return;
3695}
3696
3697/**********************************************************************
3698 *
3699 *  Setup work for hardware segmentation offload (TSO) on
3700 *  adapters using advanced tx descriptors
3701 *
3702 **********************************************************************/
3703static int
3704igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3705    u32 *cmd_type_len, u32 *olinfo_status)
3706{
3707	struct adapter *adapter = txr->adapter;
3708	struct e1000_adv_tx_context_desc *TXD;
3709	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3710	u32 mss_l4len_idx = 0, paylen;
3711	u16 vtag = 0, eh_type;
3712	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3713	struct ether_vlan_header *eh;
3714#ifdef INET6
3715	struct ip6_hdr *ip6;
3716#endif
3717#ifdef INET
3718	struct ip *ip;
3719#endif
3720	struct tcphdr *th;
3721
3722
3723	/*
3724	 * Determine where frame payload starts.
3725	 * Jump over vlan headers if already present
3726	 */
3727	eh = mtod(mp, struct ether_vlan_header *);
3728	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3729		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3730		eh_type = eh->evl_proto;
3731	} else {
3732		ehdrlen = ETHER_HDR_LEN;
3733		eh_type = eh->evl_encap_proto;
3734	}
3735
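	/*
	** For TSO the TCP checksum field is seeded with the
	** pseudo-header checksum (computed with a zero length),
	** which the hardware then completes for each segment.
	*/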
3736	switch (ntohs(eh_type)) {
3737#ifdef INET6
3738	case ETHERTYPE_IPV6:
3739		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3740		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3741		if (ip6->ip6_nxt != IPPROTO_TCP)
3742			return (ENXIO);
3743		ip_hlen = sizeof(struct ip6_hdr);
3744		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3745		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3746		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3747		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3748		break;
3749#endif
3750#ifdef INET
3751	case ETHERTYPE_IP:
3752		ip = (struct ip *)(mp->m_data + ehdrlen);
3753		if (ip->ip_p != IPPROTO_TCP)
3754			return (ENXIO);
3755		ip->ip_sum = 0;
3756		ip_hlen = ip->ip_hl << 2;
3757		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3758		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3759		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3760		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3761		/* Tell transmit desc to also do IPv4 checksum. */
3762		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3763		break;
3764#endif
3765	default:
3766		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3767		    __func__, ntohs(eh_type));
3768		break;
3769	}
3770
3771	ctxd = txr->next_avail_desc;
3772	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3773
3774	tcp_hlen = th->th_off << 2;
3775
3776	/* This is used in the transmit desc in encap */
3777	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3778
3779	/* VLAN MACLEN IPLEN */
3780	if (mp->m_flags & M_VLANTAG) {
3781		vtag = htole16(mp->m_pkthdr.ether_vtag);
3782                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3783	}
3784
3785	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3786	vlan_macip_lens |= ip_hlen;
3787	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3788
3789	/* ADV DTYPE TUCMD */
3790	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3791	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3792	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3793
3794	/* MSS L4LEN IDX */
3795	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3796	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3797	/* 82575 needs the queue index added */
3798	if (adapter->hw.mac.type == e1000_82575)
3799		mss_l4len_idx |= txr->me << 4;
3800	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3801
3802	TXD->seqnum_seed = htole32(0);
3803
3804	if (++ctxd == txr->num_desc)
3805		ctxd = 0;
3806
3807	txr->tx_avail--;
3808	txr->next_avail_desc = ctxd;
3809	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3810	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3811	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3812	++txr->tso_tx;
3813	return (0);
3814}
3815
3816/*********************************************************************
3817 *
3818 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3819 *
3820 **********************************************************************/
3821
3822static int
3823igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3824    u32 *cmd_type_len, u32 *olinfo_status)
3825{
3826	struct e1000_adv_tx_context_desc *TXD;
3827	struct adapter *adapter = txr->adapter;
3828	struct ether_vlan_header *eh;
3829	struct ip *ip;
3830	struct ip6_hdr *ip6;
3831	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3832	int	ehdrlen, ip_hlen = 0;
3833	u16	etype;
3834	u8	ipproto = 0;
3835	int	offload = TRUE;
3836	int	ctxd = txr->next_avail_desc;
3837	u16	vtag = 0;
3838
3839	/* First check if TSO is to be used */
3840	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3841		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3842
3843	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3844		offload = FALSE;
3845
3846	/* Indicate the whole packet as payload when not doing TSO */
3847       	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3848
3849	/* Now ready a context descriptor */
3850	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3851
3852	/*
3853	** In advanced descriptors the vlan tag must
3854	** be placed into the context descriptor. Hence
3855	** we need to make one even if not doing offloads.
3856	*/
3857	if (mp->m_flags & M_VLANTAG) {
3858		vtag = htole16(mp->m_pkthdr.ether_vtag);
3859		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3860	} else if (offload == FALSE) /* ... no offload to do */
3861		return (0);
3862
3863	/*
3864	 * Determine where frame payload starts.
3865	 * Jump over vlan headers if already present,
3866	 * helpful for QinQ too.
3867	 */
3868	eh = mtod(mp, struct ether_vlan_header *);
3869	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3870		etype = ntohs(eh->evl_proto);
3871		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3872	} else {
3873		etype = ntohs(eh->evl_encap_proto);
3874		ehdrlen = ETHER_HDR_LEN;
3875	}
3876
3877	/* Set the ether header length */
3878	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3879
3880	switch (etype) {
3881		case ETHERTYPE_IP:
3882			ip = (struct ip *)(mp->m_data + ehdrlen);
3883			ip_hlen = ip->ip_hl << 2;
3884			ipproto = ip->ip_p;
3885			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3886			break;
3887		case ETHERTYPE_IPV6:
3888			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3889			ip_hlen = sizeof(struct ip6_hdr);
3890			/* XXX-BZ this will go badly in case of ext hdrs. */
3891			ipproto = ip6->ip6_nxt;
3892			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3893			break;
3894		default:
3895			offload = FALSE;
3896			break;
3897	}
3898
3899	vlan_macip_lens |= ip_hlen;
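	/*
	** vlan_macip_lens packs the VLAN tag, MAC header length and
	** IP header length into a single 32-bit context field, with
	** the IP header length in the low bits.
	*/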
3900	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3901
3902	switch (ipproto) {
3903		case IPPROTO_TCP:
3904			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3905				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3906			break;
3907		case IPPROTO_UDP:
3908			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3909				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3910			break;
3911
3912#if __FreeBSD_version >= 800000
3913		case IPPROTO_SCTP:
3914			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3915				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3916			break;
3917#endif
3918		default:
3919			offload = FALSE;
3920			break;
3921	}
3922
3923	if (offload) /* For the TX descriptor setup */
3924		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3925
3926	/* 82575 needs the queue index added */
3927	if (adapter->hw.mac.type == e1000_82575)
3928		mss_l4len_idx = txr->me << 4;
3929
3930	/* Now copy bits into descriptor */
3931	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3932	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3933	TXD->seqnum_seed = htole32(0);
3934	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3935
3936	/* We've consumed the first desc, adjust counters */
3937	if (++ctxd == txr->num_desc)
3938		ctxd = 0;
3939	txr->next_avail_desc = ctxd;
3940	--txr->tx_avail;
3941
3942        return (0);
3943}
3944
3945/**********************************************************************
3946 *
3947 *  Examine each tx_buffer in the used queue. If the hardware is done
3948 *  processing the packet then free associated resources. The
3949 *  tx_buffer is put back on the free queue.
3950 *
3951 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3952 **********************************************************************/
3953static bool
3954igb_txeof(struct tx_ring *txr)
3955{
3956	struct adapter		*adapter = txr->adapter;
3957	struct ifnet		*ifp = adapter->ifp;
3958	u32			work, processed = 0;
3959	u16			limit = txr->process_limit;
3960	struct igb_tx_buf	*buf;
3961	union e1000_adv_tx_desc *txd;
3962
3963	mtx_assert(&txr->tx_mtx, MA_OWNED);
3964
3965#ifdef DEV_NETMAP
3966	if (netmap_tx_irq(ifp, txr->me))
3967		return (FALSE);
3968#endif /* DEV_NETMAP */
3969
3970	if (txr->tx_avail == txr->num_desc) {
3971		txr->queue_status = IGB_QUEUE_IDLE;
3972		return FALSE;
3973	}
3974
3975	/* Get work starting point */
3976	work = txr->next_to_clean;
3977	buf = &txr->tx_buffers[work];
3978	txd = &txr->tx_base[work];
3979	work -= txr->num_desc; /* The distance to ring end */
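	/*
	** 'work' is kept as a negative offset from the end of the
	** ring; it reaches zero exactly when the index wraps, which
	** is what the !work tests below detect.
	*/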
3980        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3981            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3982	do {
3983		union e1000_adv_tx_desc *eop = buf->eop;
3984		if (eop == NULL) /* No work */
3985			break;
3986
3987		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3988			break;	/* I/O not complete */
3989
3990		if (buf->m_head) {
3991			txr->bytes +=
3992			    buf->m_head->m_pkthdr.len;
3993			bus_dmamap_sync(txr->txtag,
3994			    buf->map,
3995			    BUS_DMASYNC_POSTWRITE);
3996			bus_dmamap_unload(txr->txtag,
3997			    buf->map);
3998			m_freem(buf->m_head);
3999			buf->m_head = NULL;
4000		}
4001		buf->eop = NULL;
4002		++txr->tx_avail;
4003
4004		/* Clean the whole range for a multi-segment packet */
4005		while (txd != eop) {
4006			++txd;
4007			++buf;
4008			++work;
4009			/* wrap the ring? */
4010			if (__predict_false(!work)) {
4011				work -= txr->num_desc;
4012				buf = txr->tx_buffers;
4013				txd = txr->tx_base;
4014			}
4015			if (buf->m_head) {
4016				txr->bytes +=
4017				    buf->m_head->m_pkthdr.len;
4018				bus_dmamap_sync(txr->txtag,
4019				    buf->map,
4020				    BUS_DMASYNC_POSTWRITE);
4021				bus_dmamap_unload(txr->txtag,
4022				    buf->map);
4023				m_freem(buf->m_head);
4024				buf->m_head = NULL;
4025			}
4026			++txr->tx_avail;
4027			buf->eop = NULL;
4028
4029		}
4030		++txr->packets;
4031		++processed;
4032		++ifp->if_opackets;
4033		txr->watchdog_time = ticks;
4034
4035		/* Try the next packet */
4036		++txd;
4037		++buf;
4038		++work;
4039		/* reset with a wrap */
4040		if (__predict_false(!work)) {
4041			work -= txr->num_desc;
4042			buf = txr->tx_buffers;
4043			txd = txr->tx_base;
4044		}
4045		prefetch(txd);
4046	} while (__predict_true(--limit));
4047
4048	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4049	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4050
4051	work += txr->num_desc;
4052	txr->next_to_clean = work;
4053
4054	/*
4055	** Watchdog calculation: we know there is work
4056	** outstanding, or the early return above would
4057	** have been taken; if nothing is processed for
4058	** too long, that indicates a hang.
4059	*/
4060	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4061		txr->queue_status |= IGB_QUEUE_HUNG;
4062
4063	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4064		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4065
4066	if (txr->tx_avail == txr->num_desc) {
4067		txr->queue_status = IGB_QUEUE_IDLE;
4068		return (FALSE);
4069	}
4070
4071	return (TRUE);
4072}
4073
4074/*********************************************************************
4075 *
4076 *  Refresh mbuf buffers for RX descriptor rings
4077 *   - now keeps its own state so discards due to resource
4078 *     exhaustion are unnecessary, if an mbuf cannot be obtained
4079 *     it just returns, keeping its placeholder, thus it can simply
4080 *     be recalled to try again.
4081 *
4082 **********************************************************************/
4083static void
4084igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4085{
4086	struct adapter		*adapter = rxr->adapter;
4087	bus_dma_segment_t	hseg[1];
4088	bus_dma_segment_t	pseg[1];
4089	struct igb_rx_buf	*rxbuf;
4090	struct mbuf		*mh, *mp;
4091	int			i, j, nsegs, error;
4092	bool			refreshed = FALSE;
4093
4094	i = j = rxr->next_to_refresh;
4095	/*
4096	** Get one descriptor beyond
4097	** our work mark to control
4098	** the loop.
4099        */
4100	if (++j == adapter->num_rx_desc)
4101		j = 0;
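	/*
	** 'i' is the slot being refreshed; 'j' runs one descriptor
	** ahead and stops the loop at 'limit' so we never refresh
	** past the caller's current position in the ring.
	*/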
4102
4103	while (j != limit) {
4104		rxbuf = &rxr->rx_buffers[i];
4105		/* No hdr mbuf is used when header split is off */
4106		if (rxr->hdr_split == FALSE)
4107			goto no_split;
4108		if (rxbuf->m_head == NULL) {
4109			mh = m_gethdr(M_NOWAIT, MT_DATA);
4110			if (mh == NULL)
4111				goto update;
4112		} else
4113			mh = rxbuf->m_head;
4114
4115		mh->m_pkthdr.len = mh->m_len = MHLEN;
4116		mh->m_len = MHLEN;
4117		mh->m_flags |= M_PKTHDR;
4118		/* Get the memory mapping */
4119		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4120		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4121		if (error != 0) {
4122			printf("Refresh mbufs: hdr dmamap load"
4123			    " failure - %d\n", error);
4124			m_free(mh);
4125			rxbuf->m_head = NULL;
4126			goto update;
4127		}
4128		rxbuf->m_head = mh;
4129		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4130		    BUS_DMASYNC_PREREAD);
4131		rxr->rx_base[i].read.hdr_addr =
4132		    htole64(hseg[0].ds_addr);
4133no_split:
4134		if (rxbuf->m_pack == NULL) {
4135			mp = m_getjcl(M_NOWAIT, MT_DATA,
4136			    M_PKTHDR, adapter->rx_mbuf_sz);
4137			if (mp == NULL)
4138				goto update;
4139		} else
4140			mp = rxbuf->m_pack;
4141
4142		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4143		/* Get the memory mapping */
4144		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4145		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4146		if (error != 0) {
4147			printf("Refresh mbufs: payload dmamap load"
4148			    " failure - %d\n", error);
4149			m_free(mp);
4150			rxbuf->m_pack = NULL;
4151			goto update;
4152		}
4153		rxbuf->m_pack = mp;
4154		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4155		    BUS_DMASYNC_PREREAD);
4156		rxr->rx_base[i].read.pkt_addr =
4157		    htole64(pseg[0].ds_addr);
4158		refreshed = TRUE; /* I feel wefreshed :) */
4159
4160		i = j; /* our next is precalculated */
4161		rxr->next_to_refresh = i;
4162		if (++j == adapter->num_rx_desc)
4163			j = 0;
4164	}
4165update:
4166	if (refreshed) /* update tail */
4167		E1000_WRITE_REG(&adapter->hw,
4168		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4169	return;
4170}
4171
4172
4173/*********************************************************************
4174 *
4175 *  Allocate memory for rx_buffer structures. Since we use one
4176 *  rx_buffer per received packet, the maximum number of rx_buffer's
4177 *  that we'll need is equal to the number of receive descriptors
4178 *  that we've allocated.
4179 *
4180 **********************************************************************/
4181static int
4182igb_allocate_receive_buffers(struct rx_ring *rxr)
4183{
4184	struct	adapter 	*adapter = rxr->adapter;
4185	device_t 		dev = adapter->dev;
4186	struct igb_rx_buf	*rxbuf;
4187	int             	i, bsize, error;
4188
4189	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4190	if (!(rxr->rx_buffers =
4191	    (struct igb_rx_buf *) malloc(bsize,
4192	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4193		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4194		error = ENOMEM;
4195		goto fail;
4196	}
4197
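	/*
	** Two DMA tags are created per RX ring: a small one (MSIZE)
	** for the header mbufs used with header split, and a
	** jumbo-capable one (up to MJUM9BYTES) for payload clusters.
	*/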
4198	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4199				   1, 0,		/* alignment, bounds */
4200				   BUS_SPACE_MAXADDR,	/* lowaddr */
4201				   BUS_SPACE_MAXADDR,	/* highaddr */
4202				   NULL, NULL,		/* filter, filterarg */
4203				   MSIZE,		/* maxsize */
4204				   1,			/* nsegments */
4205				   MSIZE,		/* maxsegsize */
4206				   0,			/* flags */
4207				   NULL,		/* lockfunc */
4208				   NULL,		/* lockfuncarg */
4209				   &rxr->htag))) {
4210		device_printf(dev, "Unable to create RX DMA tag\n");
4211		goto fail;
4212	}
4213
4214	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4215				   1, 0,		/* alignment, bounds */
4216				   BUS_SPACE_MAXADDR,	/* lowaddr */
4217				   BUS_SPACE_MAXADDR,	/* highaddr */
4218				   NULL, NULL,		/* filter, filterarg */
4219				   MJUM9BYTES,		/* maxsize */
4220				   1,			/* nsegments */
4221				   MJUM9BYTES,		/* maxsegsize */
4222				   0,			/* flags */
4223				   NULL,		/* lockfunc */
4224				   NULL,		/* lockfuncarg */
4225				   &rxr->ptag))) {
4226		device_printf(dev, "Unable to create RX payload DMA tag\n");
4227		goto fail;
4228	}
4229
4230	for (i = 0; i < adapter->num_rx_desc; i++) {
4231		rxbuf = &rxr->rx_buffers[i];
4232		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4233		if (error) {
4234			device_printf(dev,
4235			    "Unable to create RX head DMA maps\n");
4236			goto fail;
4237		}
4238		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4239		if (error) {
4240			device_printf(dev,
4241			    "Unable to create RX packet DMA maps\n");
4242			goto fail;
4243		}
4244	}
4245
4246	return (0);
4247
4248fail:
4249	/* Frees all, but can handle partial completion */
4250	igb_free_receive_structures(adapter);
4251	return (error);
4252}
4253
4254
4255static void
4256igb_free_receive_ring(struct rx_ring *rxr)
4257{
4258	struct	adapter		*adapter = rxr->adapter;
4259	struct igb_rx_buf	*rxbuf;
4260
4261
4262	for (int i = 0; i < adapter->num_rx_desc; i++) {
4263		rxbuf = &rxr->rx_buffers[i];
4264		if (rxbuf->m_head != NULL) {
4265			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4266			    BUS_DMASYNC_POSTREAD);
4267			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4268			rxbuf->m_head->m_flags |= M_PKTHDR;
4269			m_freem(rxbuf->m_head);
4270		}
4271		if (rxbuf->m_pack != NULL) {
4272			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4273			    BUS_DMASYNC_POSTREAD);
4274			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4275			rxbuf->m_pack->m_flags |= M_PKTHDR;
4276			m_freem(rxbuf->m_pack);
4277		}
4278		rxbuf->m_head = NULL;
4279		rxbuf->m_pack = NULL;
4280	}
4281}
4282
4283
4284/*********************************************************************
4285 *
4286 *  Initialize a receive ring and its buffers.
4287 *
4288 **********************************************************************/
4289static int
4290igb_setup_receive_ring(struct rx_ring *rxr)
4291{
4292	struct	adapter		*adapter;
4293	struct  ifnet		*ifp;
4294	device_t		dev;
4295	struct igb_rx_buf	*rxbuf;
4296	bus_dma_segment_t	pseg[1], hseg[1];
4297	struct lro_ctrl		*lro = &rxr->lro;
4298	int			rsize, nsegs, error = 0;
4299#ifdef DEV_NETMAP
4300	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4301	struct netmap_slot *slot;
4302#endif /* DEV_NETMAP */
4303
4304	adapter = rxr->adapter;
4305	dev = adapter->dev;
4306	ifp = adapter->ifp;
4307
4308	/* Clear the ring contents */
4309	IGB_RX_LOCK(rxr);
4310#ifdef DEV_NETMAP
4311	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4312#endif /* DEV_NETMAP */
4313	rsize = roundup2(adapter->num_rx_desc *
4314	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4315	bzero((void *)rxr->rx_base, rsize);
4316
4317	/*
4318	** Free current RX buffer structures and their mbufs
4319	*/
4320	igb_free_receive_ring(rxr);
4321
4322	/* Configure for header split? */
4323	if (igb_header_split)
4324		rxr->hdr_split = TRUE;
4325
4326        /* Now replenish the ring mbufs */
4327	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4328		struct mbuf	*mh, *mp;
4329
4330		rxbuf = &rxr->rx_buffers[j];
4331#ifdef DEV_NETMAP
4332		if (slot) {
4333			/* slot sj is mapped to the j-th NIC-ring entry */
4334			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4335			uint64_t paddr;
4336			void *addr;
4337
4338			addr = PNMB(na, slot + sj, &paddr);
4339			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4340			/* Update descriptor */
4341			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4342			continue;
4343		}
4344#endif /* DEV_NETMAP */
4345		if (rxr->hdr_split == FALSE)
4346			goto skip_head;
4347
4348		/* First the header */
4349		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4350		if (rxbuf->m_head == NULL) {
4351			error = ENOBUFS;
4352                        goto fail;
4353		}
4354		m_adj(rxbuf->m_head, ETHER_ALIGN);
4355		mh = rxbuf->m_head;
4356		mh->m_len = mh->m_pkthdr.len = MHLEN;
4357		mh->m_flags |= M_PKTHDR;
4358		/* Get the memory mapping */
4359		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4360		    rxbuf->hmap, rxbuf->m_head, hseg,
4361		    &nsegs, BUS_DMA_NOWAIT);
4362		if (error != 0) /* Nothing elegant to do here */
4363                        goto fail;
4364		bus_dmamap_sync(rxr->htag,
4365		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4366		/* Update descriptor */
4367		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4368
4369skip_head:
4370		/* Now the payload cluster */
4371		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4372		    M_PKTHDR, adapter->rx_mbuf_sz);
4373		if (rxbuf->m_pack == NULL) {
4374			error = ENOBUFS;
4375                        goto fail;
4376		}
4377		mp = rxbuf->m_pack;
4378		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4379		/* Get the memory mapping */
4380		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4381		    rxbuf->pmap, mp, pseg,
4382		    &nsegs, BUS_DMA_NOWAIT);
4383		if (error != 0)
4384                        goto fail;
4385		bus_dmamap_sync(rxr->ptag,
4386		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4387		/* Update descriptor */
4388		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4389        }
4390
4391	/* Setup our descriptor indices */
4392	rxr->next_to_check = 0;
4393	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4394	rxr->lro_enabled = FALSE;
4395	rxr->rx_split_packets = 0;
4396	rxr->rx_bytes = 0;
4397
4398	rxr->fmp = NULL;
4399	rxr->lmp = NULL;
4400	rxr->discard = FALSE;
4401
4402	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4403	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4404
4405	/*
4406	** Now set up the LRO interface; we
4407	** also only do header split when LRO
4408	** is enabled, since it is often
4409	** undesirable otherwise.
4410	*/
4411	if (ifp->if_capenable & IFCAP_LRO) {
4412		error = tcp_lro_init(lro);
4413		if (error) {
4414			device_printf(dev, "LRO Initialization failed!\n");
4415			goto fail;
4416		}
4417		INIT_DEBUGOUT("RX LRO Initialized\n");
4418		rxr->lro_enabled = TRUE;
4419		lro->ifp = adapter->ifp;
4420	}
4421
4422	IGB_RX_UNLOCK(rxr);
4423	return (0);
4424
4425fail:
4426	igb_free_receive_ring(rxr);
4427	IGB_RX_UNLOCK(rxr);
4428	return (error);
4429}
4430
4431
4432/*********************************************************************
4433 *
4434 *  Initialize all receive rings.
4435 *
4436 **********************************************************************/
4437static int
4438igb_setup_receive_structures(struct adapter *adapter)
4439{
4440	struct rx_ring *rxr = adapter->rx_rings;
4441	int i;
4442
4443	for (i = 0; i < adapter->num_queues; i++, rxr++)
4444		if (igb_setup_receive_ring(rxr))
4445			goto fail;
4446
4447	return (0);
4448fail:
4449	/*
4450	 * Free the RX buffers allocated so far; we only handle
4451	 * the rings that completed, since the failing case will have
4452	 * cleaned up after itself. 'i' is the endpoint.
4453	 */
4454	for (int j = 0; j < i; ++j) {
4455		rxr = &adapter->rx_rings[j];
4456		IGB_RX_LOCK(rxr);
4457		igb_free_receive_ring(rxr);
4458		IGB_RX_UNLOCK(rxr);
4459	}
4460
4461	return (ENOBUFS);
4462}
4463
4464/*********************************************************************
4465 *
4466 *  Enable receive unit.
4467 *
4468 **********************************************************************/
4469static void
4470igb_initialize_receive_units(struct adapter *adapter)
4471{
4472	struct rx_ring	*rxr = adapter->rx_rings;
4473	struct ifnet	*ifp = adapter->ifp;
4474	struct e1000_hw *hw = &adapter->hw;
4475	u32		rctl, rxcsum, psize, srrctl = 0;
4476
4477	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4478
4479	/*
4480	 * Make sure receives are disabled while setting
4481	 * up the descriptor ring
4482	 */
4483	rctl = E1000_READ_REG(hw, E1000_RCTL);
4484	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4485
4486	/*
4487	** Set up for header split
4488	*/
4489	if (igb_header_split) {
4490		/* Use a standard mbuf for the header */
4491		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4492		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4493	} else
4494		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4495
4496	/*
4497	** Set up for jumbo frames
4498	*/
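	/*
	** The SRRCTL BSIZEPKT field is expressed in 1 KB units, so
	** the byte sizes below are shifted right by
	** E1000_SRRCTL_BSIZEPKT_SHIFT before being programmed.
	*/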
4499	if (ifp->if_mtu > ETHERMTU) {
4500		rctl |= E1000_RCTL_LPE;
4501		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4502			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4503			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4504		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4505			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4506			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4507		}
4508		/* Set maximum packet len */
4509		psize = adapter->max_frame_size;
4510		/* are we on a vlan? */
4511		if (adapter->ifp->if_vlantrunk != NULL)
4512			psize += VLAN_TAG_SIZE;
4513		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4514	} else {
4515		rctl &= ~E1000_RCTL_LPE;
4516		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4517		rctl |= E1000_RCTL_SZ_2048;
4518	}
4519
4520	/* Setup the Base and Length of the Rx Descriptor Rings */
4521	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4522		u64 bus_addr = rxr->rxdma.dma_paddr;
4523		u32 rxdctl;
4524
4525		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4526		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4527		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4528		    (uint32_t)(bus_addr >> 32));
4529		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4530		    (uint32_t)bus_addr);
4531		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4532		/* Enable this Queue */
4533		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4534		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4535		rxdctl &= 0xFFF00000;
4536		rxdctl |= IGB_RX_PTHRESH;
4537		rxdctl |= IGB_RX_HTHRESH << 8;
4538		rxdctl |= IGB_RX_WTHRESH << 16;
4539		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4540	}
4541
4542	/*
4543	** Setup for RX MultiQueue
4544	*/
4545	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4546	if (adapter->num_queues >1) {
4547		u32 random[10], mrqc, shift = 0;
4548		union igb_reta {
4549			u32 dword;
4550			u8  bytes[4];
4551		} reta;
4552
4553		arc4rand(&random, sizeof(random), 0);
4554		if (adapter->hw.mac.type == e1000_82575)
4555			shift = 6;
4556		/* Warning FM follows */
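		/*
		** The 128-entry redirection table is written four
		** entries (one dword) at a time; entry i steers RSS
		** bucket i to queue (i % num_queues). The 82575 keeps
		** the queue index in the upper bits, hence the shift.
		*/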
4557		for (int i = 0; i < 128; i++) {
4558			reta.bytes[i & 3] =
4559			    (i % adapter->num_queues) << shift;
4560			if ((i & 3) == 3)
4561				E1000_WRITE_REG(hw,
4562				    E1000_RETA(i >> 2), reta.dword);
4563		}
4564		/* Enable 4-queue RSS and load the random hash key */
4565		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4566		for (int i = 0; i < 10; i++)
4567			E1000_WRITE_REG_ARRAY(hw,
4568			    E1000_RSSRK(0), i, random[i]);
4569
4570		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4571		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4572		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4573		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4574		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4575		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4576		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4577		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4578
4579		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4580
4581		/*
4582		** NOTE: Receive Full-Packet Checksum Offload
4583		** is mutually exclusive with Multiqueue. However,
4584		** this is not the same as TCP/IP checksums, which
4585		** still work.
4586		*/
4587		rxcsum |= E1000_RXCSUM_PCSD;
4588#if __FreeBSD_version >= 800000
4589		/* For SCTP Offload */
4590		if ((hw->mac.type == e1000_82576)
4591		    && (ifp->if_capenable & IFCAP_RXCSUM))
4592			rxcsum |= E1000_RXCSUM_CRCOFL;
4593#endif
4594	} else {
4595		/* Non RSS setup */
4596		if (ifp->if_capenable & IFCAP_RXCSUM) {
4597			rxcsum |= E1000_RXCSUM_IPPCSE;
4598#if __FreeBSD_version >= 800000
4599			if (adapter->hw.mac.type == e1000_82576)
4600				rxcsum |= E1000_RXCSUM_CRCOFL;
4601#endif
4602		} else
4603			rxcsum &= ~E1000_RXCSUM_TUOFL;
4604	}
4605	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4606
4607	/* Setup the Receive Control Register */
4608	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4609	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4610		   E1000_RCTL_RDMTS_HALF |
4611		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4612	/* Strip CRC bytes. */
4613	rctl |= E1000_RCTL_SECRC;
4614	/* Make sure VLAN Filters are off */
4615	rctl &= ~E1000_RCTL_VFE;
4616	/* Don't store bad packets */
4617	rctl &= ~E1000_RCTL_SBP;
4618
4619	/* Enable Receives */
4620	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4621
4622	/*
4623	 * Setup the HW Rx Head and Tail Descriptor Pointers
4624	 *   - needs to be after enable
4625	 */
4626	for (int i = 0; i < adapter->num_queues; i++) {
4627		rxr = &adapter->rx_rings[i];
4628		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4629#ifdef DEV_NETMAP
4630		/*
4631		 * an init() while a netmap client is active must
4632		 * preserve the rx buffers passed to userspace.
4633		 * In this driver it means we adjust RDT to
4634		 * something different from next_to_refresh
4635		 * (which is not used in netmap mode).
4636		 */
4637		if (ifp->if_capenable & IFCAP_NETMAP) {
4638			struct netmap_adapter *na = NA(adapter->ifp);
4639			struct netmap_kring *kring = &na->rx_rings[i];
4640			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4641
4642			if (t >= adapter->num_rx_desc)
4643				t -= adapter->num_rx_desc;
4644			else if (t < 0)
4645				t += adapter->num_rx_desc;
4646			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4647		} else
4648#endif /* DEV_NETMAP */
4649		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4650	}
4651	return;
4652}
4653
4654/*********************************************************************
4655 *
4656 *  Free receive rings.
4657 *
4658 **********************************************************************/
4659static void
4660igb_free_receive_structures(struct adapter *adapter)
4661{
4662	struct rx_ring *rxr = adapter->rx_rings;
4663
4664	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4665		struct lro_ctrl	*lro = &rxr->lro;
4666		igb_free_receive_buffers(rxr);
4667		tcp_lro_free(lro);
4668		igb_dma_free(adapter, &rxr->rxdma);
4669	}
4670
4671	free(adapter->rx_rings, M_DEVBUF);
4672}
4673
4674/*********************************************************************
4675 *
4676 *  Free receive ring data structures.
4677 *
4678 **********************************************************************/
4679static void
4680igb_free_receive_buffers(struct rx_ring *rxr)
4681{
4682	struct adapter		*adapter = rxr->adapter;
4683	struct igb_rx_buf	*rxbuf;
4684	int i;
4685
4686	INIT_DEBUGOUT("free_receive_structures: begin");
4687
4688	/* Cleanup any existing buffers */
4689	if (rxr->rx_buffers != NULL) {
4690		for (i = 0; i < adapter->num_rx_desc; i++) {
4691			rxbuf = &rxr->rx_buffers[i];
4692			if (rxbuf->m_head != NULL) {
4693				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4694				    BUS_DMASYNC_POSTREAD);
4695				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4696				rxbuf->m_head->m_flags |= M_PKTHDR;
4697				m_freem(rxbuf->m_head);
4698			}
4699			if (rxbuf->m_pack != NULL) {
4700				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4701				    BUS_DMASYNC_POSTREAD);
4702				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4703				rxbuf->m_pack->m_flags |= M_PKTHDR;
4704				m_freem(rxbuf->m_pack);
4705			}
4706			rxbuf->m_head = NULL;
4707			rxbuf->m_pack = NULL;
4708			if (rxbuf->hmap != NULL) {
4709				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4710				rxbuf->hmap = NULL;
4711			}
4712			if (rxbuf->pmap != NULL) {
4713				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4714				rxbuf->pmap = NULL;
4715			}
4716		}
4717		if (rxr->rx_buffers != NULL) {
4718			free(rxr->rx_buffers, M_DEVBUF);
4719			rxr->rx_buffers = NULL;
4720		}
4721	}
4722
4723	if (rxr->htag != NULL) {
4724		bus_dma_tag_destroy(rxr->htag);
4725		rxr->htag = NULL;
4726	}
4727	if (rxr->ptag != NULL) {
4728		bus_dma_tag_destroy(rxr->ptag);
4729		rxr->ptag = NULL;
4730	}
4731}
4732
4733static __inline void
4734igb_rx_discard(struct rx_ring *rxr, int i)
4735{
4736	struct igb_rx_buf	*rbuf;
4737
4738	rbuf = &rxr->rx_buffers[i];
4739
4740	/* Partially received? Free the chain */
4741	if (rxr->fmp != NULL) {
4742		rxr->fmp->m_flags |= M_PKTHDR;
4743		m_freem(rxr->fmp);
4744		rxr->fmp = NULL;
4745		rxr->lmp = NULL;
4746	}
4747
4748	/*
4749	** With advanced descriptors the writeback
4750	** clobbers the buffer addrs, so it's easier
4751	** to just free the existing mbufs and take
4752	** the normal refresh path to get new buffers
4753	** and mapping.
4754	*/
4755	if (rbuf->m_head) {
4756		m_free(rbuf->m_head);
4757		rbuf->m_head = NULL;
4758		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4759	}
4760
4761	if (rbuf->m_pack) {
4762		m_free(rbuf->m_pack);
4763		rbuf->m_pack = NULL;
4764		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4765	}
4766
4767	return;
4768}
4769
4770static __inline void
4771igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4772{
4773
4774	/*
4775	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4776	 * has been validated by the hardware. The frame also must not carry
4777	 * a VLAN tag in its Ethernet header.
4778	 */
4779	if (rxr->lro_enabled &&
4780	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4781	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4782	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4783	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4784	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4785	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4786		/*
4787		 * Send to the stack if:
4788		 **  - LRO not enabled, or
4789		 **  - no LRO resources, or
4790		 **  - lro enqueue fails
4791		 */
4792		if (rxr->lro.lro_cnt != 0)
4793			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4794				return;
4795	}
4796	IGB_RX_UNLOCK(rxr);
4797	(*ifp->if_input)(ifp, m);
4798	IGB_RX_LOCK(rxr);
4799}
4800
4801/*********************************************************************
4802 *
4803 *  This routine executes in interrupt context. It replenishes
4804 *  the mbufs in the descriptor ring and passes data which has been
4805 *  DMA'd into host memory up to the upper layer.
4806 *
4807 *  We loop at most count times if count is > 0, or until done if
4808 *  count < 0.
4809 *
4810 *  Return TRUE if more to clean, FALSE otherwise
4811 *********************************************************************/
4812static bool
4813igb_rxeof(struct igb_queue *que, int count, int *done)
4814{
4815	struct adapter		*adapter = que->adapter;
4816	struct rx_ring		*rxr = que->rxr;
4817	struct ifnet		*ifp = adapter->ifp;
4818	struct lro_ctrl		*lro = &rxr->lro;
4819	struct lro_entry	*queued;
4820	int			i, processed = 0, rxdone = 0;
4821	u32			ptype, staterr = 0;
4822	union e1000_adv_rx_desc	*cur;
4823
4824	IGB_RX_LOCK(rxr);
4825	/* Sync the ring. */
4826	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4827	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4828
4829#ifdef DEV_NETMAP
4830	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4831		IGB_RX_UNLOCK(rxr);
4832		return (FALSE);
4833	}
4834#endif /* DEV_NETMAP */
4835
4836	/* Main clean loop */
4837	for (i = rxr->next_to_check; count != 0;) {
4838		struct mbuf		*sendmp, *mh, *mp;
4839		struct igb_rx_buf	*rxbuf;
4840		u16			hlen, plen, hdr, vtag;
4841		bool			eop = FALSE;
4842
4843		cur = &rxr->rx_base[i];
4844		staterr = le32toh(cur->wb.upper.status_error);
4845		if ((staterr & E1000_RXD_STAT_DD) == 0)
4846			break;
4847		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4848			break;
4849		count--;
4850		sendmp = mh = mp = NULL;
4851		cur->wb.upper.status_error = 0;
4852		rxbuf = &rxr->rx_buffers[i];
4853		plen = le16toh(cur->wb.upper.length);
4854		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4855		if (((adapter->hw.mac.type == e1000_i350) ||
4856		    (adapter->hw.mac.type == e1000_i354)) &&
4857		    (staterr & E1000_RXDEXT_STATERR_LB))
4858			vtag = be16toh(cur->wb.upper.vlan);
4859		else
4860			vtag = le16toh(cur->wb.upper.vlan);
4861		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4862		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4863
4864		/* Make sure all segments of a bad packet are discarded */
4865		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4866		    (rxr->discard)) {
4867			adapter->dropped_pkts++;
4868			++rxr->rx_discarded;
4869			if (!eop) /* Catch subsequent segs */
4870				rxr->discard = TRUE;
4871			else
4872				rxr->discard = FALSE;
4873			igb_rx_discard(rxr, i);
4874			goto next_desc;
4875		}
4876
4877		/*
4878		** The way the hardware is configured to
4879		** split, it will ONLY use the header buffer
4880		** when header split is enabled; otherwise we
4881		** get normal behavior, i.e., both header and
4882		** payload are DMA'd into the payload buffer.
4883		**
4884		** The fmp test catches the case where a
4885		** packet spans multiple descriptors; in that
4886		** case only the first header is valid.
4887		*/
4888		if (rxr->hdr_split && rxr->fmp == NULL) {
4889			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4890			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4891			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4892			if (hlen > IGB_HDR_BUF)
4893				hlen = IGB_HDR_BUF;
4894			mh = rxr->rx_buffers[i].m_head;
4895			mh->m_len = hlen;
4896			/* clear buf pointer for refresh */
4897			rxbuf->m_head = NULL;
4898			/*
4899			** Get the payload length; this
4900			** could be zero if it's a small
4901			** packet.
4902			*/
4903			if (plen > 0) {
4904				mp = rxr->rx_buffers[i].m_pack;
4905				mp->m_len = plen;
4906				mh->m_next = mp;
4907				/* clear buf pointer */
4908				rxbuf->m_pack = NULL;
4909				rxr->rx_split_packets++;
4910			}
4911		} else {
4912			/*
4913			** Either no header split, or a
4914			** secondary piece of a fragmented
4915			** split packet.
4916			*/
4917			mh = rxr->rx_buffers[i].m_pack;
4918			mh->m_len = plen;
4919			/* clear buf info for refresh */
4920			rxbuf->m_pack = NULL;
4921		}
4922		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4923
4924		++processed; /* So we know when to refresh */
4925
4926		/* Initial frame - setup */
4927		if (rxr->fmp == NULL) {
4928			mh->m_pkthdr.len = mh->m_len;
4929			/* Save the head of the chain */
4930			rxr->fmp = mh;
4931			rxr->lmp = mh;
4932			if (mp != NULL) {
4933				/* Add payload if split */
4934				mh->m_pkthdr.len += mp->m_len;
4935				rxr->lmp = mh->m_next;
4936			}
4937		} else {
4938			/* Chain mbuf's together */
4939			rxr->lmp->m_next = mh;
4940			rxr->lmp = rxr->lmp->m_next;
4941			rxr->fmp->m_pkthdr.len += mh->m_len;
4942		}
4943
4944		if (eop) {
4945			rxr->fmp->m_pkthdr.rcvif = ifp;
4946			ifp->if_ipackets++;
4947			rxr->rx_packets++;
4948			/* capture data for AIM */
4949			rxr->packets++;
4950			rxr->bytes += rxr->fmp->m_pkthdr.len;
4951			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4952
4953			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4954				igb_rx_checksum(staterr, rxr->fmp, ptype);
4955
4956			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4957			    (staterr & E1000_RXD_STAT_VP) != 0) {
4958				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4959				rxr->fmp->m_flags |= M_VLANTAG;
4960			}
4961#ifndef IGB_LEGACY_TX
4962			rxr->fmp->m_pkthdr.flowid = que->msix;
4963			rxr->fmp->m_flags |= M_FLOWID;
4964#endif
4965			sendmp = rxr->fmp;
4966			/* Make sure to set M_PKTHDR. */
4967			sendmp->m_flags |= M_PKTHDR;
4968			rxr->fmp = NULL;
4969			rxr->lmp = NULL;
4970		}
4971
4972next_desc:
4973		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4974		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4975
4976		/* Advance our pointers to the next descriptor. */
4977		if (++i == adapter->num_rx_desc)
4978			i = 0;
4979		/*
4980		** Send to the stack or LRO
4981		*/
4982		if (sendmp != NULL) {
4983			rxr->next_to_check = i;
4984			igb_rx_input(rxr, ifp, sendmp, ptype);
4985			i = rxr->next_to_check;
4986			rxdone++;
4987		}
4988
4989		/* Every 8 descriptors we go to refresh mbufs */
4990		if (processed == 8) {
4991                        igb_refresh_mbufs(rxr, i);
4992                        processed = 0;
4993		}
4994	}
4995
4996	/* Catch any remainders */
4997	if (igb_rx_unrefreshed(rxr))
4998		igb_refresh_mbufs(rxr, i);
4999
5000	rxr->next_to_check = i;
5001
5002	/*
5003	 * Flush any outstanding LRO work
5004	 */
5005	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5006		SLIST_REMOVE_HEAD(&lro->lro_active, next);
5007		tcp_lro_flush(lro, queued);
5008	}
5009
5010	if (done != NULL)
5011		*done += rxdone;
5012
5013	IGB_RX_UNLOCK(rxr);
5014	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5015}
5016
5017/*********************************************************************
5018 *
5019 *  Verify that the hardware indicated that the checksum is valid.
5020 *  Inform the stack about the status of the checksum so that the
5021 *  stack doesn't spend time verifying it again.
5022 *
5023 *********************************************************************/
5024static void
5025igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5026{
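	/*
	** The low 16 bits of 'staterr' carry the descriptor status
	** bits and bits 31:24 the error bits; split them apart for
	** the checks below.
	*/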
5027	u16 status = (u16)staterr;
5028	u8  errors = (u8) (staterr >> 24);
5029	int sctp;
5030
5031	/* Ignore Checksum bit is set */
5032	if (status & E1000_RXD_STAT_IXSM) {
5033		mp->m_pkthdr.csum_flags = 0;
5034		return;
5035	}
5036
5037	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5038	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5039		sctp = 1;
5040	else
5041		sctp = 0;
5042	if (status & E1000_RXD_STAT_IPCS) {
5043		/* Did it pass? */
5044		if (!(errors & E1000_RXD_ERR_IPE)) {
5045			/* IP Checksum Good */
5046			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5047			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5048		} else
5049			mp->m_pkthdr.csum_flags = 0;
5050	}
5051
5052	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5053		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5054#if __FreeBSD_version >= 800000
5055		if (sctp) /* reassign */
5056			type = CSUM_SCTP_VALID;
5057#endif
5058		/* Did it pass? */
5059		if (!(errors & E1000_RXD_ERR_TCPE)) {
5060			mp->m_pkthdr.csum_flags |= type;
5061			if (sctp == 0)
5062				mp->m_pkthdr.csum_data = htons(0xffff);
5063		}
5064	}
5065	return;
5066}
5067
5068/*
5069 * This routine is run via a vlan
5070 * config EVENT
5071 */
5072static void
5073igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5074{
5075	struct adapter	*adapter = ifp->if_softc;
5076	u32		index, bit;
5077
5078	if (ifp->if_softc !=  arg)   /* Not our event */
5079		return;
5080
5081	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5082                return;
5083
5084	IGB_CORE_LOCK(adapter);
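	/*
	** The VFTA is 128 32-bit words covering all 4096 VLAN IDs:
	** bits 11:5 of the tag select the word and bits 4:0 select
	** the bit within it.
	*/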
5085	index = (vtag >> 5) & 0x7F;
5086	bit = vtag & 0x1F;
5087	adapter->shadow_vfta[index] |= (1 << bit);
5088	++adapter->num_vlans;
5089	/* Change hw filter setting */
5090	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5091		igb_setup_vlan_hw_support(adapter);
5092	IGB_CORE_UNLOCK(adapter);
5093}
5094
5095/*
5096 * This routine is run via a vlan
5097 * unconfig EVENT
5098 */
5099static void
5100igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5101{
5102	struct adapter	*adapter = ifp->if_softc;
5103	u32		index, bit;
5104
5105	if (ifp->if_softc !=  arg)
5106		return;
5107
5108	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5109                return;
5110
5111	IGB_CORE_LOCK(adapter);
5112	index = (vtag >> 5) & 0x7F;
5113	bit = vtag & 0x1F;
5114	adapter->shadow_vfta[index] &= ~(1 << bit);
5115	--adapter->num_vlans;
5116	/* Change hw filter setting */
5117	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5118		igb_setup_vlan_hw_support(adapter);
5119	IGB_CORE_UNLOCK(adapter);
5120}
5121
5122static void
5123igb_setup_vlan_hw_support(struct adapter *adapter)
5124{
5125	struct e1000_hw *hw = &adapter->hw;
5126	struct ifnet	*ifp = adapter->ifp;
5127	u32             reg;
5128
5129	if (adapter->vf_ifp) {
5130		e1000_rlpml_set_vf(hw,
5131		    adapter->max_frame_size + VLAN_TAG_SIZE);
5132		return;
5133	}
5134
5135	reg = E1000_READ_REG(hw, E1000_CTRL);
5136	reg |= E1000_CTRL_VME;
5137	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5138
5139	/* Enable the Filter Table */
5140	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5141		reg = E1000_READ_REG(hw, E1000_RCTL);
5142		reg &= ~E1000_RCTL_CFIEN;
5143		reg |= E1000_RCTL_VFE;
5144		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5145	}
5146
5147	/* Update the frame size */
5148	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5149	    adapter->max_frame_size + VLAN_TAG_SIZE);
5150
5151	/* Don't bother with table if no vlans */
5152	if ((adapter->num_vlans == 0) ||
5153	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5154                return;
5155	/*
5156	** A soft reset zeroes out the VFTA, so
5157	** we need to repopulate it now.
5158	*/
5159	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5160                if (adapter->shadow_vfta[i] != 0) {
5161			if (adapter->vf_ifp)
5162				e1000_vfta_set_vf(hw,
5163				    adapter->shadow_vfta[i], TRUE);
5164			else
5165				e1000_write_vfta(hw,
5166				    i, adapter->shadow_vfta[i]);
5167		}
5168}
5169
5170static void
5171igb_enable_intr(struct adapter *adapter)
5172{
5173	/* With RSS set up what to auto clear */
5174	if (adapter->msix_mem) {
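		/*
		** EIAC selects which MSI-X causes auto-clear on read,
		** EIAM arms auto-masking for them, and EIMS unmasks
		** the queue and link vectors; the legacy IMS still
		** needs LSC so link state changes are delivered.
		*/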
5175		u32 mask = (adapter->que_mask | adapter->link_mask);
5176		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5177		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5178		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5179		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5180		    E1000_IMS_LSC);
5181	} else {
5182		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5183		    IMS_ENABLE_MASK);
5184	}
5185	E1000_WRITE_FLUSH(&adapter->hw);
5186
5187	return;
5188}
5189
5190static void
5191igb_disable_intr(struct adapter *adapter)
5192{
5193	if (adapter->msix_mem) {
5194		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5195		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5196	}
5197	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5198	E1000_WRITE_FLUSH(&adapter->hw);
5199	return;
5200}
5201
5202/*
5203 * Bit of a misnomer: what this really means is
5204 * to enable OS management of the system, i.e.,
5205 * to disable special hardware management features.
5206 */
5207static void
5208igb_init_manageability(struct adapter *adapter)
5209{
5210	if (adapter->has_manage) {
5211		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5212		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5213
5214		/* disable hardware interception of ARP */
5215		manc &= ~(E1000_MANC_ARP_EN);
5216
5217                /* enable receiving management packets to the host */
5218		manc |= E1000_MANC_EN_MNG2HOST;
5219		manc2h |= 1 << 5;  /* Mng Port 623 */
5220		manc2h |= 1 << 6;  /* Mng Port 664 */
5221		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5222		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5223	}
5224}
5225
5226/*
5227 * Give control back to hardware management
5228 * controller if there is one.
5229 */
5230static void
5231igb_release_manageability(struct adapter *adapter)
5232{
5233	if (adapter->has_manage) {
5234		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5235
5236		/* re-enable hardware interception of ARP */
5237		manc |= E1000_MANC_ARP_EN;
5238		manc &= ~E1000_MANC_EN_MNG2HOST;
5239
5240		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5241	}
5242}
5243
5244/*
5245 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5246 * For ASF and Pass Through versions of f/w this means that
5247 * the driver is loaded.
5248 *
5249 */
5250static void
5251igb_get_hw_control(struct adapter *adapter)
5252{
5253	u32 ctrl_ext;
5254
5255	if (adapter->vf_ifp)
5256		return;
5257
5258	/* Let firmware know the driver has taken over */
5259	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5260	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5261	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5262}
5263
5264/*
5265 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5266 * For ASF and Pass Through versions of f/w this means that the
5267 * driver is no longer loaded.
5268 *
5269 */
5270static void
5271igb_release_hw_control(struct adapter *adapter)
5272{
5273	u32 ctrl_ext;
5274
5275	if (adapter->vf_ifp)
5276		return;
5277
5278	/* Let firmware take over control of h/w */
5279	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5280	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5281	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5282}
5283
5284static int
5285igb_is_valid_ether_addr(uint8_t *addr)
5286{
5287	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5288
5289	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5290		return (FALSE);
5291	}
5292
5293	return (TRUE);
5294}
5295
5296
5297/*
5298 * Enable PCI Wake On Lan capability
5299 */
5300static void
5301igb_enable_wakeup(device_t dev)
5302{
5303	u16     cap, status;
5304	u8      id;
5305
5306	/* First find the capabilities pointer */
5307	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5308	/* Read the PM Capabilities */
5309	id = pci_read_config(dev, cap, 1);
5310	if (id != PCIY_PMG)     /* Something wrong */
5311		return;
5312	/* OK, we have the power capabilities, so
5313	   now get the status register */
5314	cap += PCIR_POWER_STATUS;
5315	status = pci_read_config(dev, cap, 2);
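	/*
	** Set PME enable; writing the PME status bit back also
	** clears any pending wake event, arming the device to
	** assert PME on the next wake-up event.
	*/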
5316	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5317	pci_write_config(dev, cap, status, 2);
5318	return;
5319}
5320
5321static void
5322igb_led_func(void *arg, int onoff)
5323{
5324	struct adapter	*adapter = arg;
5325
5326	IGB_CORE_LOCK(adapter);
5327	if (onoff) {
5328		e1000_setup_led(&adapter->hw);
5329		e1000_led_on(&adapter->hw);
5330	} else {
5331		e1000_led_off(&adapter->hw);
5332		e1000_cleanup_led(&adapter->hw);
5333	}
5334	IGB_CORE_UNLOCK(adapter);
5335}
5336
5337/**********************************************************************
5338 *
5339 *  Update the board statistics counters.
5340 *
5341 **********************************************************************/
5342static void
5343igb_update_stats_counters(struct adapter *adapter)
5344{
5345	struct ifnet		*ifp;
5346        struct e1000_hw		*hw = &adapter->hw;
5347	struct e1000_hw_stats	*stats;
5348
5349	/*
5350	** The virtual function adapter has only a
5351	** small controlled set of stats, so update only
5352	** those and return.
5353	*/
5354	if (adapter->vf_ifp) {
5355		igb_update_vf_stats_counters(adapter);
5356		return;
5357	}
5358
5359	stats = (struct e1000_hw_stats	*)adapter->stats;
5360
5361	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5362	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5363		stats->symerrs +=
5364		    E1000_READ_REG(hw,E1000_SYMERRS);
5365		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5366	}
5367
5368	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5369	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5370	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5371	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5372
5373	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5374	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5375	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5376	stats->dc += E1000_READ_REG(hw, E1000_DC);
5377	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5378	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5379	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5380	/*
5381	** For watchdog management we need to know if we have been
5382	** paused during the last interval, so capture that here.
5383	*/
5384        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5385        stats->xoffrxc += adapter->pause_frames;
5386	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5387	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5388	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5389	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5390	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5391	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5392	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5393	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5394	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5395	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5396	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5397	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5398
5399	/* For the 64-bit byte counters the low dword must be read first. */
5400	/* Both registers clear on the read of the high dword */
5401
5402	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5403	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5404	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5405	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5406
5407	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5408	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5409	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5410	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5411	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5412
5413	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5414	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5415
5416	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5417	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5418	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5419	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5420	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5421	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5422	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5423	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5424	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5425	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5426
5427	/* Interrupt Counts */
5428
5429	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5430	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5431	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5432	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5433	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5434	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5435	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5436	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5437	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5438
5439	/* Host to Card Statistics */
5440
5441	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5442	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5443	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5444	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5445	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5446	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5447	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5448	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5449	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5450	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5451	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5452	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5453	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5454	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5455
5456	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5457	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5458	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5459	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5460	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5461	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5462
5463	ifp = adapter->ifp;
5464	ifp->if_collisions = stats->colc;
5465
5466	/* Rx Errors */
5467	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5468	    stats->crcerrs + stats->algnerrc +
5469	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5470
5471	/* Tx Errors */
5472	ifp->if_oerrors = stats->ecol +
5473	    stats->latecol + adapter->watchdog_events;
5474
5475	/* Driver specific counters */
5476	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5477	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5478	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5479	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5480	adapter->packet_buf_alloc_tx =
5481	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5482	adapter->packet_buf_alloc_rx =
5483	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5484}
5485
5486
5487/**********************************************************************
5488 *
5489 *  Initialize the VF board statistics counters.
5490 *
5491 **********************************************************************/
5492static void
5493igb_vf_init_stats(struct adapter *adapter)
5494{
5495	struct e1000_hw		*hw = &adapter->hw;
5496	struct e1000_vf_stats	*stats;
5497
5498	stats = (struct e1000_vf_stats *)adapter->stats;
5499	if (stats == NULL)
5500		return;
5501	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5502	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5503	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5504	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5505	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5506}
5507
5508/**********************************************************************
5509 *
5510 *  Update the VF board statistics counters.
5511 *
5512 **********************************************************************/
5513static void
5514igb_update_vf_stats_counters(struct adapter *adapter)
5515{
5516	struct e1000_hw *hw = &adapter->hw;
5517	struct e1000_vf_stats	*stats;
5518
5519	if (adapter->link_speed == 0)
5520		return;
5521
5522	stats = (struct e1000_vf_stats *)adapter->stats;
5523
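	/*
	 * UPDATE_VF_REG() (defined in if_igb.h, not shown in this section) is
	 * assumed to fold the current register reading into the running 64-bit
	 * counter, using the saved "last" value to handle 32-bit rollover.
	 */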
5524	UPDATE_VF_REG(E1000_VFGPRC,
5525	    stats->last_gprc, stats->gprc);
5526	UPDATE_VF_REG(E1000_VFGORC,
5527	    stats->last_gorc, stats->gorc);
5528	UPDATE_VF_REG(E1000_VFGPTC,
5529	    stats->last_gptc, stats->gptc);
5530	UPDATE_VF_REG(E1000_VFGOTC,
5531	    stats->last_gotc, stats->gotc);
5532	UPDATE_VF_REG(E1000_VFMPRC,
5533	    stats->last_mprc, stats->mprc);
5534}
5535
5536/* Export a single 32-bit register via a read-only sysctl. */
5537static int
5538igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5539{
5540	struct adapter *adapter;
5541	u_int val;
5542
5543	adapter = oidp->oid_arg1;
5544	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5545	return (sysctl_handle_int(oidp, &val, 0, req));
5546}
5547
5548/*
5549**  Tunable interrupt rate handler
5550*/
5551static int
5552igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5553{
5554	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5555	int			error;
5556	u32			reg, usec, rate;
5557
5558	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5559	usec = ((reg & 0x7FFC) >> 2);
5560	if (usec > 0)
5561		rate = 1000000 / usec;
5562	else
5563		rate = 0;
5564	error = sysctl_handle_int(oidp, &rate, 0, req);
5565	if (error || !req->newptr)
5566		return (error);
5567	return (0);
5568}
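/*
 * For illustration (unit and queue numbers are hypothetical): the rate
 * computed above is exported per queue by igb_add_hw_stats() below and can
 * be read with:
 *
 *	sysctl dev.igb.0.queue0.interrupt_rate
 *
 * Note that a value written to this OID is accepted by sysctl_handle_int()
 * but is not written back to EITR by this handler.
 */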
5569
5570/*
5571 * Add sysctl variables, one per statistic, to the system.
5572 */
5573static void
5574igb_add_hw_stats(struct adapter *adapter)
5575{
5576	device_t dev = adapter->dev;
5577
5578	struct tx_ring *txr = adapter->tx_rings;
5579	struct rx_ring *rxr = adapter->rx_rings;
5580
5581	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5582	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5583	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5584	struct e1000_hw_stats *stats = adapter->stats;
5585
5586	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5587	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5588
5589#define QUEUE_NAME_LEN 32
5590	char namebuf[QUEUE_NAME_LEN];
5591
5592	/* Driver Statistics */
5593	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5594			CTLFLAG_RD, &adapter->link_irq, 0,
5595			"Link MSIX IRQ Handled");
5596	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5597			CTLFLAG_RD, &adapter->dropped_pkts,
5598			"Driver dropped packets");
5599	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5600			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5601			"Driver TX DMA setup failures in xmit");
5602	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5603			CTLFLAG_RD, &adapter->rx_overruns,
5604			"RX overruns");
5605	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5606			CTLFLAG_RD, &adapter->watchdog_events,
5607			"Watchdog timeouts");
5608
5609	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5610			CTLFLAG_RD, &adapter->device_control,
5611			"Device Control Register");
5612	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5613			CTLFLAG_RD, &adapter->rx_control,
5614			"Receiver Control Register");
5615	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5616			CTLFLAG_RD, &adapter->int_mask,
5617			"Interrupt Mask");
5618	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5619			CTLFLAG_RD, &adapter->eint_mask,
5620			"Extended Interrupt Mask");
5621	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5622			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5623			"Transmit Buffer Packet Allocation");
5624	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5625			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5626			"Receive Buffer Packet Allocation");
5627	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5628			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5629			"Flow Control High Watermark");
5630	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5631			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5632			"Flow Control Low Watermark");
5633
5634	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5635		struct lro_ctrl *lro = &rxr->lro;
5636
5637		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5638		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5639					    CTLFLAG_RD, NULL, "Queue Name");
5640		queue_list = SYSCTL_CHILDREN(queue_node);
5641
5642		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5643				CTLFLAG_RD, &adapter->queues[i],
5644				sizeof(&adapter->queues[i]),
5645				igb_sysctl_interrupt_rate_handler,
5646				"IU", "Interrupt Rate");
5647
5648		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5649				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5650				igb_sysctl_reg_handler, "IU",
5651 				"Transmit Descriptor Head");
5652		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5653				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5654				igb_sysctl_reg_handler, "IU",
5655 				"Transmit Descriptor Tail");
5656		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5657				CTLFLAG_RD, &txr->no_desc_avail,
5658				"Queue No Descriptor Available");
5659		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5660				CTLFLAG_RD, &txr->total_packets,
5661				"Queue Packets Transmitted");
5662
5663		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5664				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5665				igb_sysctl_reg_handler, "IU",
5666				"Receive Descriptor Head");
5667		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5668				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5669				igb_sysctl_reg_handler, "IU",
5670				"Receive Descriptor Tail");
5671		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5672				CTLFLAG_RD, &rxr->rx_packets,
5673				"Queue Packets Received");
5674		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5675				CTLFLAG_RD, &rxr->rx_bytes,
5676				"Queue Bytes Received");
5677		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5678				CTLFLAG_RD, &lro->lro_queued, 0,
5679				"LRO Queued");
5680		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5681				CTLFLAG_RD, &lro->lro_flushed, 0,
5682				"LRO Flushed");
5683	}
5684
5685	/* MAC stats get their own sub node */
5686
5687	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5688				    CTLFLAG_RD, NULL, "MAC Statistics");
5689	stat_list = SYSCTL_CHILDREN(stat_node);
5690
5691	/*
5692	** The VF adapter has a very limited set of stats
5693	** since it's not managing the metal, so to speak.
5694	*/
5695	if (adapter->vf_ifp) {
5696		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5697				CTLFLAG_RD, &stats->gprc,
5698				"Good Packets Received");
5699		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5700				CTLFLAG_RD, &stats->gptc,
5701				"Good Packets Transmitted");
5702		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5703				CTLFLAG_RD, &stats->gorc,
5704				"Good Octets Received");
5705		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5706				CTLFLAG_RD, &stats->gotc,
5707				"Good Octets Transmitted");
5708		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5709				CTLFLAG_RD, &stats->mprc,
5710				"Multicast Packets Received");
5711		return;
5712	}
5713
5714	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5715			CTLFLAG_RD, &stats->ecol,
5716			"Excessive collisions");
5717	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5718			CTLFLAG_RD, &stats->scc,
5719			"Single collisions");
5720	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5721			CTLFLAG_RD, &stats->mcc,
5722			"Multiple collisions");
5723	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5724			CTLFLAG_RD, &stats->latecol,
5725			"Late collisions");
5726	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5727			CTLFLAG_RD, &stats->colc,
5728			"Collision Count");
5729	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5730			CTLFLAG_RD, &stats->symerrs,
5731			"Symbol Errors");
5732	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5733			CTLFLAG_RD, &stats->sec,
5734			"Sequence Errors");
5735	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5736			CTLFLAG_RD, &stats->dc,
5737			"Defer Count");
5738	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5739			CTLFLAG_RD, &stats->mpc,
5740			"Missed Packets");
5741	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5742			CTLFLAG_RD, &stats->rnbc,
5743			"Receive No Buffers");
5744	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5745			CTLFLAG_RD, &stats->ruc,
5746			"Receive Undersize");
5747	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5748			CTLFLAG_RD, &stats->rfc,
5749			"Fragmented Packets Received");
5750	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5751			CTLFLAG_RD, &stats->roc,
5752			"Oversized Packets Received");
5753	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5754			CTLFLAG_RD, &stats->rjc,
5755			"Received Jabber");
5756	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5757			CTLFLAG_RD, &stats->rxerrc,
5758			"Receive Errors");
5759	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5760			CTLFLAG_RD, &stats->crcerrs,
5761			"CRC errors");
5762	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5763			CTLFLAG_RD, &stats->algnerrc,
5764			"Alignment Errors");
5765	/* On 82575 these are collision counts */
5766	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5767			CTLFLAG_RD, &stats->cexterr,
5768			"Collision/Carrier extension errors");
5769	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5770			CTLFLAG_RD, &stats->xonrxc,
5771			"XON Received");
5772	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5773			CTLFLAG_RD, &stats->xontxc,
5774			"XON Transmitted");
5775	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5776			CTLFLAG_RD, &stats->xoffrxc,
5777			"XOFF Received");
5778	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5779			CTLFLAG_RD, &stats->xofftxc,
5780			"XOFF Transmitted");
5781	/* Packet Reception Stats */
5782	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5783			CTLFLAG_RD, &stats->tpr,
5784			"Total Packets Received");
5785	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5786			CTLFLAG_RD, &stats->gprc,
5787			"Good Packets Received");
5788	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5789			CTLFLAG_RD, &stats->bprc,
5790			"Broadcast Packets Received");
5791	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5792			CTLFLAG_RD, &stats->mprc,
5793			"Multicast Packets Received");
5794	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5795			CTLFLAG_RD, &stats->prc64,
5796			"64 byte frames received");
5797	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5798			CTLFLAG_RD, &stats->prc127,
5799			"65-127 byte frames received");
5800	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5801			CTLFLAG_RD, &stats->prc255,
5802			"128-255 byte frames received");
5803	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5804			CTLFLAG_RD, &stats->prc511,
5805			"256-511 byte frames received");
5806	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5807			CTLFLAG_RD, &stats->prc1023,
5808			"512-1023 byte frames received");
5809	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5810			CTLFLAG_RD, &stats->prc1522,
5811			"1024-1522 byte frames received");
5812 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5813 			CTLFLAG_RD, &stats->gorc,
5814 			"Good Octets Received");
5815
5816	/* Packet Transmission Stats */
5817 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5818 			CTLFLAG_RD, &stats->gotc,
5819 			"Good Octets Transmitted");
5820	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5821			CTLFLAG_RD, &stats->tpt,
5822			"Total Packets Transmitted");
5823	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5824			CTLFLAG_RD, &stats->gptc,
5825			"Good Packets Transmitted");
5826	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5827			CTLFLAG_RD, &stats->bptc,
5828			"Broadcast Packets Transmitted");
5829	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5830			CTLFLAG_RD, &stats->mptc,
5831			"Multicast Packets Transmitted");
5832	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5833			CTLFLAG_RD, &stats->ptc64,
5834			"64 byte frames transmitted");
5835	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5836			CTLFLAG_RD, &stats->ptc127,
5837			"65-127 byte frames transmitted");
5838	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5839			CTLFLAG_RD, &stats->ptc255,
5840			"128-255 byte frames transmitted");
5841	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5842			CTLFLAG_RD, &stats->ptc511,
5843			"256-511 byte frames transmitted");
5844	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5845			CTLFLAG_RD, &stats->ptc1023,
5846			"512-1023 byte frames transmitted");
5847	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5848			CTLFLAG_RD, &stats->ptc1522,
5849			"1024-1522 byte frames transmitted");
5850	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5851			CTLFLAG_RD, &stats->tsctc,
5852			"TSO Contexts Transmitted");
5853	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5854			CTLFLAG_RD, &stats->tsctfc,
5855			"TSO Contexts Failed");
5856
5857
5858	/* Interrupt Stats */
5859
5860	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5861				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5862	int_list = SYSCTL_CHILDREN(int_node);
5863
5864	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5865			CTLFLAG_RD, &stats->iac,
5866			"Interrupt Assertion Count");
5867
5868	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5869			CTLFLAG_RD, &stats->icrxptc,
5870			"Interrupt Cause Rx Pkt Timer Expire Count");
5871
5872	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5873			CTLFLAG_RD, &stats->icrxatc,
5874			"Interrupt Cause Rx Abs Timer Expire Count");
5875
5876	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5877			CTLFLAG_RD, &stats->ictxptc,
5878			"Interrupt Cause Tx Pkt Timer Expire Count");
5879
5880	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5881			CTLFLAG_RD, &stats->ictxatc,
5882			"Interrupt Cause Tx Abs Timer Expire Count");
5883
5884	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5885			CTLFLAG_RD, &stats->ictxqec,
5886			"Interrupt Cause Tx Queue Empty Count");
5887
5888	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5889			CTLFLAG_RD, &stats->ictxqmtc,
5890			"Interrupt Cause Tx Queue Min Thresh Count");
5891
5892	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5893			CTLFLAG_RD, &stats->icrxdmtc,
5894			"Interrupt Cause Rx Desc Min Thresh Count");
5895
5896	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5897			CTLFLAG_RD, &stats->icrxoc,
5898			"Interrupt Cause Receiver Overrun Count");
5899
5900	/* Host to Card Stats */
5901
5902	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5903				    CTLFLAG_RD, NULL,
5904				    "Host to Card Statistics");
5905
5906	host_list = SYSCTL_CHILDREN(host_node);
5907
5908	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5909			CTLFLAG_RD, &stats->cbtmpc,
5910			"Circuit Breaker Tx Packet Count");
5911
5912	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5913			CTLFLAG_RD, &stats->htdpmc,
5914			"Host Transmit Discarded Packets");
5915
5916	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5917			CTLFLAG_RD, &stats->rpthc,
5918			"Rx Packets To Host");
5919
5920	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5921			CTLFLAG_RD, &stats->cbrmpc,
5922			"Circuit Breaker Rx Packet Count");
5923
5924	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5925			CTLFLAG_RD, &stats->cbrdpc,
5926			"Circuit Breaker Rx Dropped Count");
5927
5928	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5929			CTLFLAG_RD, &stats->hgptc,
5930			"Host Good Packets Tx Count");
5931
5932	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5933			CTLFLAG_RD, &stats->htcbdpc,
5934			"Host Tx Circuit Breaker Dropped Count");
5935
5936	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5937			CTLFLAG_RD, &stats->hgorc,
5938			"Host Good Octets Received Count");
5939
5940	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5941			CTLFLAG_RD, &stats->hgotc,
5942			"Host Good Octets Transmit Count");
5943
5944	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5945			CTLFLAG_RD, &stats->lenerrs,
5946			"Length Errors");
5947
5948	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5949			CTLFLAG_RD, &stats->scvpc,
5950			"SerDes/SGMII Code Violation Pkt Count");
5951
5952	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5953			CTLFLAG_RD, &stats->hrmpc,
5954			"Header Redirection Missed Packet Count");
5955}
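/*
 * The OIDs created above live under the device's sysctl tree, so (assuming
 * unit 0, purely for illustration) they can be inspected with, e.g.:
 *
 *	sysctl dev.igb.0.dropped
 *	sysctl dev.igb.0.mac_stats
 *	sysctl dev.igb.0.queue0.tx_packets
 */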
5956
5957
5958/**********************************************************************
5959 *
5960 *  This routine provides a way to dump out the adapter EEPROM,
5961 *  often a useful debug/service tool. It dumps only the first
5962 *  32 words; everything that matters lies within that extent.
5963 *
5964 **********************************************************************/
5965static int
5966igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5967{
5968	struct adapter *adapter;
5969	int error;
5970	int result;
5971
5972	result = -1;
5973	error = sysctl_handle_int(oidp, &result, 0, req);
5974
5975	if (error || !req->newptr)
5976		return (error);
5977
5978	/*
5979	 * This value will cause a hex dump of the
5980	 * first 32 16-bit words of the EEPROM to
5981	 * the screen.
5982	 */
5983	if (result == 1) {
5984		adapter = (struct adapter *)arg1;
5985		igb_print_nvm_info(adapter);
5986	}
5987
5988	return (error);
5989}
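/*
 * Writing 1 to the sysctl backed by this handler triggers the EEPROM dump
 * below; any other value is ignored. Assuming the OID is registered as
 * "nvm" under the device tree (the registration is not shown in this
 * section), usage would look like:
 *
 *	sysctl dev.igb.0.nvm=1
 */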
5990
5991static void
5992igb_print_nvm_info(struct adapter *adapter)
5993{
5994	u16	eeprom_data;
5995	int	i, j, row = 0;
5996
5997	/* It's a bit crude, but it gets the job done */
5998	printf("\nInterface EEPROM Dump:\n");
5999	printf("Offset\n0x0000  ");
6000	for (i = 0, j = 0; i < 32; i++, j++) {
6001		if (j == 8) { /* Make the offset block */
6002			j = 0; ++row;
6003			printf("\n0x00%x0  ", row);
6004		}
6005		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6006		printf("%04x ", eeprom_data);
6007	}
6008	printf("\n");
6009}
6010
6011static void
6012igb_set_sysctl_value(struct adapter *adapter, const char *name,
6013	const char *description, int *limit, int value)
6014{
6015	*limit = value;
6016	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6017	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6018	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
6019}
6020
6021/*
6022** Set flow control using sysctl:
6023** Flow control values:
6024** 	0 - off
6025**	1 - rx pause
6026**	2 - tx pause
6027**	3 - full
6028*/
6029static int
6030igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6031{
6032	int		error;
6033	static int	input = 3; /* default is full */
6034	struct adapter	*adapter = (struct adapter *) arg1;
6035
6036	error = sysctl_handle_int(oidp, &input, 0, req);
6037
6038	if ((error) || (req->newptr == NULL))
6039		return (error);
6040
6041	switch (input) {
6042		case e1000_fc_rx_pause:
6043		case e1000_fc_tx_pause:
6044		case e1000_fc_full:
6045		case e1000_fc_none:
6046			adapter->hw.fc.requested_mode = input;
6047			adapter->fc = input;
6048			break;
6049		default:
6050			/* Do nothing */
6051			return (error);
6052	}
6053
6054	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6055	e1000_force_mac_fc(&adapter->hw);
6056	return (error);
6057}
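/*
 * Assuming this handler is registered as the "fc" OID under the device
 * tree (the registration is not shown in this section), full flow control
 * could be requested with:
 *
 *	sysctl dev.igb.0.fc=3
 */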
6058
6059/*
6060** Manage DMA Coalesce:
6061** Control values:
6062** 	0/1 - off/on
6063**	Legal timer values are:
6064**	250, 500, and 1000-10000 in increments of 1000
6065*/
6066static int
6067igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6068{
6069	struct adapter *adapter = (struct adapter *) arg1;
6070	int		error;
6071
6072	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6073
6074	if ((error) || (req->newptr == NULL))
6075		return (error);
6076
6077	switch (adapter->dmac) {
6078		case 0:
6079			/* Disabling */
6080			break;
6081		case 1: /* Just enable and use default */
6082			adapter->dmac = 1000;
6083			break;
6084		case 250:
6085		case 500:
6086		case 1000:
6087		case 2000:
6088		case 3000:
6089		case 4000:
6090		case 5000:
6091		case 6000:
6092		case 7000:
6093		case 8000:
6094		case 9000:
6095		case 10000:
6096			/* Legal values - allow */
6097			break;
6098		default:
6099			/* Do nothing, illegal value */
6100			adapter->dmac = 0;
6101			return (EINVAL);
6102	}
6103	/* Reinit the interface */
6104	igb_init(adapter);
6105	return (error);
6106}
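/*
 * Assuming this handler backs a "dmac" OID under the device tree (not
 * shown in this section), DMA coalescing could be enabled with the default
 * timer, or set to an explicit legal timer value, e.g.:
 *
 *	sysctl dev.igb.0.dmac=1
 *	sysctl dev.igb.0.dmac=2000
 */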
6107
6108/*
6109** Manage Energy Efficient Ethernet:
6110** Control values:
6111**     0/1 - enabled/disabled
6112*/
6113static int
6114igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6115{
6116	struct adapter	*adapter = (struct adapter *) arg1;
6117	int		error, value;
6118
6119	value = adapter->hw.dev_spec._82575.eee_disable;
6120	error = sysctl_handle_int(oidp, &value, 0, req);
6121	if (error || req->newptr == NULL)
6122		return (error);
6123	IGB_CORE_LOCK(adapter);
6124	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6125	igb_init_locked(adapter);
6126	IGB_CORE_UNLOCK(adapter);
6127	return (0);
6128}
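/*
 * Assuming this handler backs an "eee_disabled" OID under the device tree
 * (registration not shown in this section), EEE could be turned off with:
 *
 *	sysctl dev.igb.0.eee_disabled=1
 *
 * Any accepted write re-initializes the interface.
 */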
6129