1/******************************************************************************
2
3  Copyright (c) 2001-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/10/sys/dev/e1000/if_igb.c 342790 2019-01-05 19:35:10Z marius $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38
39#ifdef HAVE_KERNEL_OPTION_HEADERS
40#include "opt_device_polling.h"
41#include "opt_altq.h"
42#endif
43
44#include "if_igb.h"
45
46/*********************************************************************
47 *  Driver version:
48 *********************************************************************/
49char igb_driver_version[] = "2.5.3-k";
50
51
52/*********************************************************************
53 *  PCI Device ID Table
54 *
55 *  Used by probe to select the devices to attach to.
56 *  The last field stores an index into igb_strings.
57 *  Last entry must be all 0s
58 *
59 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
60 *********************************************************************/
61
62static igb_vendor_info_t igb_vendor_info_array[] =
63{
64	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
65	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
66	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
67	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
68	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
69	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
70	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER,	0, 0, 0},
71	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
72	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
73	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
74	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
75	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
76	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
77	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER,	0, 0, 0},
78	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
79	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII,	0, 0, 0},
80	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
81	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
82	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
83	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
84	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
85	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
86	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER,	0, 0, 0},
87	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,	0, 0, 0},
88	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES,	0, 0, 0},
89	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,	0, 0, 0},
90	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
91	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER,	0, 0, 0},
92	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
93	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
94	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
95	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
96	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,	0, 0, 0},
97	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES,	0, 0, 0},
98	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,	0, 0, 0},
99	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER,	0, 0, 0},
100	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
101	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
102	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,	0, 0, 0},
103	/* required last entry */
104	{0, 0, 0, 0, 0}
105};
106
107/*********************************************************************
108 *  Table of branding strings for all supported NICs.
109 *********************************************************************/
110
111static char *igb_strings[] = {
112	"Intel(R) PRO/1000 Network Connection"
113};
114
115/*********************************************************************
116 *  Function prototypes
117 *********************************************************************/
118static int	igb_probe(device_t);
119static int	igb_attach(device_t);
120static int	igb_detach(device_t);
121static int	igb_shutdown(device_t);
122static int	igb_suspend(device_t);
123static int	igb_resume(device_t);
124#ifndef IGB_LEGACY_TX
125static int	igb_mq_start(struct ifnet *, struct mbuf *);
126static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
127static void	igb_qflush(struct ifnet *);
128static void	igb_deferred_mq_start(void *, int);
129#else
130static void	igb_start(struct ifnet *);
131static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
132#endif
133static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
134static void	igb_init(void *);
135static void	igb_init_locked(struct adapter *);
136static void	igb_stop(void *);
137static void	igb_media_status(struct ifnet *, struct ifmediareq *);
138static int	igb_media_change(struct ifnet *);
139static void	igb_identify_hardware(struct adapter *);
140static int	igb_allocate_pci_resources(struct adapter *);
141static int	igb_allocate_msix(struct adapter *);
142static int	igb_allocate_legacy(struct adapter *);
143static int	igb_setup_msix(struct adapter *);
144static void	igb_free_pci_resources(struct adapter *);
145static void	igb_local_timer(void *);
146static void	igb_reset(struct adapter *);
147static int	igb_setup_interface(device_t, struct adapter *);
148static int	igb_allocate_queues(struct adapter *);
149static void	igb_configure_queues(struct adapter *);
150
151static int	igb_allocate_transmit_buffers(struct tx_ring *);
152static void	igb_setup_transmit_structures(struct adapter *);
153static void	igb_setup_transmit_ring(struct tx_ring *);
154static void	igb_initialize_transmit_units(struct adapter *);
155static void	igb_free_transmit_structures(struct adapter *);
156static void	igb_free_transmit_buffers(struct tx_ring *);
157
158static int	igb_allocate_receive_buffers(struct rx_ring *);
159static int	igb_setup_receive_structures(struct adapter *);
160static int	igb_setup_receive_ring(struct rx_ring *);
161static void	igb_initialize_receive_units(struct adapter *);
162static void	igb_free_receive_structures(struct adapter *);
163static void	igb_free_receive_buffers(struct rx_ring *);
164static void	igb_free_receive_ring(struct rx_ring *);
165
166static void	igb_enable_intr(struct adapter *);
167static void	igb_disable_intr(struct adapter *);
168static void	igb_update_stats_counters(struct adapter *);
169static bool	igb_txeof(struct tx_ring *);
170
171static __inline	void igb_rx_discard(struct rx_ring *, int);
172static __inline void igb_rx_input(struct rx_ring *,
173		    struct ifnet *, struct mbuf *, u32);
174
175static bool	igb_rxeof(struct igb_queue *, int, int *);
176static void	igb_rx_checksum(u32, struct mbuf *, u32);
177static int	igb_tx_ctx_setup(struct tx_ring *,
178		    struct mbuf *, u32 *, u32 *);
179static int	igb_tso_setup(struct tx_ring *,
180		    struct mbuf *, u32 *, u32 *);
181static void	igb_set_promisc(struct adapter *);
182static void	igb_disable_promisc(struct adapter *);
183static void	igb_set_multi(struct adapter *);
184static void	igb_update_link_status(struct adapter *);
185static void	igb_refresh_mbufs(struct rx_ring *, int);
186
187static void	igb_register_vlan(void *, struct ifnet *, u16);
188static void	igb_unregister_vlan(void *, struct ifnet *, u16);
189static void	igb_setup_vlan_hw_support(struct adapter *);
190
191static int	igb_xmit(struct tx_ring *, struct mbuf **);
192static int	igb_dma_malloc(struct adapter *, bus_size_t,
193		    struct igb_dma_alloc *, int);
194static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
195static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
196static void	igb_print_nvm_info(struct adapter *);
197static int 	igb_is_valid_ether_addr(u8 *);
198static void     igb_add_hw_stats(struct adapter *);
199
200static void	igb_vf_init_stats(struct adapter *);
201static void	igb_update_vf_stats_counters(struct adapter *);
202
203/* Management and WOL Support */
204static void	igb_init_manageability(struct adapter *);
205static void	igb_release_manageability(struct adapter *);
206static void     igb_get_hw_control(struct adapter *);
207static void     igb_release_hw_control(struct adapter *);
208static void     igb_enable_wakeup(device_t);
209static void     igb_led_func(void *, int);
210
211static int	igb_irq_fast(void *);
212static void	igb_msix_que(void *);
213static void	igb_msix_link(void *);
214static void	igb_handle_que(void *context, int pending);
215static void	igb_handle_link(void *context, int pending);
216static void	igb_handle_link_locked(struct adapter *);
217
218static void	igb_set_sysctl_value(struct adapter *, const char *,
219		    const char *, int *, int);
220static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
221static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
222static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
223
224#ifdef DEVICE_POLLING
225static poll_handler_t igb_poll;
226#endif /* DEVICE_POLLING */
227
228/*********************************************************************
229 *  FreeBSD Device Interface Entry Points
230 *********************************************************************/
231
232static device_method_t igb_methods[] = {
233	/* Device interface */
234	DEVMETHOD(device_probe, igb_probe),
235	DEVMETHOD(device_attach, igb_attach),
236	DEVMETHOD(device_detach, igb_detach),
237	DEVMETHOD(device_shutdown, igb_shutdown),
238	DEVMETHOD(device_suspend, igb_suspend),
239	DEVMETHOD(device_resume, igb_resume),
240	DEVMETHOD_END
241};
242
243static driver_t igb_driver = {
244	"igb", igb_methods, sizeof(struct adapter),
245};
246
247static devclass_t igb_devclass;
248DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
249MODULE_DEPEND(igb, pci, 1, 1, 1);
250MODULE_DEPEND(igb, ether, 1, 1, 1);
251
252/*********************************************************************
253 *  Tunable default values.
254 *********************************************************************/
255
256static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
257
258/* Descriptor defaults */
259static int igb_rxd = IGB_DEFAULT_RXD;
260static int igb_txd = IGB_DEFAULT_TXD;
261TUNABLE_INT("hw.igb.rxd", &igb_rxd);
262TUNABLE_INT("hw.igb.txd", &igb_txd);
263SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
264    "Number of receive descriptors per queue");
265SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
266    "Number of transmit descriptors per queue");
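/*
** Example (illustrative values, not a recommendation): these are
** CTLFLAG_RDTUN tunables, so they are set at boot time, e.g. in
** /boot/loader.conf:
**   hw.igb.rxd=2048
**   hw.igb.txd=2048
** Out-of-range values, or counts whose ring size in bytes is not a
** multiple of IGB_DBA_ALIGN, are replaced with the defaults in
** igb_attach().
*/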
267
268/*
269** AIM: Adaptive Interrupt Moderation
270** which means that the interrupt rate
271** is varied over time based on the
272** traffic for that interrupt vector
273*/
274static int igb_enable_aim = TRUE;
275TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
276SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
277    "Enable adaptive interrupt moderation");
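/*
** Because enable_aim is CTLFLAG_RW it can also be toggled on a running
** system, for example (hypothetical invocation):
**   sysctl hw.igb.enable_aim=0
*/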
278
279/*
280 * MSIX should be the default for best performance,
281 * but this allows it to be forced off for testing.
282 */
283static int igb_enable_msix = 1;
284TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
285SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
286    "Enable MSI-X interrupts");
287
288/*
289** Tunable interrupt rate
290*/
291static int igb_max_interrupt_rate = 8000;
292TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
293SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295
296#ifndef IGB_LEGACY_TX
297/*
298** Tunable number of buffers in the buf-ring (drbr_xxx)
299*/
300static int igb_buf_ring_size = IGB_BR_SIZE;
301TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
302SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
303    &igb_buf_ring_size, 0, "Size of the bufring");
304#endif
305
306/*
307** Header split causes the packet header to
308** be DMA'd to a separate mbuf from the payload,
309** which can have memory alignment benefits. Another
310** plus is that small packets often fit entirely
311** into the header and thus use no cluster. It is
312** a very workload-dependent feature.
313*/
314static int igb_header_split = FALSE;
315TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
316SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
317    "Enable receive mbuf header split");
318
319/*
320** This will autoconfigure based on the
321** number of CPUs and max supported
322** MSIX messages if left at 0.
323*/
324static int igb_num_queues = 0;
325TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
326SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
327    "Number of queues to configure, 0 indicates autoconfigure");
328
329/*
330** Global variable to store last used CPU when binding queues
331** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
332** queue is bound to a cpu.
333*/
334static int igb_last_bind_cpu = -1;
335
336/* How many packets rxeof tries to clean at a time */
337static int igb_rx_process_limit = 100;
338TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
339SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
340    &igb_rx_process_limit, 0,
341    "Maximum number of received packets to process at a time, -1 means unlimited");
342
343/* How many packets txeof tries to clean at a time */
344static int igb_tx_process_limit = -1;
345TUNABLE_INT("hw.igb.tx_process_limit", &igb_tx_process_limit);
346SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
347    &igb_tx_process_limit, 0,
348    "Maximum number of sent packets to process at a time, -1 means unlimited");
349
350#ifdef DEV_NETMAP	/* see ixgbe.c for details */
351#include <dev/netmap/if_igb_netmap.h>
352#endif /* DEV_NETMAP */
353/*********************************************************************
354 *  Device identification routine
355 *
356 *  igb_probe determines whether the driver should be loaded for an
357 *  adapter based on its PCI vendor/device ID.
358 *
359 *  return BUS_PROBE_DEFAULT on success, positive on failure
360 *********************************************************************/
361
362static int
363igb_probe(device_t dev)
364{
365	char		adapter_name[256];
366	uint16_t	pci_vendor_id = 0;
367	uint16_t	pci_device_id = 0;
368	uint16_t	pci_subvendor_id = 0;
369	uint16_t	pci_subdevice_id = 0;
370	igb_vendor_info_t *ent;
371
372	INIT_DEBUGOUT("igb_probe: begin");
373
374	pci_vendor_id = pci_get_vendor(dev);
375	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
376		return (ENXIO);
377
378	pci_device_id = pci_get_device(dev);
379	pci_subvendor_id = pci_get_subvendor(dev);
380	pci_subdevice_id = pci_get_subdevice(dev);
381
382	ent = igb_vendor_info_array;
383	while (ent->vendor_id != 0) {
384		if ((pci_vendor_id == ent->vendor_id) &&
385		    (pci_device_id == ent->device_id) &&
386
387		    ((pci_subvendor_id == ent->subvendor_id) ||
388		    (ent->subvendor_id == 0)) &&
389
390		    ((pci_subdevice_id == ent->subdevice_id) ||
391		    (ent->subdevice_id == 0))) {
392			sprintf(adapter_name, "%s, Version - %s",
393				igb_strings[ent->index],
394				igb_driver_version);
395			device_set_desc_copy(dev, adapter_name);
396			return (BUS_PROBE_DEFAULT);
397		}
398		ent++;
399	}
400	return (ENXIO);
401}
402
403/*********************************************************************
404 *  Device initialization routine
405 *
406 *  The attach entry point is called when the driver is being loaded.
407 *  This routine identifies the type of hardware, allocates all resources
408 *  and initializes the hardware.
409 *
410 *  return 0 on success, positive on failure
411 *********************************************************************/
412
413static int
414igb_attach(device_t dev)
415{
416	struct adapter	*adapter;
417	int		error = 0;
418	u16		eeprom_data;
419
420	INIT_DEBUGOUT("igb_attach: begin");
421
422	if (resource_disabled("igb", device_get_unit(dev))) {
423		device_printf(dev, "Disabled by device hint\n");
424		return (ENXIO);
425	}
426
427	adapter = device_get_softc(dev);
428	adapter->dev = adapter->osdep.dev = dev;
429	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
430
431	/* SYSCTLs */
432	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
433	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
434	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
435	    igb_sysctl_nvm_info, "I", "NVM Information");
436
437	igb_set_sysctl_value(adapter, "enable_aim",
438	    "Interrupt Moderation", &adapter->enable_aim,
439	    igb_enable_aim);
440
441	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
442	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
443	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
444	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
445
446	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
447
448	/* Determine hardware and mac info */
449	igb_identify_hardware(adapter);
450
451	/* Setup PCI resources */
452	if (igb_allocate_pci_resources(adapter)) {
453		device_printf(dev, "Allocation of PCI resources failed\n");
454		error = ENXIO;
455		goto err_pci;
456	}
457
458	/* Do Shared Code initialization */
459	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
460		device_printf(dev, "Setup of Shared code failed\n");
461		error = ENXIO;
462		goto err_pci;
463	}
464
465	e1000_get_bus_info(&adapter->hw);
466
467	/* Sysctls for limiting the amount of work done in the taskqueues */
468	igb_set_sysctl_value(adapter, "rx_processing_limit",
469	    "max number of rx packets to process",
470	    &adapter->rx_process_limit, igb_rx_process_limit);
471
472	igb_set_sysctl_value(adapter, "tx_processing_limit",
473	    "max number of tx packets to process",
474	    &adapter->tx_process_limit, igb_tx_process_limit);
475
476	/*
477	 * Validate the number of transmit and receive descriptors. The
478	 * count must not exceed the hardware maximum, and the ring size
479	 * in bytes must be a multiple of IGB_DBA_ALIGN.
480	 */
481	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
482	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
483		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
484		    IGB_DEFAULT_TXD, igb_txd);
485		adapter->num_tx_desc = IGB_DEFAULT_TXD;
486	} else
487		adapter->num_tx_desc = igb_txd;
488	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
489	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
490		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
491		    IGB_DEFAULT_RXD, igb_rxd);
492		adapter->num_rx_desc = IGB_DEFAULT_RXD;
493	} else
494		adapter->num_rx_desc = igb_rxd;
495
496	adapter->hw.mac.autoneg = DO_AUTO_NEG;
497	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
498	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
499
500	/* Copper options */
501	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
502		adapter->hw.phy.mdix = AUTO_ALL_MODES;
503		adapter->hw.phy.disable_polarity_correction = FALSE;
504		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
505	}
506
507	/*
508	 * Set the frame limits assuming
509	 * standard ethernet sized frames.
510	 */
511	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
512
513	/*
514	** Allocate and Setup Queues
515	*/
516	if (igb_allocate_queues(adapter)) {
517		error = ENOMEM;
518		goto err_pci;
519	}
520
521	/* Allocate the appropriate stats memory */
522	if (adapter->vf_ifp) {
523		adapter->stats =
524		    (struct e1000_vf_stats *)malloc(sizeof \
525		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526		igb_vf_init_stats(adapter);
527	} else
528		adapter->stats =
529		    (struct e1000_hw_stats *)malloc(sizeof \
530		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
531	if (adapter->stats == NULL) {
532		device_printf(dev, "Can not allocate stats memory\n");
533		error = ENOMEM;
534		goto err_late;
535	}
536
537	/* Allocate multicast array memory. */
538	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
539	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
540	if (adapter->mta == NULL) {
541		device_printf(dev, "Can not allocate multicast setup array\n");
542		error = ENOMEM;
543		goto err_late;
544	}
545
546	/* Some adapter-specific advanced features */
547	if (adapter->hw.mac.type >= e1000_i350) {
548		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
549		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
550		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
551		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
552		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
553		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
554		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
555		    adapter, 0, igb_sysctl_eee, "I",
556		    "Disable Energy Efficient Ethernet");
557		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
558			if (adapter->hw.mac.type == e1000_i354)
559				e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
560			else
561				e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
562		}
563	}
564
565	/*
566	** Start from a known state; this is
567	** important for reading the NVM and
568	** MAC address from it.
569	*/
570	e1000_reset_hw(&adapter->hw);
571
572	/* Make sure we have a good EEPROM before we read from it */
573	if (((adapter->hw.mac.type != e1000_i210) &&
574	    (adapter->hw.mac.type != e1000_i211)) &&
575	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
576		/*
577		** Some PCI-E parts fail the first check due to
578		** the link being in a sleep state; call it again,
579		** and if it fails a second time it's a real issue.
580		*/
581		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
582			device_printf(dev,
583			    "The EEPROM Checksum Is Not Valid\n");
584			error = EIO;
585			goto err_late;
586		}
587	}
588
589	/*
590	** Copy the permanent MAC address out of the EEPROM
591	*/
592	if (e1000_read_mac_addr(&adapter->hw) < 0) {
593		device_printf(dev, "EEPROM read error while reading MAC"
594		    " address\n");
595		error = EIO;
596		goto err_late;
597	}
598	/* Check its sanity */
599	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
600		device_printf(dev, "Invalid MAC address\n");
601		error = EIO;
602		goto err_late;
603	}
604
605	/* Setup OS specific network interface */
606	if (igb_setup_interface(dev, adapter) != 0)
607		goto err_late;
608
609	/* Now get a good starting state */
610	igb_reset(adapter);
611
612	/* Initialize statistics */
613	igb_update_stats_counters(adapter);
614
615	adapter->hw.mac.get_link_status = 1;
616	igb_update_link_status(adapter);
617
618	/* Indicate SOL/IDER usage */
619	if (e1000_check_reset_block(&adapter->hw))
620		device_printf(dev,
621		    "PHY reset is blocked due to SOL/IDER session.\n");
622
623	/* Determine if we have to control management hardware */
624	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
625
626	/*
627	 * Set up Wake-on-LAN
628	 */
629	/* APME bit in EEPROM is mapped to WUC.APME */
630	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
631	if (eeprom_data)
632		adapter->wol = E1000_WUFC_MAG;
633
634	/* Register for VLAN events */
635	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
636	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
637	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
638	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
639
640	igb_add_hw_stats(adapter);
641
642	/* Tell the stack that the interface is not active */
643	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
644	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
645
646	adapter->led_dev = led_create(igb_led_func, adapter,
647	    device_get_nameunit(dev));
648
649	/*
650	** Configure Interrupts
651	*/
652	if ((adapter->msix > 1) && (igb_enable_msix))
653		error = igb_allocate_msix(adapter);
654	else /* MSI or Legacy */
655		error = igb_allocate_legacy(adapter);
656	if (error)
657		goto err_late;
658
659#ifdef DEV_NETMAP
660	igb_netmap_attach(adapter);
661#endif /* DEV_NETMAP */
662	INIT_DEBUGOUT("igb_attach: end");
663
664	return (0);
665
666err_late:
667	if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
668		return(error);
669	igb_free_transmit_structures(adapter);
670	igb_free_receive_structures(adapter);
671	igb_release_hw_control(adapter);
672err_pci:
673	igb_free_pci_resources(adapter);
674	if (adapter->ifp != NULL)
675		if_free(adapter->ifp);
676	free(adapter->mta, M_DEVBUF);
677	IGB_CORE_LOCK_DESTROY(adapter);
678
679	return (error);
680}
681
682/*********************************************************************
683 *  Device removal routine
684 *
685 *  The detach entry point is called when the driver is being removed.
686 *  This routine stops the adapter and deallocates all the resources
687 *  that were allocated for driver operation.
688 *
689 *  return 0 on success, positive on failure
690 *********************************************************************/
691
692static int
693igb_detach(device_t dev)
694{
695	struct adapter	*adapter = device_get_softc(dev);
696	struct ifnet	*ifp = adapter->ifp;
697
698	INIT_DEBUGOUT("igb_detach: begin");
699
700	/* Make sure VLANS are not using driver */
701	if (adapter->ifp->if_vlantrunk != NULL) {
702		device_printf(dev,"Vlan in use, detach first\n");
703		return (EBUSY);
704	}
705
706	ether_ifdetach(adapter->ifp);
707
708	if (adapter->led_dev != NULL)
709		led_destroy(adapter->led_dev);
710
711#ifdef DEVICE_POLLING
712	if (ifp->if_capenable & IFCAP_POLLING)
713		ether_poll_deregister(ifp);
714#endif
715
716	IGB_CORE_LOCK(adapter);
717	adapter->in_detach = 1;
718	igb_stop(adapter);
719	IGB_CORE_UNLOCK(adapter);
720
721	e1000_phy_hw_reset(&adapter->hw);
722
723	/* Give control back to firmware */
724	igb_release_manageability(adapter);
725	igb_release_hw_control(adapter);
726
727	/* Unregister VLAN events */
728	if (adapter->vlan_attach != NULL)
729		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
730	if (adapter->vlan_detach != NULL)
731		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
732
733	callout_drain(&adapter->timer);
734
735#ifdef DEV_NETMAP
736	netmap_detach(adapter->ifp);
737#endif /* DEV_NETMAP */
738	igb_free_pci_resources(adapter);
739	bus_generic_detach(dev);
740	if_free(ifp);
741
742	igb_free_transmit_structures(adapter);
743	igb_free_receive_structures(adapter);
744	if (adapter->mta != NULL)
745		free(adapter->mta, M_DEVBUF);
746
747	IGB_CORE_LOCK_DESTROY(adapter);
748
749	return (0);
750}
751
752/*********************************************************************
753 *
754 *  Shutdown entry point
755 *
756 **********************************************************************/
757
758static int
759igb_shutdown(device_t dev)
760{
761	return igb_suspend(dev);
762}
763
764/*
765 * Suspend/resume device methods.
766 */
767static int
768igb_suspend(device_t dev)
769{
770	struct adapter *adapter = device_get_softc(dev);
771
772	IGB_CORE_LOCK(adapter);
773
774	igb_stop(adapter);
775
776	igb_release_manageability(adapter);
777	igb_release_hw_control(adapter);
778	igb_enable_wakeup(dev);
779
780	IGB_CORE_UNLOCK(adapter);
781
782	return bus_generic_suspend(dev);
783}
784
785static int
786igb_resume(device_t dev)
787{
788	struct adapter *adapter = device_get_softc(dev);
789	struct tx_ring	*txr = adapter->tx_rings;
790	struct ifnet *ifp = adapter->ifp;
791
792	IGB_CORE_LOCK(adapter);
793	igb_init_locked(adapter);
794	igb_init_manageability(adapter);
795
796	if ((ifp->if_flags & IFF_UP) &&
797	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
798		for (int i = 0; i < adapter->num_queues; i++, txr++) {
799			IGB_TX_LOCK(txr);
800#ifndef IGB_LEGACY_TX
801			/* Process the stack queue only if not depleted */
802			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
803			    !drbr_empty(ifp, txr->br))
804				igb_mq_start_locked(ifp, txr);
805#else
806			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
807				igb_start_locked(txr, ifp);
808#endif
809			IGB_TX_UNLOCK(txr);
810		}
811	}
812	IGB_CORE_UNLOCK(adapter);
813
814	return bus_generic_resume(dev);
815}
816
817
818#ifdef IGB_LEGACY_TX
819
820/*********************************************************************
821 *  Transmit entry point
822 *
823 *  igb_start is called by the stack to initiate a transmit.
824 *  The driver will remain in this routine as long as there are
825 *  packets to transmit and transmit resources are available.
826 *  If transmit resources are not available, the stack is notified
827 *  and the packet is requeued.
828 **********************************************************************/
829
830static void
831igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
832{
833	struct adapter	*adapter = ifp->if_softc;
834	struct mbuf	*m_head;
835
836	IGB_TX_LOCK_ASSERT(txr);
837
838	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
839	    IFF_DRV_RUNNING)
840		return;
841	if (!adapter->link_active)
842		return;
843
844	/* Call cleanup if number of TX descriptors low */
845	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
846		igb_txeof(txr);
847
848	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
849		if (txr->tx_avail <= IGB_MAX_SCATTER) {
850			txr->queue_status |= IGB_QUEUE_DEPLETED;
851			break;
852		}
853		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
854		if (m_head == NULL)
855			break;
856		/*
857		 *  Encapsulation can modify our pointer, and/or make it
858		 *  NULL on failure.  In that event, we can't requeue.
859		 */
860		if (igb_xmit(txr, &m_head)) {
861			if (m_head != NULL)
862				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
863			if (txr->tx_avail <= IGB_MAX_SCATTER)
864				txr->queue_status |= IGB_QUEUE_DEPLETED;
865			break;
866		}
867
868		/* Send a copy of the frame to the BPF listener */
869		ETHER_BPF_MTAP(ifp, m_head);
870
871		/* Set watchdog on */
872		txr->watchdog_time = ticks;
873		txr->queue_status |= IGB_QUEUE_WORKING;
874	}
875}
876
877/*
878 * Legacy TX routine, called from the stack; it always
879 * uses the first ring (tx_rings[0]) and spins for its lock.
880 * It should not be used with multiqueue TX.
881 */
882static void
883igb_start(struct ifnet *ifp)
884{
885	struct adapter	*adapter = ifp->if_softc;
886	struct tx_ring	*txr = adapter->tx_rings;
887
888	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
889		IGB_TX_LOCK(txr);
890		igb_start_locked(txr, ifp);
891		IGB_TX_UNLOCK(txr);
892	}
893	return;
894}
895
896#else /* ~IGB_LEGACY_TX */
897
898/*
899** Multiqueue Transmit Entry:
900**  quick turnaround to the stack
901**
902*/
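/*
** Queue selection sketch (illustrative numbers only): with a hashed
** mbuf and, say, num_queues == 4, a packet with m_pkthdr.flowid == 13
** is enqueued on ring 13 % 4 == 1; unhashed packets fall back to
** curcpu % num_queues.
*/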
903static int
904igb_mq_start(struct ifnet *ifp, struct mbuf *m)
905{
906	struct adapter		*adapter = ifp->if_softc;
907	struct igb_queue	*que;
908	struct tx_ring		*txr;
909	int 			i, err = 0;
910
911	/* Which queue to use */
912	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
913		i = m->m_pkthdr.flowid % adapter->num_queues;
914	else
915		i = curcpu % adapter->num_queues;
916	txr = &adapter->tx_rings[i];
917	que = &adapter->queues[i];
918
919	err = drbr_enqueue(ifp, txr->br, m);
920	if (err)
921		return (err);
922	if (IGB_TX_TRYLOCK(txr)) {
923		igb_mq_start_locked(ifp, txr);
924		IGB_TX_UNLOCK(txr);
925	} else
926		taskqueue_enqueue(que->tq, &txr->txq_task);
927
928	return (0);
929}
930
931static int
932igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
933{
934	struct adapter  *adapter = txr->adapter;
935        struct mbuf     *next;
936        int             err = 0, enq = 0;
937
938	IGB_TX_LOCK_ASSERT(txr);
939
940	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
941	    adapter->link_active == 0)
942		return (ENETDOWN);
943
944	/* Process the queue */
945	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
946		if ((err = igb_xmit(txr, &next)) != 0) {
947			if (next == NULL) {
948				/* It was freed, move forward */
949				drbr_advance(ifp, txr->br);
950			} else {
951				/*
952				 * Still have one left, it may not be
953				 * the same since the transmit function
954				 * may have changed it.
955				 */
956				drbr_putback(ifp, txr->br, next);
957			}
958			break;
959		}
960		drbr_advance(ifp, txr->br);
961		enq++;
962		ifp->if_obytes += next->m_pkthdr.len;
963		if (next->m_flags & M_MCAST)
964			ifp->if_omcasts++;
965		ETHER_BPF_MTAP(ifp, next);
966		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
967			break;
968	}
969	if (enq > 0) {
970		/* Set the watchdog */
971		txr->queue_status |= IGB_QUEUE_WORKING;
972		txr->watchdog_time = ticks;
973	}
974	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
975		igb_txeof(txr);
976	if (txr->tx_avail <= IGB_MAX_SCATTER)
977		txr->queue_status |= IGB_QUEUE_DEPLETED;
978	return (err);
979}
980
981/*
982 * Called from a taskqueue to drain queued transmit packets.
983 */
984static void
985igb_deferred_mq_start(void *arg, int pending)
986{
987	struct tx_ring *txr = arg;
988	struct adapter *adapter = txr->adapter;
989	struct ifnet *ifp = adapter->ifp;
990
991	IGB_TX_LOCK(txr);
992	if (!drbr_empty(ifp, txr->br))
993		igb_mq_start_locked(ifp, txr);
994	IGB_TX_UNLOCK(txr);
995}
996
997/*
998** Flush all ring buffers
999*/
1000static void
1001igb_qflush(struct ifnet *ifp)
1002{
1003	struct adapter	*adapter = ifp->if_softc;
1004	struct tx_ring	*txr = adapter->tx_rings;
1005	struct mbuf	*m;
1006
1007	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1008		IGB_TX_LOCK(txr);
1009		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1010			m_freem(m);
1011		IGB_TX_UNLOCK(txr);
1012	}
1013	if_qflush(ifp);
1014}
1015#endif /* ~IGB_LEGACY_TX */
1016
1017/*********************************************************************
1018 *  Ioctl entry point
1019 *
1020 *  igb_ioctl is called when the user wants to configure the
1021 *  interface.
1022 *
1023 *  return 0 on success, positive on failure
1024 **********************************************************************/
1025
1026static int
1027igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1028{
1029	struct adapter	*adapter = ifp->if_softc;
1030	struct ifreq	*ifr = (struct ifreq *)data;
1031#if defined(INET) || defined(INET6)
1032	struct ifaddr	*ifa = (struct ifaddr *)data;
1033#endif
1034	bool		avoid_reset = FALSE;
1035	int		error = 0;
1036
1037	if (adapter->in_detach)
1038		return (error);
1039
1040	switch (command) {
1041	case SIOCSIFADDR:
1042#ifdef INET
1043		if (ifa->ifa_addr->sa_family == AF_INET)
1044			avoid_reset = TRUE;
1045#endif
1046#ifdef INET6
1047		if (ifa->ifa_addr->sa_family == AF_INET6)
1048			avoid_reset = TRUE;
1049#endif
1050		/*
1051		** Calling init results in link renegotiation,
1052		** so we avoid doing it when possible.
1053		*/
1054		if (avoid_reset) {
1055			ifp->if_flags |= IFF_UP;
1056			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1057				igb_init(adapter);
1058#ifdef INET
1059			if (!(ifp->if_flags & IFF_NOARP))
1060				arp_ifinit(ifp, ifa);
1061#endif
1062		} else
1063			error = ether_ioctl(ifp, command, data);
1064		break;
1065	case SIOCSIFMTU:
1066	    {
1067		int max_frame_size;
1068
1069		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1070
1071		IGB_CORE_LOCK(adapter);
1072		max_frame_size = 9234;
1073		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1074		    ETHER_CRC_LEN) {
1075			IGB_CORE_UNLOCK(adapter);
1076			error = EINVAL;
1077			break;
1078		}
1079
1080		ifp->if_mtu = ifr->ifr_mtu;
1081		adapter->max_frame_size =
1082		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1083		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1084			igb_init_locked(adapter);
1085		IGB_CORE_UNLOCK(adapter);
1086		break;
1087	    }
1088	case SIOCSIFFLAGS:
1089		IOCTL_DEBUGOUT(
1090		    "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1091		IGB_CORE_LOCK(adapter);
1092		if (ifp->if_flags & IFF_UP) {
1093			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1094				if ((ifp->if_flags ^ adapter->if_flags) &
1095				    (IFF_PROMISC | IFF_ALLMULTI)) {
1096					igb_disable_promisc(adapter);
1097					igb_set_promisc(adapter);
1098				}
1099			} else
1100				igb_init_locked(adapter);
1101		} else
1102			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1103				igb_stop(adapter);
1104		adapter->if_flags = ifp->if_flags;
1105		IGB_CORE_UNLOCK(adapter);
1106		break;
1107	case SIOCADDMULTI:
1108	case SIOCDELMULTI:
1109		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1110		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1111			IGB_CORE_LOCK(adapter);
1112			igb_disable_intr(adapter);
1113			igb_set_multi(adapter);
1114#ifdef DEVICE_POLLING
1115			if (!(ifp->if_capenable & IFCAP_POLLING))
1116#endif
1117				igb_enable_intr(adapter);
1118			IGB_CORE_UNLOCK(adapter);
1119		}
1120		break;
1121	case SIOCSIFMEDIA:
1122		/* Check SOL/IDER usage */
1123		IGB_CORE_LOCK(adapter);
1124		if (e1000_check_reset_block(&adapter->hw)) {
1125			IGB_CORE_UNLOCK(adapter);
1126			device_printf(adapter->dev, "Media change is"
1127			    " blocked due to SOL/IDER session.\n");
1128			break;
1129		}
1130		IGB_CORE_UNLOCK(adapter);
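		/* FALLTHROUGH */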
1131	case SIOCGIFMEDIA:
1132		IOCTL_DEBUGOUT(
1133		    "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1134		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1135		break;
1136	case SIOCSIFCAP:
1137	    {
1138		int mask, reinit;
1139
1140		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1141		reinit = 0;
1142		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1143#ifdef DEVICE_POLLING
1144		if (mask & IFCAP_POLLING) {
1145			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1146				error = ether_poll_register(igb_poll, ifp);
1147				if (error)
1148					return (error);
1149				IGB_CORE_LOCK(adapter);
1150				igb_disable_intr(adapter);
1151				ifp->if_capenable |= IFCAP_POLLING;
1152				IGB_CORE_UNLOCK(adapter);
1153			} else {
1154				error = ether_poll_deregister(ifp);
1155				/* Enable interrupt even in error case */
1156				IGB_CORE_LOCK(adapter);
1157				igb_enable_intr(adapter);
1158				ifp->if_capenable &= ~IFCAP_POLLING;
1159				IGB_CORE_UNLOCK(adapter);
1160			}
1161		}
1162#endif
1163#if __FreeBSD_version >= 1000000
1164		/* HW cannot turn these on/off separately */
1165		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1166			ifp->if_capenable ^= IFCAP_RXCSUM;
1167			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1168			reinit = 1;
1169		}
1170		if (mask & IFCAP_TXCSUM) {
1171			ifp->if_capenable ^= IFCAP_TXCSUM;
1172			reinit = 1;
1173		}
1174		if (mask & IFCAP_TXCSUM_IPV6) {
1175			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1176			reinit = 1;
1177		}
1178#else
1179		if (mask & IFCAP_HWCSUM) {
1180			ifp->if_capenable ^= IFCAP_HWCSUM;
1181			reinit = 1;
1182		}
1183#endif
1184		if (mask & IFCAP_TSO4) {
1185			ifp->if_capenable ^= IFCAP_TSO4;
1186			reinit = 1;
1187		}
1188		if (mask & IFCAP_TSO6) {
1189			ifp->if_capenable ^= IFCAP_TSO6;
1190			reinit = 1;
1191		}
1192		if (mask & IFCAP_VLAN_HWTAGGING) {
1193			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1194			reinit = 1;
1195		}
1196		if (mask & IFCAP_VLAN_HWFILTER) {
1197			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1198			reinit = 1;
1199		}
1200		if (mask & IFCAP_VLAN_HWTSO) {
1201			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1202			reinit = 1;
1203		}
1204		if (mask & IFCAP_LRO) {
1205			ifp->if_capenable ^= IFCAP_LRO;
1206			reinit = 1;
1207		}
1208		if (mask & IFCAP_WOL) {
1209			if (mask & IFCAP_WOL_MAGIC)
1210				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1211			if (mask & IFCAP_WOL_MCAST)
1212				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1213			if (mask & IFCAP_WOL_UCAST)
1214				ifp->if_capenable ^= IFCAP_WOL_UCAST;
1215		}
1216		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1217			igb_init(adapter);
1218		VLAN_CAPABILITIES(ifp);
1219		break;
1220	    }
1221
1222	default:
1223		error = ether_ioctl(ifp, command, data);
1224		break;
1225	}
1226
1227	return (error);
1228}
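/*
 * Usage sketch (hypothetical interface name): most of the cases above are
 * reached through ifconfig(8), for example:
 *   ifconfig igb0 mtu 9000        -> SIOCSIFMTU
 *   ifconfig igb0 -tso -lro       -> SIOCSIFCAP
 *   ifconfig igb0 promisc         -> SIOCSIFFLAGS
 */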
1229
1230
1231/*********************************************************************
1232 *  Init entry point
1233 *
1234 *  This routine is used in two ways.  It is used by the stack as the
1235 *  init entry point in the network interface structure.  It is also
1236 *  used by the driver as a hw/sw initialization routine to get to a
1237 *  consistent state.
1238 *
1239 *  This routine returns nothing.
1240 **********************************************************************/
1241
1242static void
1243igb_init_locked(struct adapter *adapter)
1244{
1245	struct ifnet	*ifp = adapter->ifp;
1246	device_t	dev = adapter->dev;
1247
1248	INIT_DEBUGOUT("igb_init: begin");
1249
1250	IGB_CORE_LOCK_ASSERT(adapter);
1251
1252	igb_disable_intr(adapter);
1253	callout_stop(&adapter->timer);
1254
1255	/* Get the latest mac address, User can use a LAA */
1256        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1257              ETHER_ADDR_LEN);
1258
1259	/* Put the address into the Receive Address Array */
1260	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1261
1262	igb_reset(adapter);
1263	igb_update_link_status(adapter);
1264
1265	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1266
1267	/* Set hardware offload abilities */
1268	ifp->if_hwassist = 0;
1269	if (ifp->if_capenable & IFCAP_TXCSUM) {
1270#if __FreeBSD_version >= 1000000
1271		ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
1272		if (adapter->hw.mac.type != e1000_82575)
1273			ifp->if_hwassist |= CSUM_IP_SCTP;
1274#else
1275		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1276#if __FreeBSD_version >= 800000
1277		if (adapter->hw.mac.type != e1000_82575)
1278			ifp->if_hwassist |= CSUM_SCTP;
1279#endif
1280#endif
1281	}
1282
1283#if __FreeBSD_version >= 1000000
1284	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
1285		ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
1286		if (adapter->hw.mac.type != e1000_82575)
1287			ifp->if_hwassist |= CSUM_IP6_SCTP;
1288	}
1289#endif
1290	if (ifp->if_capenable & IFCAP_TSO)
1291		ifp->if_hwassist |= CSUM_TSO;
1292
1293	/* Clear bad data from Rx FIFOs */
1294	e1000_rx_fifo_flush_82575(&adapter->hw);
1295
1296	/* Configure for OS presence */
1297	igb_init_manageability(adapter);
1298
1299	/* Prepare transmit descriptors and buffers */
1300	igb_setup_transmit_structures(adapter);
1301	igb_initialize_transmit_units(adapter);
1302
1303	/* Setup Multicast table */
1304	igb_set_multi(adapter);
1305
1306	/*
1307	** Figure out the desired mbuf pool
1308	** for doing jumbo/packetsplit
1309	*/
1310	if (adapter->max_frame_size <= 2048)
1311		adapter->rx_mbuf_sz = MCLBYTES;
1312#ifndef CONTIGMALLOC_WORKS
1313	else
1314		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1315#else
1316	else if (adapter->max_frame_size <= 4096)
1317		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1318	else
1319		adapter->rx_mbuf_sz = MJUM9BYTES;
1320#endif
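	/*
	** Worked example for the three-way case above (assuming the usual
	** FreeBSD cluster sizes): an MTU of 9000 gives a max_frame_size of
	** 9000 + 14 + 4 = 9018 bytes, which fits neither MCLBYTES (2k) nor
	** MJUMPAGESIZE (one page, typically 4k), so 9k MJUM9BYTES clusters
	** are selected.
	*/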
1321
1322	/* Prepare receive descriptors and buffers */
1323	if (igb_setup_receive_structures(adapter)) {
1324		device_printf(dev, "Could not setup receive structures\n");
1325		return;
1326	}
1327	igb_initialize_receive_units(adapter);
1328
1329        /* Enable VLAN support */
1330	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1331		igb_setup_vlan_hw_support(adapter);
1332
1333	/* Don't lose promiscuous settings */
1334	igb_set_promisc(adapter);
1335
1336	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1337	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1338
1339	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1340	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1341
1342	if (adapter->msix > 1) /* Set up queue routing */
1343		igb_configure_queues(adapter);
1344
1345	/* this clears any pending interrupts */
1346	E1000_READ_REG(&adapter->hw, E1000_ICR);
1347#ifdef DEVICE_POLLING
1348	/*
1349	 * Only enable interrupts if we are not polling; make sure
1350	 * they are off otherwise.
1351	 */
1352	if (ifp->if_capenable & IFCAP_POLLING)
1353		igb_disable_intr(adapter);
1354	else
1355#endif /* DEVICE_POLLING */
1356	{
1357		igb_enable_intr(adapter);
1358		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1359	}
1360
1361	/* Set Energy Efficient Ethernet */
1362	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1363		if (adapter->hw.mac.type == e1000_i354)
1364			e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
1365		else
1366			e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
1367	}
1368}
1369
1370static void
1371igb_init(void *arg)
1372{
1373	struct adapter *adapter = arg;
1374
1375	IGB_CORE_LOCK(adapter);
1376	igb_init_locked(adapter);
1377	IGB_CORE_UNLOCK(adapter);
1378}
1379
1380
1381static void
1382igb_handle_que(void *context, int pending)
1383{
1384	struct igb_queue *que = context;
1385	struct adapter *adapter = que->adapter;
1386	struct tx_ring *txr = que->txr;
1387	struct ifnet	*ifp = adapter->ifp;
1388
1389	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1390		bool	more;
1391
1392		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1393
1394		IGB_TX_LOCK(txr);
1395		igb_txeof(txr);
1396#ifndef IGB_LEGACY_TX
1397		/* Process the stack queue only if not depleted */
1398		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1399		    !drbr_empty(ifp, txr->br))
1400			igb_mq_start_locked(ifp, txr);
1401#else
1402		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1403			igb_start_locked(txr, ifp);
1404#endif
1405		IGB_TX_UNLOCK(txr);
1406		/* Do we need another? */
1407		if (more) {
1408			taskqueue_enqueue(que->tq, &que->que_task);
1409			return;
1410		}
1411	}
1412
1413#ifdef DEVICE_POLLING
1414	if (ifp->if_capenable & IFCAP_POLLING)
1415		return;
1416#endif
1417	/* Reenable this interrupt */
1418	if (que->eims)
1419		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1420	else
1421		igb_enable_intr(adapter);
1422}
1423
1424/* Deal with link in a sleepable context */
1425static void
1426igb_handle_link(void *context, int pending)
1427{
1428	struct adapter *adapter = context;
1429
1430	IGB_CORE_LOCK(adapter);
1431	igb_handle_link_locked(adapter);
1432	IGB_CORE_UNLOCK(adapter);
1433}
1434
1435static void
1436igb_handle_link_locked(struct adapter *adapter)
1437{
1438	struct tx_ring	*txr = adapter->tx_rings;
1439	struct ifnet *ifp = adapter->ifp;
1440
1441	IGB_CORE_LOCK_ASSERT(adapter);
1442	adapter->hw.mac.get_link_status = 1;
1443	igb_update_link_status(adapter);
1444	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1445		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1446			IGB_TX_LOCK(txr);
1447#ifndef IGB_LEGACY_TX
1448			/* Process the stack queue only if not depleted */
1449			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1450			    !drbr_empty(ifp, txr->br))
1451				igb_mq_start_locked(ifp, txr);
1452#else
1453			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1454				igb_start_locked(txr, ifp);
1455#endif
1456			IGB_TX_UNLOCK(txr);
1457		}
1458	}
1459}
1460
1461/*********************************************************************
1462 *
1463 *  MSI/Legacy interrupt filter routine; the real work is
1464 *  deferred to a taskqueue.
1465 *
1466 *********************************************************************/
1467static int
1468igb_irq_fast(void *arg)
1469{
1470	struct adapter		*adapter = arg;
1471	struct igb_queue	*que = adapter->queues;
1472	u32			reg_icr;
1473
1474
1475	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1476
1477	/* Hot eject?  */
1478	if (reg_icr == 0xffffffff)
1479		return FILTER_STRAY;
1480
1481	/* Definitely not our interrupt.  */
1482	if (reg_icr == 0x0)
1483		return FILTER_STRAY;
1484
1485	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1486		return FILTER_STRAY;
1487
1488	/*
1489	 * Mask interrupts until the taskqueue is finished running.  This is
1490	 * cheap, just assume that it is needed.  This also works around the
1491	 * MSI message reordering errata on certain systems.
1492	 */
1493	igb_disable_intr(adapter);
1494	taskqueue_enqueue(que->tq, &que->que_task);
1495
1496	/* Link status change */
1497	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1498		taskqueue_enqueue(que->tq, &adapter->link_task);
1499
1500	if (reg_icr & E1000_ICR_RXO)
1501		adapter->rx_overruns++;
1502	return FILTER_HANDLED;
1503}
1504
1505#ifdef DEVICE_POLLING
1506#if __FreeBSD_version >= 800000
1507#define POLL_RETURN_COUNT(a) (a)
1508static int
1509#else
1510#define POLL_RETURN_COUNT(a)
1511static void
1512#endif
1513igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1514{
1515	struct adapter		*adapter = ifp->if_softc;
1516	struct igb_queue	*que;
1517	struct tx_ring		*txr;
1518	u32			reg_icr, rx_done = 0;
1519	u32			loop = IGB_MAX_LOOP;
1520	bool			more;
1521
1522	IGB_CORE_LOCK(adapter);
1523	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1524		IGB_CORE_UNLOCK(adapter);
1525		return POLL_RETURN_COUNT(rx_done);
1526	}
1527
1528	if (cmd == POLL_AND_CHECK_STATUS) {
1529		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1530		/* Link status change */
1531		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1532			igb_handle_link_locked(adapter);
1533
1534		if (reg_icr & E1000_ICR_RXO)
1535			adapter->rx_overruns++;
1536	}
1537	IGB_CORE_UNLOCK(adapter);
1538
1539	for (int i = 0; i < adapter->num_queues; i++) {
1540		que = &adapter->queues[i];
1541		txr = que->txr;
1542
1543		igb_rxeof(que, count, &rx_done);
1544
1545		IGB_TX_LOCK(txr);
1546		do {
1547			more = igb_txeof(txr);
1548		} while (loop-- && more);
1549#ifndef IGB_LEGACY_TX
1550		if (!drbr_empty(ifp, txr->br))
1551			igb_mq_start_locked(ifp, txr);
1552#else
1553		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1554			igb_start_locked(txr, ifp);
1555#endif
1556		IGB_TX_UNLOCK(txr);
1557	}
1558
1559	return POLL_RETURN_COUNT(rx_done);
1560}
1561#endif /* DEVICE_POLLING */
1562
1563/*********************************************************************
1564 *
1565 *  MSIX Que Interrupt Service routine
1566 *
1567 **********************************************************************/
1568static void
1569igb_msix_que(void *arg)
1570{
1571	struct igb_queue *que = arg;
1572	struct adapter *adapter = que->adapter;
1573	struct ifnet   *ifp = adapter->ifp;
1574	struct tx_ring *txr = que->txr;
1575	struct rx_ring *rxr = que->rxr;
1576	u32		newitr = 0;
1577	bool		more_rx;
1578
1579	/* Ignore spurious interrupts */
1580	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1581		return;
1582
1583	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1584	++que->irqs;
1585
1586	IGB_TX_LOCK(txr);
1587	igb_txeof(txr);
1588#ifndef IGB_LEGACY_TX
1589	/* Process the stack queue only if not depleted */
1590	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1591	    !drbr_empty(ifp, txr->br))
1592		igb_mq_start_locked(ifp, txr);
1593#else
1594	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1595		igb_start_locked(txr, ifp);
1596#endif
1597	IGB_TX_UNLOCK(txr);
1598
1599	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1600
1601	if (adapter->enable_aim == FALSE)
1602		goto no_calc;
1603	/*
1604	** Do Adaptive Interrupt Moderation:
1605        **  - Write out last calculated setting
1606	**  - Calculate based on average size over
1607	**    the last interval.
1608	*/
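	/*
	** Illustrative trace (made-up numbers): at 1Gb/s with an average
	** frame of 1500 bytes, newitr = 1500 + 24 = 1524; that falls
	** outside the 300-1200 mid range, so it is halved to 762 and then
	** masked with 0x7FFC, giving an EITR setting of 760.
	*/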
1609        if (que->eitr_setting)
1610                E1000_WRITE_REG(&adapter->hw,
1611                    E1000_EITR(que->msix), que->eitr_setting);
1612
1613        que->eitr_setting = 0;
1614
1615        /* Idle, do nothing */
1616        if ((txr->bytes == 0) && (rxr->bytes == 0))
1617                goto no_calc;
1618
1619        /* Use half the default if sub-gig */
1620        if (adapter->link_speed != 1000)
1621                newitr = IGB_DEFAULT_ITR / 2;
1622        else {
1623		if ((txr->bytes) && (txr->packets))
1624                	newitr = txr->bytes/txr->packets;
1625		if ((rxr->bytes) && (rxr->packets))
1626			newitr = max(newitr,
1627			    (rxr->bytes / rxr->packets));
1628                newitr += 24; /* account for hardware frame, crc */
1629		/* set an upper boundary */
1630		newitr = min(newitr, 3000);
1631		/* Be nice to the mid range */
1632                if ((newitr > 300) && (newitr < 1200))
1633                        newitr = (newitr / 3);
1634                else
1635                        newitr = (newitr / 2);
1636        }
1637        newitr &= 0x7FFC;  /* Mask invalid bits */
1638        if (adapter->hw.mac.type == e1000_82575)
1639                newitr |= newitr << 16;
1640        else
1641                newitr |= E1000_EITR_CNT_IGNR;
1642
1643        /* save for next interrupt */
1644        que->eitr_setting = newitr;
1645
1646        /* Reset state */
1647        txr->bytes = 0;
1648        txr->packets = 0;
1649        rxr->bytes = 0;
1650        rxr->packets = 0;
1651
1652no_calc:
1653	/* Schedule a clean task if needed */
1654	if (more_rx)
1655		taskqueue_enqueue(que->tq, &que->que_task);
1656	else
1657		/* Reenable this interrupt */
1658		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1659	return;
1660}
1661
1662
1663/*********************************************************************
1664 *
1665 *  MSIX Link Interrupt Service routine
1666 *
1667 **********************************************************************/
1668
1669static void
1670igb_msix_link(void *arg)
1671{
1672	struct adapter	*adapter = arg;
1673	u32       	icr;
1674
1675	++adapter->link_irq;
1676	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1677	if (!(icr & E1000_ICR_LSC))
1678		goto spurious;
1679	igb_handle_link(adapter, 0);
1680
1681spurious:
1682	/* Rearm */
1683	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1684	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1685	return;
1686}
1687
1688
1689/*********************************************************************
1690 *
1691 *  Media Ioctl callback
1692 *
1693 *  This routine is called whenever the user queries the status of
1694 *  the interface using ifconfig.
1695 *
1696 **********************************************************************/
1697static void
1698igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1699{
1700	struct adapter *adapter = ifp->if_softc;
1701
1702	INIT_DEBUGOUT("igb_media_status: begin");
1703
1704	IGB_CORE_LOCK(adapter);
1705	igb_update_link_status(adapter);
1706
1707	ifmr->ifm_status = IFM_AVALID;
1708	ifmr->ifm_active = IFM_ETHER;
1709
1710	if (!adapter->link_active) {
1711		IGB_CORE_UNLOCK(adapter);
1712		return;
1713	}
1714
1715	ifmr->ifm_status |= IFM_ACTIVE;
1716
1717	switch (adapter->link_speed) {
1718	case 10:
1719		ifmr->ifm_active |= IFM_10_T;
1720		break;
1721	case 100:
1722		/*
1723		** Support for 100Mb SFP - these are Fiber
1724		** but the media type appears as serdes
1725		*/
1726		if (adapter->hw.phy.media_type ==
1727		    e1000_media_type_internal_serdes)
1728			ifmr->ifm_active |= IFM_100_FX;
1729		else
1730			ifmr->ifm_active |= IFM_100_TX;
1731		break;
1732	case 1000:
1733		ifmr->ifm_active |= IFM_1000_T;
1734		break;
1735	case 2500:
1736		ifmr->ifm_active |= IFM_2500_SX;
1737		break;
1738	}
1739
1740	if (adapter->link_duplex == FULL_DUPLEX)
1741		ifmr->ifm_active |= IFM_FDX;
1742	else
1743		ifmr->ifm_active |= IFM_HDX;
1744
1745	IGB_CORE_UNLOCK(adapter);
1746}
1747
1748/*********************************************************************
1749 *
1750 *  Media Ioctl callback
1751 *
1752 *  This routine is called when the user changes speed/duplex using
1753 *  the media/mediaopt options with ifconfig.
1754 *
1755 **********************************************************************/
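/*
 * Example (hypothetical interface name): forcing 100 Mb/s full duplex from
 * userland takes this path, e.g.:
 *   ifconfig igb0 media 100baseTX mediaopt full-duplex
 * and "ifconfig igb0 media autoselect" restores autonegotiation.
 */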
1756static int
1757igb_media_change(struct ifnet *ifp)
1758{
1759	struct adapter *adapter = ifp->if_softc;
1760	struct ifmedia  *ifm = &adapter->media;
1761
1762	INIT_DEBUGOUT("igb_media_change: begin");
1763
1764	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1765		return (EINVAL);
1766
1767	IGB_CORE_LOCK(adapter);
1768	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1769	case IFM_AUTO:
1770		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1771		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1772		break;
1773	case IFM_1000_LX:
1774	case IFM_1000_SX:
1775	case IFM_1000_T:
1776		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1777		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1778		break;
1779	case IFM_100_TX:
1780		adapter->hw.mac.autoneg = FALSE;
1781		adapter->hw.phy.autoneg_advertised = 0;
1782		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1783			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1784		else
1785			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1786		break;
1787	case IFM_10_T:
1788		adapter->hw.mac.autoneg = FALSE;
1789		adapter->hw.phy.autoneg_advertised = 0;
1790		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1791			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1792		else
1793			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1794		break;
1795	default:
1796		device_printf(adapter->dev, "Unsupported media type\n");
1797	}
1798
1799	igb_init_locked(adapter);
1800	IGB_CORE_UNLOCK(adapter);
1801
1802	return (0);
1803}
1804
1805
1806/*********************************************************************
1807 *
1808 *  This routine maps the mbufs to Advanced TX descriptors.
1809 *
1810 **********************************************************************/
1811static int
1812igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1813{
1814	struct adapter  *adapter = txr->adapter;
1815	u32		olinfo_status = 0, cmd_type_len;
1816	int             i, j, error, nsegs;
1817	int		first;
1818	bool		remap = TRUE;
1819	struct mbuf	*m_head;
1820	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1821	bus_dmamap_t	map;
1822	struct igb_tx_buf *txbuf;
1823	union e1000_adv_tx_desc *txd = NULL;
1824
1825	m_head = *m_headp;
1826
1827	/* Basic descriptor defines */
1828        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1829	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1830
1831	if (m_head->m_flags & M_VLANTAG)
1832        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1833
        /*
         * Capture the index of the first descriptor used, because
         * its buffer entry will later record the EOP descriptor that
         * the hardware reports back on completion.
         */
1839        first = txr->next_avail_desc;
1840	txbuf = &txr->tx_buffers[first];
1841	map = txbuf->map;
1842
1843	/*
1844	 * Map the packet for DMA.
1845	 */
1846retry:
1847	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1848	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1849
1850	if (__predict_false(error)) {
1851		struct mbuf *m;
1852
1853		switch (error) {
1854		case EFBIG:
1855			/* Try it again? - one try */
1856			if (remap == TRUE) {
1857				remap = FALSE;
1858				m = m_collapse(*m_headp, M_NOWAIT,
1859				    IGB_MAX_SCATTER);
1860				if (m == NULL) {
1861					adapter->mbuf_defrag_failed++;
1862					m_freem(*m_headp);
1863					*m_headp = NULL;
1864					return (ENOBUFS);
1865				}
1866				*m_headp = m;
1867				goto retry;
1868			} else
1869				return (error);
1870		default:
1871			txr->no_tx_dma_setup++;
1872			m_freem(*m_headp);
1873			*m_headp = NULL;
1874			return (error);
1875		}
1876	}
1877
1878	/* Make certain there are enough descriptors */
1879	if (txr->tx_avail < (nsegs + 2)) {
1880		txr->no_desc_avail++;
1881		bus_dmamap_unload(txr->txtag, map);
1882		return (ENOBUFS);
1883	}
1884	m_head = *m_headp;
1885
	/*
	** Set up the appropriate offload context;
	** this will consume the first descriptor.
	*/
1890	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1891	if (__predict_false(error)) {
1892		m_freem(*m_headp);
1893		*m_headp = NULL;
1894		return (error);
1895	}
1896
1897	/* 82575 needs the queue index added */
1898	if (adapter->hw.mac.type == e1000_82575)
1899		olinfo_status |= txr->me << 4;
1900
1901	i = txr->next_avail_desc;
1902	for (j = 0; j < nsegs; j++) {
1903		bus_size_t seglen;
1904		bus_addr_t segaddr;
1905
1906		txbuf = &txr->tx_buffers[i];
1907		txd = &txr->tx_base[i];
1908		seglen = segs[j].ds_len;
1909		segaddr = htole64(segs[j].ds_addr);
1910
1911		txd->read.buffer_addr = segaddr;
1912		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1913		    cmd_type_len | seglen);
1914		txd->read.olinfo_status = htole32(olinfo_status);
1915
1916		if (++i == txr->num_desc)
1917			i = 0;
1918	}
1919
1920	txd->read.cmd_type_len |=
1921	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1922	txr->tx_avail -= nsegs;
1923	txr->next_avail_desc = i;
1924
1925	txbuf->m_head = m_head;
	/*
	** Here we swap the maps so that the last descriptor, which
	** gets the completion interrupt, holds the real map, and the
	** first descriptor gets the unused map from this slot.
	*/
1932	txr->tx_buffers[first].map = txbuf->map;
1933	txbuf->map = map;
1934	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1935
1936        /* Set the EOP descriptor that will be marked done */
1937        txbuf = &txr->tx_buffers[first];
1938	txbuf->eop = txd;
1939
1940        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1941            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
1946	++txr->total_packets;
1947	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1948
1949	return (0);
1950}
1951static void
1952igb_set_promisc(struct adapter *adapter)
1953{
1954	struct ifnet	*ifp = adapter->ifp;
1955	struct e1000_hw *hw = &adapter->hw;
1956	u32		reg;
1957
1958	if (adapter->vf_ifp) {
1959		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1960		return;
1961	}
1962
1963	reg = E1000_READ_REG(hw, E1000_RCTL);
1964	if (ifp->if_flags & IFF_PROMISC) {
1965		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1966		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1967	} else if (ifp->if_flags & IFF_ALLMULTI) {
1968		reg |= E1000_RCTL_MPE;
1969		reg &= ~E1000_RCTL_UPE;
1970		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1971	}
1972}
1973
1974static void
1975igb_disable_promisc(struct adapter *adapter)
1976{
1977	struct e1000_hw *hw = &adapter->hw;
1978	struct ifnet	*ifp = adapter->ifp;
1979	u32		reg;
1980	int		mcnt = 0;
1981
1982	if (adapter->vf_ifp) {
1983		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1984		return;
1985	}
1986	reg = E1000_READ_REG(hw, E1000_RCTL);
1987	reg &=  (~E1000_RCTL_UPE);
1988	if (ifp->if_flags & IFF_ALLMULTI)
1989		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1990	else {
1991		struct  ifmultiaddr *ifma;
1992#if __FreeBSD_version < 800000
1993		IF_ADDR_LOCK(ifp);
1994#else
1995		if_maddr_rlock(ifp);
1996#endif
1997		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1998			if (ifma->ifma_addr->sa_family != AF_LINK)
1999				continue;
2000			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2001				break;
2002			mcnt++;
2003		}
2004#if __FreeBSD_version < 800000
2005		IF_ADDR_UNLOCK(ifp);
2006#else
2007		if_maddr_runlock(ifp);
2008#endif
2009	}
	/* Don't clear MPE if we are at the multicast limit */
2011	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2012		reg &=  (~E1000_RCTL_MPE);
2013	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2014}
2015
2016
2017/*********************************************************************
2018 *  Multicast Update
2019 *
 *  This routine is called whenever the multicast address list is updated.
2021 *
2022 **********************************************************************/
2023
2024static void
2025igb_set_multi(struct adapter *adapter)
2026{
2027	struct ifnet	*ifp = adapter->ifp;
2028	struct ifmultiaddr *ifma;
2029	u32 reg_rctl = 0;
2030	u8  *mta;
2031
2032	int mcnt = 0;
2033
2034	IOCTL_DEBUGOUT("igb_set_multi: begin");
2035
2036	mta = adapter->mta;
2037	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2038	    MAX_NUM_MULTICAST_ADDRESSES);
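	/*
	 * mta is a flat table of 6-byte (ETH_ADDR_LEN) entries, one per
	 * multicast address; it is filled in order below and then handed
	 * to e1000_update_mc_addr_list() along with the count.
	 */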
2039
2040#if __FreeBSD_version < 800000
2041	IF_ADDR_LOCK(ifp);
2042#else
2043	if_maddr_rlock(ifp);
2044#endif
2045	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2046		if (ifma->ifma_addr->sa_family != AF_LINK)
2047			continue;
2048
2049		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2050			break;
2051
2052		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2053		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2054		mcnt++;
2055	}
2056#if __FreeBSD_version < 800000
2057	IF_ADDR_UNLOCK(ifp);
2058#else
2059	if_maddr_runlock(ifp);
2060#endif
2061
2062	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2063		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2064		reg_rctl |= E1000_RCTL_MPE;
2065		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2066	} else
2067		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2068}
2069
2070
2071/*********************************************************************
2072 *  Timer routine:
2073 *  	This routine checks for link status,
2074 *	updates statistics, and does the watchdog.
2075 *
2076 **********************************************************************/
2077
2078static void
2079igb_local_timer(void *arg)
2080{
2081	struct adapter		*adapter = arg;
2082	device_t		dev = adapter->dev;
2083	struct ifnet		*ifp = adapter->ifp;
2084	struct tx_ring		*txr = adapter->tx_rings;
2085	struct igb_queue	*que = adapter->queues;
2086	int			hung = 0, busy = 0;
2087
2088
2089	IGB_CORE_LOCK_ASSERT(adapter);
2090
2091	igb_update_link_status(adapter);
2092	igb_update_stats_counters(adapter);
2093
2094        /*
2095        ** Check the TX queues status
2096	**	- central locked handling of OACTIVE
2097	**	- watchdog only if all queues show hung
2098        */
2099	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2100		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2101		    (adapter->pause_frames == 0))
2102			++hung;
2103		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2104			++busy;
2105		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2106			taskqueue_enqueue(que->tq, &que->que_task);
2107	}
2108	if (hung == adapter->num_queues)
2109		goto timeout;
2110	if (busy == adapter->num_queues)
2111		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2112	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2113	    (busy < adapter->num_queues))
2114		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2115
2116	adapter->pause_frames = 0;
2117	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2118#ifndef DEVICE_POLLING
2119	/* Schedule all queue interrupts - deadlock protection */
2120	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2121#endif
2122	return;
2123
2124timeout:
2125	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2126	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2127            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2128            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(dev, "TX(%d) desc avail = %d, "
            "Next TX to Clean = %d\n",
            txr->me, txr->tx_avail, txr->next_to_clean);
2132	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2133	adapter->watchdog_events++;
2134	igb_init_locked(adapter);
2135}
2136
2137static void
2138igb_update_link_status(struct adapter *adapter)
2139{
2140	struct e1000_hw		*hw = &adapter->hw;
2141	struct e1000_fc_info	*fc = &hw->fc;
2142	struct ifnet		*ifp = adapter->ifp;
2143	device_t		dev = adapter->dev;
2144	struct tx_ring		*txr = adapter->tx_rings;
2145	u32			link_check, thstat, ctrl;
2146	char			*flowctl = NULL;
2147
2148	link_check = thstat = ctrl = 0;
2149
2150	/* Get the cached link value or read for real */
2151        switch (hw->phy.media_type) {
2152        case e1000_media_type_copper:
2153                if (hw->mac.get_link_status) {
2154			/* Do the work to read phy */
2155                        e1000_check_for_link(hw);
2156                        link_check = !hw->mac.get_link_status;
2157                } else
2158                        link_check = TRUE;
2159                break;
2160        case e1000_media_type_fiber:
2161                e1000_check_for_link(hw);
2162                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2163                                 E1000_STATUS_LU);
2164                break;
2165        case e1000_media_type_internal_serdes:
2166                e1000_check_for_link(hw);
2167                link_check = adapter->hw.mac.serdes_has_link;
2168                break;
2169	/* VF device is type_unknown */
2170        case e1000_media_type_unknown:
2171                e1000_check_for_link(hw);
2172		link_check = !hw->mac.get_link_status;
2173		/* Fall thru */
2174        default:
2175                break;
2176        }
2177
2178	/* Check for thermal downshift or shutdown */
2179	if (hw->mac.type == e1000_i350) {
2180		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2181		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2182	}
2183
2184	/* Get the flow control for display */
2185	switch (fc->current_mode) {
2186	case e1000_fc_rx_pause:
2187		flowctl = "RX";
2188		break;
2189	case e1000_fc_tx_pause:
2190		flowctl = "TX";
2191		break;
2192	case e1000_fc_full:
2193		flowctl = "Full";
2194		break;
2195	case e1000_fc_none:
2196	default:
2197		flowctl = "None";
2198		break;
2199	}
2200
2201	/* Now we check if a transition has happened */
2202	if (link_check && (adapter->link_active == 0)) {
2203		e1000_get_speed_and_duplex(&adapter->hw,
2204		    &adapter->link_speed, &adapter->link_duplex);
2205		if (bootverbose)
2206			device_printf(dev, "Link is up %d Mbps %s,"
2207			    " Flow Control: %s\n",
2208			    adapter->link_speed,
2209			    ((adapter->link_duplex == FULL_DUPLEX) ?
2210			    "Full Duplex" : "Half Duplex"), flowctl);
2211		adapter->link_active = 1;
2212		ifp->if_baudrate = adapter->link_speed * 1000000;
2213		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2214		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2215			device_printf(dev, "Link: thermal downshift\n");
2216		/* Delay Link Up for Phy update */
2217		if (((hw->mac.type == e1000_i210) ||
2218		    (hw->mac.type == e1000_i211)) &&
2219		    (hw->phy.id == I210_I_PHY_ID))
2220			msec_delay(I210_LINK_DELAY);
2221		/* Reset if the media type changed. */
2222		if (hw->dev_spec._82575.media_changed) {
2223			hw->dev_spec._82575.media_changed = false;
2224			adapter->flags |= IGB_MEDIA_RESET;
2225			igb_reset(adapter);
2226		}
2227		/* This can sleep */
2228		if_link_state_change(ifp, LINK_STATE_UP);
2229	} else if (!link_check && (adapter->link_active == 1)) {
2230		ifp->if_baudrate = adapter->link_speed = 0;
2231		adapter->link_duplex = 0;
2232		if (bootverbose)
2233			device_printf(dev, "Link is Down\n");
2234		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2235		    (thstat & E1000_THSTAT_PWR_DOWN))
2236			device_printf(dev, "Link: thermal shutdown\n");
2237		adapter->link_active = 0;
2238		/* This can sleep */
2239		if_link_state_change(ifp, LINK_STATE_DOWN);
2240		/* Reset queue state */
2241		for (int i = 0; i < adapter->num_queues; i++, txr++)
2242			txr->queue_status = IGB_QUEUE_IDLE;
2243	}
2244}
2245
2246/*********************************************************************
2247 *
2248 *  This routine disables all traffic on the adapter by issuing a
2249 *  global reset on the MAC and deallocates TX/RX buffers.
2250 *
2251 **********************************************************************/
2252
2253static void
2254igb_stop(void *arg)
2255{
2256	struct adapter	*adapter = arg;
2257	struct ifnet	*ifp = adapter->ifp;
2258	struct tx_ring *txr = adapter->tx_rings;
2259
2260	IGB_CORE_LOCK_ASSERT(adapter);
2261
2262	INIT_DEBUGOUT("igb_stop: begin");
2263
2264	igb_disable_intr(adapter);
2265
2266	callout_stop(&adapter->timer);
2267
2268	/* Tell the stack that the interface is no longer active */
2269	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2270	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2271
2272	/* Disarm watchdog timer. */
2273	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2274		IGB_TX_LOCK(txr);
2275		txr->queue_status = IGB_QUEUE_IDLE;
2276		IGB_TX_UNLOCK(txr);
2277	}
2278
2279	e1000_reset_hw(&adapter->hw);
2280	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0);
2281
2282	e1000_led_off(&adapter->hw);
2283	e1000_cleanup_led(&adapter->hw);
2284}
2285
2286
2287/*********************************************************************
2288 *
2289 *  Determine hardware revision.
2290 *
2291 **********************************************************************/
2292static void
2293igb_identify_hardware(struct adapter *adapter)
2294{
2295	device_t dev = adapter->dev;
2296
2297	/* Make sure our PCI config space has the necessary stuff set */
2298	pci_enable_busmaster(dev);
2299	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2300
2301	/* Save off the information about this board */
2302	adapter->hw.vendor_id = pci_get_vendor(dev);
2303	adapter->hw.device_id = pci_get_device(dev);
2304	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2305	adapter->hw.subsystem_vendor_id =
2306	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2307	adapter->hw.subsystem_device_id =
2308	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2309
2310	/* Set MAC type early for PCI setup */
2311	e1000_set_mac_type(&adapter->hw);
2312
2313	/* Are we a VF device? */
2314	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2315	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2316		adapter->vf_ifp = 1;
2317	else
2318		adapter->vf_ifp = 0;
2319}
2320
2321static int
2322igb_allocate_pci_resources(struct adapter *adapter)
2323{
2324	device_t	dev = adapter->dev;
2325	int		rid;
2326
2327	rid = PCIR_BAR(0);
2328	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2329	    &rid, RF_ACTIVE);
2330	if (adapter->pci_mem == NULL) {
2331		device_printf(dev, "Unable to allocate bus resource: memory\n");
2332		return (ENXIO);
2333	}
2334	adapter->osdep.mem_bus_space_tag =
2335	    rman_get_bustag(adapter->pci_mem);
2336	adapter->osdep.mem_bus_space_handle =
2337	    rman_get_bushandle(adapter->pci_mem);
2338	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2339
2340	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2341
2342	/* This will setup either MSI/X or MSI */
2343	adapter->msix = igb_setup_msix(adapter);
2344	adapter->hw.back = &adapter->osdep;
2345
2346	return (0);
2347}
2348
2349/*********************************************************************
2350 *
2351 *  Setup the Legacy or MSI Interrupt handler
2352 *
2353 **********************************************************************/
2354static int
2355igb_allocate_legacy(struct adapter *adapter)
2356{
2357	device_t		dev = adapter->dev;
2358	struct igb_queue	*que = adapter->queues;
2359#ifndef IGB_LEGACY_TX
2360	struct tx_ring		*txr = adapter->tx_rings;
2361#endif
2362	int			error, rid = 0;
2363
2364	/* Turn off all interrupts */
2365	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2366
2367	/* MSI RID is 1 */
2368	if (adapter->msix == 1)
2369		rid = 1;
2370
2371	/* We allocate a single interrupt resource */
2372	adapter->res = bus_alloc_resource_any(dev,
2373	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2374	if (adapter->res == NULL) {
2375		device_printf(dev, "Unable to allocate bus resource: "
2376		    "interrupt\n");
2377		return (ENXIO);
2378	}
2379
2380#ifndef IGB_LEGACY_TX
2381	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2382#endif
2383
2384	/*
2385	 * Try allocating a fast interrupt and the associated deferred
2386	 * processing contexts.
2387	 */
2388	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2389	/* Make tasklet for deferred link handling */
2390	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2391	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2392	    taskqueue_thread_enqueue, &que->tq);
2393	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2394	    device_get_nameunit(adapter->dev));
2395	if ((error = bus_setup_intr(dev, adapter->res,
2396	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2397	    adapter, &adapter->tag)) != 0) {
2398		device_printf(dev, "Failed to register fast interrupt "
2399			    "handler: %d\n", error);
2400		taskqueue_free(que->tq);
2401		que->tq = NULL;
2402		return (error);
2403	}
2404
2405	return (0);
2406}
2407
2408
2409/*********************************************************************
2410 *
2411 *  Setup the MSIX Queue Interrupt handlers:
2412 *
2413 **********************************************************************/
2414static int
2415igb_allocate_msix(struct adapter *adapter)
2416{
2417	device_t		dev = adapter->dev;
2418	struct igb_queue	*que = adapter->queues;
2419	int			error, rid, vector = 0;
2420
2421	/* Be sure to start with all interrupts disabled */
2422	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2423	E1000_WRITE_FLUSH(&adapter->hw);
2424
2425	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;
2427		que->res = bus_alloc_resource_any(dev,
2428		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2429		if (que->res == NULL) {
2430			device_printf(dev,
2431			    "Unable to allocate bus resource: "
2432			    "MSIX Queue Interrupt\n");
2433			return (ENXIO);
2434		}
2435		error = bus_setup_intr(dev, que->res,
2436	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2437		    igb_msix_que, que, &que->tag);
2438		if (error) {
2439			que->res = NULL;
2440			device_printf(dev, "Failed to register Queue handler");
2441			return (error);
2442		}
2443#if __FreeBSD_version >= 800504
2444		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2445#endif
2446		que->msix = vector;
2447		if (adapter->hw.mac.type == e1000_82575)
2448			que->eims = E1000_EICR_TX_QUEUE0 << i;
2449		else
2450			que->eims = 1 << vector;
2451		/*
2452		** Bind the msix vector, and thus the
		** rings, to the corresponding cpu.
2454		*/
2455		if (adapter->num_queues > 1) {
2456			if (igb_last_bind_cpu < 0)
2457				igb_last_bind_cpu = CPU_FIRST();
2458			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2459			device_printf(dev,
2460				"Bound queue %d to cpu %d\n",
2461				i,igb_last_bind_cpu);
2462			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2463		}
2464#ifndef IGB_LEGACY_TX
2465		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2466		    que->txr);
2467#endif
2468		/* Make tasklet for deferred handling */
2469		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2470		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2471		    taskqueue_thread_enqueue, &que->tq);
2472		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2473		    device_get_nameunit(adapter->dev));
2474	}
2475
2476	/* And Link */
2477	rid = vector + 1;
2478	adapter->res = bus_alloc_resource_any(dev,
2479	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2480	if (adapter->res == NULL) {
2481		device_printf(dev,
2482		    "Unable to allocate bus resource: "
2483		    "MSIX Link Interrupt\n");
2484		return (ENXIO);
2485	}
2486	if ((error = bus_setup_intr(dev, adapter->res,
2487	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2488	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2489		device_printf(dev, "Failed to register Link handler");
2490		return (error);
2491	}
2492#if __FreeBSD_version >= 800504
2493	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2494#endif
2495	adapter->linkvec = vector;
2496
2497	return (0);
2498}
2499
2500
2501static void
2502igb_configure_queues(struct adapter *adapter)
2503{
2504	struct	e1000_hw	*hw = &adapter->hw;
2505	struct	igb_queue	*que;
2506	u32			tmp, ivar = 0, newitr = 0;
2507
2508	/* First turn on RSS capability */
2509	if (adapter->hw.mac.type != e1000_82575)
2510		E1000_WRITE_REG(hw, E1000_GPIE,
2511		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2512		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2513
2514	/* Turn on MSIX */
2515	switch (adapter->hw.mac.type) {
2516	case e1000_82580:
2517	case e1000_i350:
2518	case e1000_i354:
2519	case e1000_i210:
2520	case e1000_i211:
2521	case e1000_vfadapt:
2522	case e1000_vfadapt_i350:
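		/*
		 * Each 32-bit IVAR register covers two queues: byte 0
		 * holds the RX vector for the even-numbered queue,
		 * byte 1 its TX vector, byte 2 the RX vector for the
		 * odd queue, and byte 3 its TX vector (see the shifts
		 * in the two loops below).
		 */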
2523		/* RX entries */
2524		for (int i = 0; i < adapter->num_queues; i++) {
2525			u32 index = i >> 1;
2526			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2527			que = &adapter->queues[i];
2528			if (i & 1) {
2529				ivar &= 0xFF00FFFF;
2530				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2531			} else {
2532				ivar &= 0xFFFFFF00;
2533				ivar |= que->msix | E1000_IVAR_VALID;
2534			}
2535			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2536		}
2537		/* TX entries */
2538		for (int i = 0; i < adapter->num_queues; i++) {
2539			u32 index = i >> 1;
2540			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2541			que = &adapter->queues[i];
2542			if (i & 1) {
2543				ivar &= 0x00FFFFFF;
2544				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2545			} else {
2546				ivar &= 0xFFFF00FF;
2547				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2548			}
2549			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2550			adapter->que_mask |= que->eims;
2551		}
2552
2553		/* And for the link interrupt */
2554		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2555		adapter->link_mask = 1 << adapter->linkvec;
2556		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2557		break;
2558	case e1000_82576:
2559		/* RX entries */
2560		for (int i = 0; i < adapter->num_queues; i++) {
2561			u32 index = i & 0x7; /* Each IVAR has two entries */
2562			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2563			que = &adapter->queues[i];
2564			if (i < 8) {
2565				ivar &= 0xFFFFFF00;
2566				ivar |= que->msix | E1000_IVAR_VALID;
2567			} else {
2568				ivar &= 0xFF00FFFF;
2569				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2570			}
2571			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2572			adapter->que_mask |= que->eims;
2573		}
2574		/* TX entries */
2575		for (int i = 0; i < adapter->num_queues; i++) {
2576			u32 index = i & 0x7; /* Each IVAR has two entries */
2577			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2578			que = &adapter->queues[i];
2579			if (i < 8) {
2580				ivar &= 0xFFFF00FF;
2581				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2582			} else {
2583				ivar &= 0x00FFFFFF;
2584				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2585			}
2586			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2587			adapter->que_mask |= que->eims;
2588		}
2589
2590		/* And for the link interrupt */
2591		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2592		adapter->link_mask = 1 << adapter->linkvec;
2593		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2594		break;
2595
2596	case e1000_82575:
                /* Enable MSI-X support */
2598		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2599                tmp |= E1000_CTRL_EXT_PBA_CLR;
2600                /* Auto-Mask interrupts upon ICR read. */
2601                tmp |= E1000_CTRL_EXT_EIAME;
2602                tmp |= E1000_CTRL_EXT_IRCA;
2603                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2604
2605		/* Queues */
2606		for (int i = 0; i < adapter->num_queues; i++) {
2607			que = &adapter->queues[i];
2608			tmp = E1000_EICR_RX_QUEUE0 << i;
2609			tmp |= E1000_EICR_TX_QUEUE0 << i;
2610			que->eims = tmp;
2611			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2612			    i, que->eims);
2613			adapter->que_mask |= que->eims;
2614		}
2615
2616		/* Link */
2617		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2618		    E1000_EIMS_OTHER);
2619		adapter->link_mask |= E1000_EIMS_OTHER;
2620	default:
2621		break;
2622	}
2623
2624	/* Set the starting interrupt rate */
2625	if (igb_max_interrupt_rate > 0)
2626		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2627
2628        if (hw->mac.type == e1000_82575)
2629                newitr |= newitr << 16;
2630        else
2631                newitr |= E1000_EITR_CNT_IGNR;
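	/*
	 * For example, with igb_max_interrupt_rate set to 8000 the
	 * computed interval is 4000000 / 8000 = 500 (0x1F4), already
	 * aligned to the 0x7FFC mask; each queue's EITR is then seeded
	 * with that value below.
	 */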
2632
2633	for (int i = 0; i < adapter->num_queues; i++) {
2634		que = &adapter->queues[i];
2635		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2636	}
2637
2638	return;
2639}
2640
2641
2642static void
2643igb_free_pci_resources(struct adapter *adapter)
2644{
2645	struct		igb_queue *que = adapter->queues;
2646	device_t	dev = adapter->dev;
2647	int		rid;
2648
	/*
	** There is a slight possibility of a failure mode in attach
	** that will result in entering this function before interrupt
	** resources have been initialized; in that case we do not want
	** to execute the loops below.  We can detect this reliably by
	** the state of the adapter's res pointer.
	*/
2657	if (adapter->res == NULL)
2658		goto mem;
2659
2660	/*
2661	 * First release all the interrupt resources:
2662	 */
2663	for (int i = 0; i < adapter->num_queues; i++, que++) {
2664		rid = que->msix + 1;
2665		if (que->tag != NULL) {
2666			bus_teardown_intr(dev, que->res, que->tag);
2667			que->tag = NULL;
2668		}
2669		if (que->res != NULL)
2670			bus_release_resource(dev,
2671			    SYS_RES_IRQ, rid, que->res);
2672	}
2673
2674	/* Clean the Legacy or Link interrupt last */
2675	if (adapter->linkvec) /* we are doing MSIX */
2676		rid = adapter->linkvec + 1;
2677	else
2678		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2679
2680	que = adapter->queues;
2681	if (adapter->tag != NULL) {
2682		taskqueue_drain(que->tq, &adapter->link_task);
2683		bus_teardown_intr(dev, adapter->res, adapter->tag);
2684		adapter->tag = NULL;
2685	}
2686	if (adapter->res != NULL)
2687		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2688
2689	for (int i = 0; i < adapter->num_queues; i++, que++) {
2690		if (que->tq != NULL) {
2691#ifndef IGB_LEGACY_TX
2692			taskqueue_drain(que->tq, &que->txr->txq_task);
2693#endif
2694			taskqueue_drain(que->tq, &que->que_task);
2695			taskqueue_free(que->tq);
2696		}
2697	}
2698mem:
2699	if (adapter->msix)
2700		pci_release_msi(dev);
2701
2702	if (adapter->msix_mem != NULL)
2703		bus_release_resource(dev, SYS_RES_MEMORY,
2704		    adapter->memrid, adapter->msix_mem);
2705
2706	if (adapter->pci_mem != NULL)
2707		bus_release_resource(dev, SYS_RES_MEMORY,
2708		    PCIR_BAR(0), adapter->pci_mem);
2709
2710}
2711
2712/*
 * Set up either MSI/X or MSI
2714 */
2715static int
2716igb_setup_msix(struct adapter *adapter)
2717{
2718	device_t	dev = adapter->dev;
2719	int		bar, want, queues, msgs, maxqueues;
2720
2721	/* tuneable override */
2722	if (igb_enable_msix == 0)
2723		goto msi;
2724
2725	/* First try MSI/X */
2726	msgs = pci_msix_count(dev);
2727	if (msgs == 0)
2728		goto msi;
	/*
	** Some newer devices, as with ixgbe, may use a different
	** BAR, so we need to keep track of which one is used.
	*/
2734	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2735	bar = pci_read_config(dev, adapter->memrid, 4);
2736	if (bar == 0) /* use next bar */
2737		adapter->memrid += 4;
2738	adapter->msix_mem = bus_alloc_resource_any(dev,
2739	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2740       	if (adapter->msix_mem == NULL) {
2741		/* May not be enabled */
2742		device_printf(adapter->dev,
2743		    "Unable to map MSIX table \n");
2744		goto msi;
2745	}
2746
2747	/* Figure out a reasonable auto config value */
2748	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2749
2750	/* Manual override */
2751	if (igb_num_queues != 0)
2752		queues = igb_num_queues;
2753
2754	/* Sanity check based on HW */
2755	switch (adapter->hw.mac.type) {
2756		case e1000_82575:
2757			maxqueues = 4;
2758			break;
2759		case e1000_82576:
2760		case e1000_82580:
2761		case e1000_i350:
2762		case e1000_i354:
2763			maxqueues = 8;
2764			break;
2765		case e1000_i210:
2766			maxqueues = 4;
2767			break;
2768		case e1000_i211:
2769			maxqueues = 2;
2770			break;
2771		default:  /* VF interfaces */
2772			maxqueues = 1;
2773			break;
2774	}
2775	if (queues > maxqueues)
2776		queues = maxqueues;
2777
2778	/* Manual override */
2779	if (igb_num_queues != 0)
2780		queues = igb_num_queues;
2781
	/*
	** One vector (RX/TX pair) per queue,
	** plus an additional one for the link interrupt.
	*/
2786	want = queues + 1;
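	/*
	 * For example, on a 4-core system with an 82576 (maxqueues 8)
	 * and 10 MSI-X messages available, queues would be 4 and want
	 * would be 5: four queue vectors plus the link vector.
	 */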
2787	if (msgs >= want)
2788		msgs = want;
2789	else {
2790               	device_printf(adapter->dev,
2791		    "MSIX Configuration Problem, "
2792		    "%d vectors configured, but %d queues wanted!\n",
2793		    msgs, want);
2794		goto msi;
2795	}
2796	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2797               	device_printf(adapter->dev,
2798		    "Using MSIX interrupts with %d vectors\n", msgs);
2799		adapter->num_queues = queues;
2800		return (msgs);
2801	}
2802	/*
2803	** If MSIX alloc failed or provided us with
2804	** less than needed, free and fall through to MSI
2805	*/
2806	pci_release_msi(dev);
2807
2808msi:
2809       	if (adapter->msix_mem != NULL) {
2810		bus_release_resource(dev, SYS_RES_MEMORY,
2811		    adapter->memrid, adapter->msix_mem);
2812		adapter->msix_mem = NULL;
2813	}
2814       	msgs = 1;
2815	if (pci_alloc_msi(dev, &msgs) == 0) {
2816		device_printf(adapter->dev," Using an MSI interrupt\n");
2817		return (msgs);
2818	}
2819	device_printf(adapter->dev," Using a Legacy interrupt\n");
2820	return (0);
2821}
2822
2823/*********************************************************************
2824 *
2825 *  Initialize the DMA Coalescing feature
2826 *
2827 **********************************************************************/
2828static void
2829igb_init_dmac(struct adapter *adapter, u32 pba)
2830{
2831	device_t	dev = adapter->dev;
2832	struct e1000_hw *hw = &adapter->hw;
2833	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2834	u16		hwm;
2835
2836	if (hw->mac.type == e1000_i211)
2837		return;
2838
2839	if (hw->mac.type > e1000_82580) {
2840
2841		if (adapter->dmac == 0) { /* Disabling it */
2842			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2843			return;
2844		} else
2845			device_printf(dev, "DMA Coalescing enabled\n");
2846
2847		/* Set starting threshold */
2848		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2849
2850		hwm = 64 * pba - adapter->max_frame_size / 16;
2851		if (hwm < 64 * (pba - 6))
2852			hwm = 64 * (pba - 6);
2853		reg = E1000_READ_REG(hw, E1000_FCRTC);
2854		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2855		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2856		    & E1000_FCRTC_RTH_COAL_MASK);
2857		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2858
2859
2860		dmac = pba - adapter->max_frame_size / 512;
2861		if (dmac < pba - 10)
2862			dmac = pba - 10;
2863		reg = E1000_READ_REG(hw, E1000_DMACR);
2864		reg &= ~E1000_DMACR_DMACTHR_MASK;
2865		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2866		    & E1000_DMACR_DMACTHR_MASK);
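		/*
		 * As a rough example, with a 34KB packet buffer (pba ==
		 * 34) and a standard ~1518-byte max frame this gives
		 * hwm = 64 * 34 - 1518 / 16 = 2082 (above the 64 * 28
		 * floor) and dmac = 34 - 1518 / 512 = 32 (above the
		 * pba - 10 floor of 24).
		 */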
2867
2868		/* transition to L0x or L1 if available..*/
2869		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2870
		/*
		 * Check whether the link is a 2.5Gb backplane connection
		 * before configuring the watchdog timer: its msec value
		 * is programmed in 12.8usec intervals on 2.5Gb links and
		 * in 32usec intervals otherwise.
		 */
2877		if (hw->mac.type == e1000_i354) {
2878			int status = E1000_READ_REG(hw, E1000_STATUS);
2879			if ((status & E1000_STATUS_2P5_SKU) &&
2880			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2881				reg |= ((adapter->dmac * 5) >> 6);
2882			else
2883				reg |= (adapter->dmac >> 5);
2884		} else {
2885			reg |= (adapter->dmac >> 5);
2886		}
2887
2888		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2889
2890		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2891
2892		/* Set the interval before transition */
2893		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2894		if (hw->mac.type == e1000_i350)
2895			reg |= IGB_DMCTLX_DCFLUSH_DIS;
		/*
		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so
		** the value becomes 0xA (10 * 0.4 usec); the delay is
		** still 4 usec in either case.
		*/
2900		if (hw->mac.type == e1000_i354) {
2901			int status = E1000_READ_REG(hw, E1000_STATUS);
2902			if ((status & E1000_STATUS_2P5_SKU) &&
2903			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2904				reg |= 0xA;
2905			else
2906				reg |= 0x4;
2907		} else {
2908			reg |= 0x4;
2909		}
2910
2911		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2912
2913		/* free space in tx packet buffer to wake from DMA coal */
2914		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2915		    (2 * adapter->max_frame_size)) >> 6);
2916
2917		/* make low power state decision controlled by DMA coal */
2918		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2919		reg &= ~E1000_PCIEMISC_LX_DECISION;
2920		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2921
2922	} else if (hw->mac.type == e1000_82580) {
2923		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2924		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2925		    reg & ~E1000_PCIEMISC_LX_DECISION);
2926		E1000_WRITE_REG(hw, E1000_DMACR, 0);
2927	}
2928}
2929
2930
2931/*********************************************************************
2932 *
 *  Set up a fresh starting state
2934 *
2935 **********************************************************************/
2936static void
2937igb_reset(struct adapter *adapter)
2938{
2939	device_t	dev = adapter->dev;
2940	struct e1000_hw *hw = &adapter->hw;
2941	struct e1000_fc_info *fc = &hw->fc;
2942	struct ifnet	*ifp = adapter->ifp;
2943	u32		pba = 0;
2944	u16		hwm;
2945
2946	INIT_DEBUGOUT("igb_reset: begin");
2947
2948	/* Let the firmware know the OS is in control */
2949	igb_get_hw_control(adapter);
2950
2951	/*
2952	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
2955	 */
2956	switch (hw->mac.type) {
2957	case e1000_82575:
2958		pba = E1000_PBA_32K;
2959		break;
2960	case e1000_82576:
2961	case e1000_vfadapt:
2962		pba = E1000_READ_REG(hw, E1000_RXPBS);
2963		pba &= E1000_RXPBS_SIZE_MASK_82576;
2964		break;
2965	case e1000_82580:
2966	case e1000_i350:
2967	case e1000_i354:
2968	case e1000_vfadapt_i350:
2969		pba = E1000_READ_REG(hw, E1000_RXPBS);
2970		pba = e1000_rxpbs_adjust_82580(pba);
2971		break;
2972	case e1000_i210:
2973	case e1000_i211:
2974		pba = E1000_PBA_34K;
2975	default:
2976		break;
2977	}
2978
2979	/* Special needs in case of Jumbo frames */
2980	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2981		u32 tx_space, min_tx, min_rx;
2982		pba = E1000_READ_REG(hw, E1000_PBA);
2983		tx_space = pba >> 16;
2984		pba &= 0xffff;
2985		min_tx = (adapter->max_frame_size +
2986		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2987		min_tx = roundup2(min_tx, 1024);
2988		min_tx >>= 10;
2989                min_rx = adapter->max_frame_size;
2990                min_rx = roundup2(min_rx, 1024);
2991                min_rx >>= 10;
2992		if (tx_space < min_tx &&
2993		    ((min_tx - tx_space) < pba)) {
2994			pba = pba - (min_tx - tx_space);
2995			/*
2996                         * if short on rx space, rx wins
2997                         * and must trump tx adjustment
2998			 */
2999                        if (pba < min_rx)
3000                                pba = min_rx;
3001		}
3002		E1000_WRITE_REG(hw, E1000_PBA, pba);
3003	}
3004
3005	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3006
3007	/*
3008	 * These parameters control the automatic generation (Tx) and
3009	 * response (Rx) to Ethernet PAUSE frames.
3010	 * - High water mark should allow for at least two frames to be
3011	 *   received after sending an XOFF.
3012	 * - Low water mark works best when it is very near the high water mark.
3013	 *   This allows the receiver to restart by sending XON when it has
3014	 *   drained a bit.
3015	 */
3016	hwm = min(((pba << 10) * 9 / 10),
3017	    ((pba << 10) - 2 * adapter->max_frame_size));
3018
3019	if (hw->mac.type < e1000_82576) {
3020		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3021		fc->low_water = fc->high_water - 8;
3022	} else {
3023		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3024		fc->low_water = fc->high_water - 16;
3025	}
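	/*
	 * For example, with a 34KB PBA and a ~1518-byte max frame:
	 * hwm = min(34816 * 9 / 10, 34816 - 2 * 1518) = 31334, so on
	 * everything but the 82575 high_water becomes 31328 (0x7A60)
	 * and low_water 31312.
	 */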
3026
3027	fc->pause_time = IGB_FC_PAUSE_TIME;
3028	fc->send_xon = TRUE;
3029	if (adapter->fc)
3030		fc->requested_mode = adapter->fc;
3031	else
3032		fc->requested_mode = e1000_fc_default;
3033
3034	/* Issue a global reset */
3035	e1000_reset_hw(hw);
3036	E1000_WRITE_REG(hw, E1000_WUFC, 0);
3037
3038	/* Reset for AutoMediaDetect */
3039	if (adapter->flags & IGB_MEDIA_RESET) {
3040		e1000_setup_init_funcs(hw, TRUE);
3041		e1000_get_bus_info(hw);
3042		adapter->flags &= ~IGB_MEDIA_RESET;
3043	}
3044
3045	if (e1000_init_hw(hw) < 0)
3046		device_printf(dev, "Hardware Initialization Failed\n");
3047
3048	/* Setup DMA Coalescing */
3049	igb_init_dmac(adapter, pba);
3050
3051	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3052	e1000_get_phy_info(hw);
3053	e1000_check_for_link(hw);
3054	return;
3055}
3056
3057/*********************************************************************
3058 *
3059 *  Setup networking device structure and register an interface.
3060 *
3061 **********************************************************************/
3062static int
3063igb_setup_interface(device_t dev, struct adapter *adapter)
3064{
3065	struct ifnet   *ifp;
3066
3067	INIT_DEBUGOUT("igb_setup_interface: begin");
3068
3069	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3070	if (ifp == NULL) {
3071		device_printf(dev, "can not allocate ifnet structure\n");
3072		return (-1);
3073	}
3074	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3075	ifp->if_init =  igb_init;
3076	ifp->if_softc = adapter;
3077	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3078	ifp->if_ioctl = igb_ioctl;
3079
3080	/* TSO parameters */
3081	ifp->if_hw_tsomax = IP_MAXPACKET;
3082	ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3083	ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3084
3085#ifndef IGB_LEGACY_TX
3086	ifp->if_transmit = igb_mq_start;
3087	ifp->if_qflush = igb_qflush;
3088#else
3089	ifp->if_start = igb_start;
3090	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3091	ifp->if_snd.ifq_drv_maxlen = 0;
3092	IFQ_SET_READY(&ifp->if_snd);
3093#endif
3094
3095	ether_ifattach(ifp, adapter->hw.mac.addr);
3096
3097	ifp->if_capabilities = ifp->if_capenable = 0;
3098
3099	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3100#if __FreeBSD_version >= 1000000
3101	ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3102#endif
3103	ifp->if_capabilities |= IFCAP_TSO;
3104	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3105	ifp->if_capenable = ifp->if_capabilities;
3106
	/* Advertise LRO support, but leave it disabled by default */
3108	ifp->if_capabilities |= IFCAP_LRO;
3109
3110#ifdef DEVICE_POLLING
3111	ifp->if_capabilities |= IFCAP_POLLING;
3112#endif
3113
3114	/*
3115	 * Tell the upper layer(s) we
3116	 * support full VLAN capability.
3117	 */
3118	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3119	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3120			     |  IFCAP_VLAN_HWTSO
3121			     |  IFCAP_VLAN_MTU;
3122	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3123			  |  IFCAP_VLAN_HWTSO
3124			  |  IFCAP_VLAN_MTU;
3125
3126	/*
3127	 * Enable only WOL MAGIC by default if WOL is enabled in EEPROM.
3128	 */
3129	ifp->if_capabilities |= IFCAP_WOL;
3130	if (adapter->wol)
3131		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3132
	/*
	** Don't turn this on by default: if vlans are created on
	** another pseudo device (e.g. lagg), then vlan events are not
	** passed through, which breaks operation; with HW FILTER off
	** it works. If you use vlans directly on the igb driver you
	** can enable this and get full hardware tag filtering.
	*/
3141	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3142
3143	/*
3144	 * Specify the media types supported by this adapter and register
3145	 * callbacks to update media and link information
3146	 */
3147	ifmedia_init(&adapter->media, IFM_IMASK,
3148	    igb_media_change, igb_media_status);
3149	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3150	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3151		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3152			    0, NULL);
3153		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3154	} else {
3155		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3156		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3157			    0, NULL);
3158		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3159			    0, NULL);
3160		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3161			    0, NULL);
3162		if (adapter->hw.phy.type != e1000_phy_ife) {
3163			ifmedia_add(&adapter->media,
3164				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3165			ifmedia_add(&adapter->media,
3166				IFM_ETHER | IFM_1000_T, 0, NULL);
3167		}
3168	}
3169	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3170	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3171	return (0);
3172}
3173
3174
3175/*
3176 * Manage DMA'able memory.
3177 */
3178static void
3179igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3180{
3181	if (error)
3182		return;
3183	*(bus_addr_t *) arg = segs[0].ds_addr;
3184}
3185
3186static int
3187igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3188        struct igb_dma_alloc *dma, int mapflags)
3189{
3190	int error;
3191
3192	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3193				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3194				BUS_SPACE_MAXADDR,	/* lowaddr */
3195				BUS_SPACE_MAXADDR,	/* highaddr */
3196				NULL, NULL,		/* filter, filterarg */
3197				size,			/* maxsize */
3198				1,			/* nsegments */
3199				size,			/* maxsegsize */
3200				0,			/* flags */
3201				NULL,			/* lockfunc */
3202				NULL,			/* lockarg */
3203				&dma->dma_tag);
3204	if (error) {
3205		device_printf(adapter->dev,
3206		    "%s: bus_dma_tag_create failed: %d\n",
3207		    __func__, error);
3208		goto fail_0;
3209	}
3210
3211	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3212	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3213	if (error) {
3214		device_printf(adapter->dev,
3215		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3216		    __func__, (uintmax_t)size, error);
3217		goto fail_2;
3218	}
3219
3220	dma->dma_paddr = 0;
3221	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3222	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3223	if (error || dma->dma_paddr == 0) {
3224		device_printf(adapter->dev,
3225		    "%s: bus_dmamap_load failed: %d\n",
3226		    __func__, error);
3227		goto fail_3;
3228	}
3229
3230	return (0);
3231
3232fail_3:
3233	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3234fail_2:
3235	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3236	bus_dma_tag_destroy(dma->dma_tag);
3237fail_0:
3238	dma->dma_tag = NULL;
3239
3240	return (error);
3241}
3242
3243static void
3244igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3245{
3246	if (dma->dma_tag == NULL)
3247		return;
3248	if (dma->dma_paddr != 0) {
3249		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3250		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3251		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3252		dma->dma_paddr = 0;
3253	}
3254	if (dma->dma_vaddr != NULL) {
3255		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3256		dma->dma_vaddr = NULL;
3257	}
3258	bus_dma_tag_destroy(dma->dma_tag);
3259	dma->dma_tag = NULL;
3260}
3261
3262
3263/*********************************************************************
3264 *
3265 *  Allocate memory for the transmit and receive rings, and then
3266 *  the descriptors associated with each, called only once at attach.
3267 *
3268 **********************************************************************/
3269static int
3270igb_allocate_queues(struct adapter *adapter)
3271{
3272	device_t dev = adapter->dev;
3273	struct igb_queue	*que = NULL;
3274	struct tx_ring		*txr = NULL;
3275	struct rx_ring		*rxr = NULL;
3276	int rsize, tsize, error = E1000_SUCCESS;
3277	int txconf = 0, rxconf = 0;
3278
3279	/* First allocate the top level queue structs */
3280	if (!(adapter->queues =
3281	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3282	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3283		device_printf(dev, "Unable to allocate queue memory\n");
3284		error = ENOMEM;
3285		goto fail;
3286	}
3287
3288	/* Next allocate the TX ring struct memory */
3289	if (!(adapter->tx_rings =
3290	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3291	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3292		device_printf(dev, "Unable to allocate TX ring memory\n");
3293		error = ENOMEM;
3294		goto tx_fail;
3295	}
3296
3297	/* Now allocate the RX */
3298	if (!(adapter->rx_rings =
3299	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3300	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3301		device_printf(dev, "Unable to allocate RX ring memory\n");
3302		error = ENOMEM;
3303		goto rx_fail;
3304	}
3305
3306	tsize = roundup2(adapter->num_tx_desc *
3307	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
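	/*
	 * Each advanced TX descriptor is 16 bytes, so with, say, 1024
	 * descriptors per ring tsize works out to 16KB before the
	 * roundup2() alignment.
	 */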
3308	/*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo the memory allocations gracefully.
3312	 */
3313	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3314		/* Set up some basics */
3315		txr = &adapter->tx_rings[i];
3316		txr->adapter = adapter;
3317		txr->me = i;
3318		txr->num_desc = adapter->num_tx_desc;
3319
3320		/* Initialize the TX lock */
3321		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3322		    device_get_nameunit(dev), txr->me);
3323		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3324
3325		if (igb_dma_malloc(adapter, tsize,
3326			&txr->txdma, BUS_DMA_NOWAIT)) {
3327			device_printf(dev,
3328			    "Unable to allocate TX Descriptor memory\n");
3329			error = ENOMEM;
3330			goto err_tx_desc;
3331		}
3332		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3333		bzero((void *)txr->tx_base, tsize);
3334
3335        	/* Now allocate transmit buffers for the ring */
3336        	if (igb_allocate_transmit_buffers(txr)) {
3337			device_printf(dev,
3338			    "Critical Failure setting up transmit buffers\n");
3339			error = ENOMEM;
3340			goto err_tx_desc;
3341        	}
3342#ifndef IGB_LEGACY_TX
3343		/* Allocate a buf ring */
3344		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3345		    M_WAITOK, &txr->tx_mtx);
3346#endif
3347	}
3348
3349	/*
3350	 * Next the RX queues...
3351	 */
3352	rsize = roundup2(adapter->num_rx_desc *
3353	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3354	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3355		rxr = &adapter->rx_rings[i];
3356		rxr->adapter = adapter;
3357		rxr->me = i;
3358
3359		/* Initialize the RX lock */
3360		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3362		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3363
3364		if (igb_dma_malloc(adapter, rsize,
3365			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3366			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
3368			error = ENOMEM;
3369			goto err_rx_desc;
3370		}
3371		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3372		bzero((void *)rxr->rx_base, rsize);
3373
3374        	/* Allocate receive buffers for the ring*/
3375		if (igb_allocate_receive_buffers(rxr)) {
3376			device_printf(dev,
3377			    "Critical Failure setting up receive buffers\n");
3378			error = ENOMEM;
3379			goto err_rx_desc;
3380		}
3381	}
3382
3383	/*
3384	** Finally set up the queue holding structs
3385	*/
3386	for (int i = 0; i < adapter->num_queues; i++) {
3387		que = &adapter->queues[i];
3388		que->adapter = adapter;
3389		que->txr = &adapter->tx_rings[i];
3390		que->rxr = &adapter->rx_rings[i];
3391	}
3392
3393	return (0);
3394
3395err_rx_desc:
3396	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3397		igb_dma_free(adapter, &rxr->rxdma);
3398err_tx_desc:
3399	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3400		igb_dma_free(adapter, &txr->txdma);
3401	free(adapter->rx_rings, M_DEVBUF);
3402rx_fail:
3403#ifndef IGB_LEGACY_TX
3404	buf_ring_free(txr->br, M_DEVBUF);
3405#endif
3406	free(adapter->tx_rings, M_DEVBUF);
3407tx_fail:
3408	free(adapter->queues, M_DEVBUF);
3409fail:
3410	return (error);
3411}
3412
3413/*********************************************************************
3414 *
3415 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3416 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
3418 *
3419 **********************************************************************/
3420static int
3421igb_allocate_transmit_buffers(struct tx_ring *txr)
3422{
3423	struct adapter *adapter = txr->adapter;
3424	device_t dev = adapter->dev;
3425	struct igb_tx_buf *txbuf;
3426	int error, i;
3427
3428	/*
3429	 * Setup DMA descriptor areas.
3430	 */
3431	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3432			       1, 0,			/* alignment, bounds */
3433			       BUS_SPACE_MAXADDR,	/* lowaddr */
3434			       BUS_SPACE_MAXADDR,	/* highaddr */
3435			       NULL, NULL,		/* filter, filterarg */
3436			       IGB_TSO_SIZE,		/* maxsize */
3437			       IGB_MAX_SCATTER,		/* nsegments */
3438			       PAGE_SIZE,		/* maxsegsize */
3439			       0,			/* flags */
3440			       NULL,			/* lockfunc */
3441			       NULL,			/* lockfuncarg */
3442			       &txr->txtag))) {
3443		device_printf(dev,"Unable to allocate TX DMA tag\n");
3444		goto fail;
3445	}
3446
3447	if (!(txr->tx_buffers =
3448	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3449	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3450		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3451		error = ENOMEM;
3452		goto fail;
3453	}
3454
3455        /* Create the descriptor buffer dma maps */
3456	txbuf = txr->tx_buffers;
3457	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3458		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3459		if (error != 0) {
3460			device_printf(dev, "Unable to create TX DMA map\n");
3461			goto fail;
3462		}
3463	}
3464
3465	return 0;
3466fail:
	/* Free everything; this handles the case where we failed partway through */
3468	igb_free_transmit_structures(adapter);
3469	return (error);
3470}
3471
3472/*********************************************************************
3473 *
3474 *  Initialize a transmit ring.
3475 *
3476 **********************************************************************/
3477static void
3478igb_setup_transmit_ring(struct tx_ring *txr)
3479{
3480	struct adapter *adapter = txr->adapter;
3481	struct igb_tx_buf *txbuf;
3482	int i;
3483#ifdef DEV_NETMAP
3484	struct netmap_adapter *na = NA(adapter->ifp);
3485	struct netmap_slot *slot;
3486#endif /* DEV_NETMAP */
3487
3488	/* Clear the old descriptor contents */
3489	IGB_TX_LOCK(txr);
3490#ifdef DEV_NETMAP
3491	slot = netmap_reset(na, NR_TX, txr->me, 0);
3492#endif /* DEV_NETMAP */
3493	bzero((void *)txr->tx_base,
3494	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3495	/* Reset indices */
3496	txr->next_avail_desc = 0;
3497	txr->next_to_clean = 0;
3498
3499	/* Free any existing tx buffers. */
3500        txbuf = txr->tx_buffers;
3501	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3502		if (txbuf->m_head != NULL) {
3503			bus_dmamap_sync(txr->txtag, txbuf->map,
3504			    BUS_DMASYNC_POSTWRITE);
3505			bus_dmamap_unload(txr->txtag, txbuf->map);
3506			m_freem(txbuf->m_head);
3507			txbuf->m_head = NULL;
3508		}
3509#ifdef DEV_NETMAP
3510		if (slot) {
3511			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3512			/* no need to set the address */
3513			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3514		}
3515#endif /* DEV_NETMAP */
3516		/* clear the watch index */
3517		txbuf->eop = NULL;
3518        }
3519
3520	/* Set number of descriptors available */
3521	txr->tx_avail = adapter->num_tx_desc;
3522
3523	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3524	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3525	IGB_TX_UNLOCK(txr);
3526}
3527
3528/*********************************************************************
3529 *
3530 *  Initialize all transmit rings.
3531 *
3532 **********************************************************************/
3533static void
3534igb_setup_transmit_structures(struct adapter *adapter)
3535{
3536	struct tx_ring *txr = adapter->tx_rings;
3537
3538	for (int i = 0; i < adapter->num_queues; i++, txr++)
3539		igb_setup_transmit_ring(txr);
3540
3541	return;
3542}
3543
3544/*********************************************************************
3545 *
3546 *  Enable transmit unit.
3547 *
3548 **********************************************************************/
3549static void
3550igb_initialize_transmit_units(struct adapter *adapter)
3551{
3552	struct tx_ring	*txr = adapter->tx_rings;
3553	struct e1000_hw *hw = &adapter->hw;
3554	u32		tctl, txdctl;
3555
3556	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3557	tctl = txdctl = 0;
3558
3559	/* Setup the Tx Descriptor Rings */
3560	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3561		u64 bus_addr = txr->txdma.dma_paddr;
3562
3563		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3564		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3565		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3566		    (uint32_t)(bus_addr >> 32));
3567		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3568		    (uint32_t)bus_addr);
3569
3570		/* Setup the HW Tx Head and Tail descriptor pointers */
3571		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3572		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3573
3574		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3575		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3576		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3577
3578		txr->queue_status = IGB_QUEUE_IDLE;
3579
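		/*
		 * Pack the prefetch (PTHRESH), host (HTHRESH) and
		 * write-back (WTHRESH) thresholds into TXDCTL and
		 * enable the queue.
		 */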
3580		txdctl |= IGB_TX_PTHRESH;
3581		txdctl |= IGB_TX_HTHRESH << 8;
3582		txdctl |= IGB_TX_WTHRESH << 16;
3583		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3584		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3585	}
3586
3587	if (adapter->vf_ifp)
3588		return;
3589
3590	e1000_config_collision_dist(hw);
3591
3592	/* Program the Transmit Control Register */
3593	tctl = E1000_READ_REG(hw, E1000_TCTL);
3594	tctl &= ~E1000_TCTL_CT;
3595	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3596		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3597
3598	/* This write will effectively turn on the transmit unit. */
3599	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3600}
3601
3602/*********************************************************************
3603 *
3604 *  Free all transmit rings.
3605 *
3606 **********************************************************************/
3607static void
3608igb_free_transmit_structures(struct adapter *adapter)
3609{
3610	struct tx_ring *txr = adapter->tx_rings;
3611
3612	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3613		IGB_TX_LOCK(txr);
3614		igb_free_transmit_buffers(txr);
3615		igb_dma_free(adapter, &txr->txdma);
3616		IGB_TX_UNLOCK(txr);
3617		IGB_TX_LOCK_DESTROY(txr);
3618	}
3619	free(adapter->tx_rings, M_DEVBUF);
3620}
3621
3622/*********************************************************************
3623 *
3624 *  Free transmit ring related data structures.
3625 *
3626 **********************************************************************/
3627static void
3628igb_free_transmit_buffers(struct tx_ring *txr)
3629{
3630	struct adapter *adapter = txr->adapter;
3631	struct igb_tx_buf *tx_buffer;
3632	int             i;
3633
3634	INIT_DEBUGOUT("free_transmit_ring: begin");
3635
3636	if (txr->tx_buffers == NULL)
3637		return;
3638
3639	tx_buffer = txr->tx_buffers;
3640	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3641		if (tx_buffer->m_head != NULL) {
3642			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3643			    BUS_DMASYNC_POSTWRITE);
3644			bus_dmamap_unload(txr->txtag,
3645			    tx_buffer->map);
3646			m_freem(tx_buffer->m_head);
3647			tx_buffer->m_head = NULL;
3648			if (tx_buffer->map != NULL) {
3649				bus_dmamap_destroy(txr->txtag,
3650				    tx_buffer->map);
3651				tx_buffer->map = NULL;
3652			}
3653		} else if (tx_buffer->map != NULL) {
3654			bus_dmamap_unload(txr->txtag,
3655			    tx_buffer->map);
3656			bus_dmamap_destroy(txr->txtag,
3657			    tx_buffer->map);
3658			tx_buffer->map = NULL;
3659		}
3660	}
3661#ifndef IGB_LEGACY_TX
3662	if (txr->br != NULL)
3663		buf_ring_free(txr->br, M_DEVBUF);
3664#endif
3665	if (txr->tx_buffers != NULL) {
3666		free(txr->tx_buffers, M_DEVBUF);
3667		txr->tx_buffers = NULL;
3668	}
3669	if (txr->txtag != NULL) {
3670		bus_dma_tag_destroy(txr->txtag);
3671		txr->txtag = NULL;
3672	}
3673	return;
3674}
3675
3676/**********************************************************************
3677 *
3678 *  Setup work for hardware segmentation offload (TSO) on
3679 *  adapters using advanced tx descriptors
3680 *
3681 **********************************************************************/
3682static int
3683igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3684    u32 *cmd_type_len, u32 *olinfo_status)
3685{
3686	struct adapter *adapter = txr->adapter;
3687	struct e1000_adv_tx_context_desc *TXD;
3688	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3689	u32 mss_l4len_idx = 0, paylen;
3690	u16 vtag = 0, eh_type;
3691	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3692	struct ether_vlan_header *eh;
3693#ifdef INET6
3694	struct ip6_hdr *ip6;
3695#endif
3696#ifdef INET
3697	struct ip *ip;
3698#endif
3699	struct tcphdr *th;
3700
3701
3702	/*
3703	 * Determine where frame payload starts.
3704	 * Jump over vlan headers if already present
3705	 */
3706	eh = mtod(mp, struct ether_vlan_header *);
3707	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3708		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3709		eh_type = eh->evl_proto;
3710	} else {
3711		ehdrlen = ETHER_HDR_LEN;
3712		eh_type = eh->evl_encap_proto;
3713	}
3714
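	/*
	 * Each case below seeds the TCP checksum with the pseudo-header
	 * checksum (computed with zero length) so the hardware can
	 * complete it for every TSO segment.
	 */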
3715	switch (ntohs(eh_type)) {
3716#ifdef INET6
3717	case ETHERTYPE_IPV6:
3718		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3719		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3720		if (ip6->ip6_nxt != IPPROTO_TCP)
3721			return (ENXIO);
3722		ip_hlen = sizeof(struct ip6_hdr);
3724		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3725		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3726		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3727		break;
3728#endif
3729#ifdef INET
3730	case ETHERTYPE_IP:
3731		ip = (struct ip *)(mp->m_data + ehdrlen);
3732		if (ip->ip_p != IPPROTO_TCP)
3733			return (ENXIO);
3734		ip->ip_sum = 0;
3735		ip_hlen = ip->ip_hl << 2;
3736		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3737		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3738		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3739		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3740		/* Tell transmit desc to also do IPv4 checksum. */
3741		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3742		break;
3743#endif
3744	default:
3745		device_printf(adapter->dev,
3746		    "CSUM_TSO but no supported IP version (0x%04x)",
3747		    ntohs(eh_type));
3748		return (ENXIO);
3749	}
3750
3751	ctxd = txr->next_avail_desc;
3752	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3753
3754	tcp_hlen = th->th_off << 2;
3755
3756	/* TSO payload length (headers excluded); used by the transmit desc setup in encap */
3757	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3758
3759	/* VLAN MACLEN IPLEN */
3760	if (mp->m_flags & M_VLANTAG) {
3761		vtag = htole16(mp->m_pkthdr.ether_vtag);
3762                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3763	}
3764
3765	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3766	vlan_macip_lens |= ip_hlen;
3767	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3768
3769	/* ADV DTYPE TUCMD */
3770	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3771	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3772	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3773
3774	/* MSS L4LEN IDX */
3775	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3776	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3777	/* 82575 needs the queue index added */
3778	if (adapter->hw.mac.type == e1000_82575)
3779		mss_l4len_idx |= txr->me << 4;
3780	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3781
3782	TXD->seqnum_seed = htole32(0);
3783
3784	if (++ctxd == txr->num_desc)
3785		ctxd = 0;
3786
3787	txr->tx_avail--;
3788	txr->next_avail_desc = ctxd;
3789	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3790	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3791	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3792	++txr->tso_tx;
3793	return (0);
3794}
3795
3796/*********************************************************************
3797 *
3798 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3799 *
3800 **********************************************************************/
3801
3802static int
3803igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3804    u32 *cmd_type_len, u32 *olinfo_status)
3805{
3806	struct e1000_adv_tx_context_desc *TXD;
3807	struct adapter *adapter = txr->adapter;
3808	struct ether_vlan_header *eh;
3809	struct ip *ip;
3810	struct ip6_hdr *ip6;
3811	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3812	int	ehdrlen, ip_hlen = 0;
3813	u16	etype;
3814	u8	ipproto = 0;
3815	int	ctxd = txr->next_avail_desc;
3816	u16	vtag = 0;
3817
3818	/* First check if TSO is to be used */
3819	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3820		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3821
3822	/* Indicate the whole packet as payload when not doing TSO */
3823       	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3824
3825	/* Now ready a context descriptor */
3826	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3827
3828	/*
3829	** In advanced descriptors the vlan tag must
3830	** be placed into the context descriptor. Hence
3831	** we need to make one even if not doing offloads.
3832	*/
3833	if (mp->m_flags & M_VLANTAG) {
3834		vtag = htole16(mp->m_pkthdr.ether_vtag);
3835		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3836	} else if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) {
3837		return (0);
3838	}
3839
3840	/*
3841	 * Determine where frame payload starts.
3842	 * Jump over vlan headers if already present,
3843	 * helpful for QinQ too.
3844	 */
3845	eh = mtod(mp, struct ether_vlan_header *);
3846	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3847		etype = ntohs(eh->evl_proto);
3848		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3849	} else {
3850		etype = ntohs(eh->evl_encap_proto);
3851		ehdrlen = ETHER_HDR_LEN;
3852	}
3853
3854	/* Set the ether header length */
3855	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3856
3857	switch (etype) {
3858		case ETHERTYPE_IP:
3859			ip = (struct ip *)(mp->m_data + ehdrlen);
3860			ip_hlen = ip->ip_hl << 2;
3861			ipproto = ip->ip_p;
3862			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3863			break;
3864		case ETHERTYPE_IPV6:
3865			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3866			ip_hlen = sizeof(struct ip6_hdr);
3867			/* XXX-BZ this will go badly in case of ext hdrs. */
3868			ipproto = ip6->ip6_nxt;
3869			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3870			break;
3871		default:
3872			break;
3873	}
3874
3875	vlan_macip_lens |= ip_hlen;
3876	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3877
3878	switch (ipproto) {
3879		case IPPROTO_TCP:
3880#if __FreeBSD_version >= 1000000
3881			if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
3882#else
3883			if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3884#endif
3885				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3886				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3887			}
3888			break;
3889		case IPPROTO_UDP:
3890#if __FreeBSD_version >= 1000000
3891			if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
3892#else
3893			if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3894#endif
3895				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3896				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3897			}
3898			break;
3899
3900#if __FreeBSD_version >= 800000
3901		case IPPROTO_SCTP:
3902#if __FreeBSD_version >= 1000000
3903			if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
3904#else
3905			if (mp->m_pkthdr.csum_flags & CSUM_SCTP) {
3906#endif
3907				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3908				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3909			}
3910			break;
3911#endif
3912		default:
3913			break;
3914	}
3915
3916	/* 82575 needs the queue index added */
3917	if (adapter->hw.mac.type == e1000_82575)
3918		mss_l4len_idx = txr->me << 4;
3919
3920	/* Now copy bits into descriptor */
3921	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3922	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3923	TXD->seqnum_seed = htole32(0);
3924	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3925
3926	/* We've consumed the first desc, adjust counters */
3927	if (++ctxd == txr->num_desc)
3928		ctxd = 0;
3929	txr->next_avail_desc = ctxd;
3930	--txr->tx_avail;
3931
3932        return (0);
3933}
3934
3935/**********************************************************************
3936 *
3937 *  Examine each tx_buffer in the used queue. If the hardware is done
3938 *  processing the packet then free associated resources. The
3939 *  tx_buffer is put back on the free queue.
3940 *
3941 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3942 **********************************************************************/
3943static bool
3944igb_txeof(struct tx_ring *txr)
3945{
3946	struct adapter		*adapter = txr->adapter;
3947	struct ifnet		*ifp = adapter->ifp;
3948	u32			work, processed = 0;
3949	int			limit = adapter->tx_process_limit;
3950	struct igb_tx_buf	*buf;
3951	union e1000_adv_tx_desc *txd;
3952
3953	mtx_assert(&txr->tx_mtx, MA_OWNED);
3954
3955#ifdef DEV_NETMAP
3956	if (netmap_tx_irq(ifp, txr->me))
3957		return (FALSE);
3958#endif /* DEV_NETMAP */
3959
3960	if (txr->tx_avail == txr->num_desc) {
3961		txr->queue_status = IGB_QUEUE_IDLE;
3962		return FALSE;
3963	}
3964
3965	/* Get work starting point */
3966	work = txr->next_to_clean;
3967	buf = &txr->tx_buffers[work];
3968	txd = &txr->tx_base[work];
3969	work -= txr->num_desc; /* The distance to ring end */
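	/*
	 * 'work' is kept as a negative offset from the end of the ring;
	 * it is incremented as descriptors are cleaned and hitting zero
	 * means we have wrapped back to index 0.
	 */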
3970        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3971            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3972	do {
3973		union e1000_adv_tx_desc *eop = buf->eop;
3974		if (eop == NULL) /* No work */
3975			break;
3976
3977		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
3978			break;	/* I/O not complete */
3979
3980		if (buf->m_head) {
3981			txr->bytes +=
3982			    buf->m_head->m_pkthdr.len;
3983			bus_dmamap_sync(txr->txtag,
3984			    buf->map,
3985			    BUS_DMASYNC_POSTWRITE);
3986			bus_dmamap_unload(txr->txtag,
3987			    buf->map);
3988			m_freem(buf->m_head);
3989			buf->m_head = NULL;
3990		}
3991		buf->eop = NULL;
3992		++txr->tx_avail;
3993
3994		/* We clean the range if multi segment */
3995		while (txd != eop) {
3996			++txd;
3997			++buf;
3998			++work;
3999			/* wrap the ring? */
4000			if (__predict_false(!work)) {
4001				work -= txr->num_desc;
4002				buf = txr->tx_buffers;
4003				txd = txr->tx_base;
4004			}
4005			if (buf->m_head) {
4006				txr->bytes +=
4007				    buf->m_head->m_pkthdr.len;
4008				bus_dmamap_sync(txr->txtag,
4009				    buf->map,
4010				    BUS_DMASYNC_POSTWRITE);
4011				bus_dmamap_unload(txr->txtag,
4012				    buf->map);
4013				m_freem(buf->m_head);
4014				buf->m_head = NULL;
4015			}
4016			++txr->tx_avail;
4017			buf->eop = NULL;
4018
4019		}
4020		++txr->packets;
4021		++processed;
4022		++ifp->if_opackets;
4023		txr->watchdog_time = ticks;
4024
4025		/* Try the next packet */
4026		++txd;
4027		++buf;
4028		++work;
4029		/* reset with a wrap */
4030		if (__predict_false(!work)) {
4031			work -= txr->num_desc;
4032			buf = txr->tx_buffers;
4033			txd = txr->tx_base;
4034		}
4035		prefetch(txd);
4036	} while (__predict_true(--limit));
4037
4038	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4039	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4040
4041	work += txr->num_desc;
4042	txr->next_to_clean = work;
4043
4044	/*
4045	** Watchdog calculation: we know there's
4046	** work outstanding or the first return
4047	** would have been taken, so nothing processed
4048	** for too long indicates a hang.
4049	*/
4050	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4051		txr->queue_status |= IGB_QUEUE_HUNG;
4052
4053	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4054		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4055
4056	if (txr->tx_avail == txr->num_desc) {
4057		txr->queue_status = IGB_QUEUE_IDLE;
4058		return (FALSE);
4059	}
4060
4061	return (TRUE);
4062}
4063
4064/*********************************************************************
4065 *
4066 *  Refresh mbuf buffers for RX descriptor rings
4067 *   - now keeps its own state so discards due to resource
4068 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4069 *     it just returns, keeping its placeholder, so it can simply
4070 *     be called again later to retry.
4071 *
4072 **********************************************************************/
4073static void
4074igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4075{
4076	struct adapter		*adapter = rxr->adapter;
4077	bus_dma_segment_t	hseg[1];
4078	bus_dma_segment_t	pseg[1];
4079	struct igb_rx_buf	*rxbuf;
4080	struct mbuf		*mh, *mp;
4081	int			i, j, nsegs, error;
4082	bool			refreshed = FALSE;
4083
4084	i = j = rxr->next_to_refresh;
4085	/*
4086	** Get one descriptor beyond
4087	** our work mark to control
4088	** the loop.
4089        */
4090	if (++j == adapter->num_rx_desc)
4091		j = 0;
4092
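	/*
	 * 'i' is the slot being refreshed and 'j' always runs one ahead;
	 * the loop stops when 'j' reaches the caller's limit, so the
	 * descriptor at 'limit' itself is not touched.
	 */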
4093	while (j != limit) {
4094		rxbuf = &rxr->rx_buffers[i];
4095		/* No hdr mbuf used with header split off */
4096		if (rxr->hdr_split == FALSE)
4097			goto no_split;
4098		if (rxbuf->m_head == NULL) {
4099			mh = m_gethdr(M_NOWAIT, MT_DATA);
4100			if (mh == NULL)
4101				goto update;
4102		} else
4103			mh = rxbuf->m_head;
4104
4105		mh->m_pkthdr.len = mh->m_len = MHLEN;
4107		mh->m_flags |= M_PKTHDR;
4108		/* Get the memory mapping */
4109		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4110		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4111		if (error != 0) {
4112			printf("Refresh mbufs: hdr dmamap load"
4113			    " failure - %d\n", error);
4114			m_free(mh);
4115			rxbuf->m_head = NULL;
4116			goto update;
4117		}
4118		rxbuf->m_head = mh;
4119		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4120		    BUS_DMASYNC_PREREAD);
4121		rxr->rx_base[i].read.hdr_addr =
4122		    htole64(hseg[0].ds_addr);
4123no_split:
4124		if (rxbuf->m_pack == NULL) {
4125			mp = m_getjcl(M_NOWAIT, MT_DATA,
4126			    M_PKTHDR, adapter->rx_mbuf_sz);
4127			if (mp == NULL)
4128				goto update;
4129		} else
4130			mp = rxbuf->m_pack;
4131
4132		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4133		/* Get the memory mapping */
4134		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4135		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4136		if (error != 0) {
4137			printf("Refresh mbufs: payload dmamap load"
4138			    " failure - %d\n", error);
4139			m_free(mp);
4140			rxbuf->m_pack = NULL;
4141			goto update;
4142		}
4143		rxbuf->m_pack = mp;
4144		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4145		    BUS_DMASYNC_PREREAD);
4146		rxr->rx_base[i].read.pkt_addr =
4147		    htole64(pseg[0].ds_addr);
4148		refreshed = TRUE; /* I feel wefreshed :) */
4149
4150		i = j; /* our next is precalculated */
4151		rxr->next_to_refresh = i;
4152		if (++j == adapter->num_rx_desc)
4153			j = 0;
4154	}
4155update:
4156	if (refreshed) /* update tail */
4157		E1000_WRITE_REG(&adapter->hw,
4158		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4159	return;
4160}
4161
4162
4163/*********************************************************************
4164 *
4165 *  Allocate memory for rx_buffer structures. Since we use one
4166 *  rx_buffer per received packet, the maximum number of rx_buffer's
4167 *  that we'll need is equal to the number of receive descriptors
4168 *  that we've allocated.
4169 *
4170 **********************************************************************/
4171static int
4172igb_allocate_receive_buffers(struct rx_ring *rxr)
4173{
4174	struct	adapter 	*adapter = rxr->adapter;
4175	device_t 		dev = adapter->dev;
4176	struct igb_rx_buf	*rxbuf;
4177	int             	i, bsize, error;
4178
4179	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4180	if (!(rxr->rx_buffers =
4181	    (struct igb_rx_buf *) malloc(bsize,
4182	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4183		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4184		error = ENOMEM;
4185		goto fail;
4186	}
4187
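	/*
	 * Two DMA tags are used: a small (MSIZE) tag for the header
	 * mbufs and a jumbo (MJUM9BYTES) tag for the payload clusters.
	 */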
4188	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4189				   1, 0,		/* alignment, bounds */
4190				   BUS_SPACE_MAXADDR,	/* lowaddr */
4191				   BUS_SPACE_MAXADDR,	/* highaddr */
4192				   NULL, NULL,		/* filter, filterarg */
4193				   MSIZE,		/* maxsize */
4194				   1,			/* nsegments */
4195				   MSIZE,		/* maxsegsize */
4196				   0,			/* flags */
4197				   NULL,		/* lockfunc */
4198				   NULL,		/* lockfuncarg */
4199				   &rxr->htag))) {
4200		device_printf(dev, "Unable to create RX DMA tag\n");
4201		goto fail;
4202	}
4203
4204	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4205				   1, 0,		/* alignment, bounds */
4206				   BUS_SPACE_MAXADDR,	/* lowaddr */
4207				   BUS_SPACE_MAXADDR,	/* highaddr */
4208				   NULL, NULL,		/* filter, filterarg */
4209				   MJUM9BYTES,		/* maxsize */
4210				   1,			/* nsegments */
4211				   MJUM9BYTES,		/* maxsegsize */
4212				   0,			/* flags */
4213				   NULL,		/* lockfunc */
4214				   NULL,		/* lockfuncarg */
4215				   &rxr->ptag))) {
4216		device_printf(dev, "Unable to create RX payload DMA tag\n");
4217		goto fail;
4218	}
4219
4220	for (i = 0; i < adapter->num_rx_desc; i++) {
4221		rxbuf = &rxr->rx_buffers[i];
4222		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4223		if (error) {
4224			device_printf(dev,
4225			    "Unable to create RX head DMA maps\n");
4226			goto fail;
4227		}
4228		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4229		if (error) {
4230			device_printf(dev,
4231			    "Unable to create RX packet DMA maps\n");
4232			goto fail;
4233		}
4234	}
4235
4236	return (0);
4237
4238fail:
4239	/* Frees all, but can handle partial completion */
4240	igb_free_receive_structures(adapter);
4241	return (error);
4242}
4243
4244
4245static void
4246igb_free_receive_ring(struct rx_ring *rxr)
4247{
4248	struct	adapter		*adapter = rxr->adapter;
4249	struct igb_rx_buf	*rxbuf;
4250
4251
4252	for (int i = 0; i < adapter->num_rx_desc; i++) {
4253		rxbuf = &rxr->rx_buffers[i];
4254		if (rxbuf->m_head != NULL) {
4255			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4256			    BUS_DMASYNC_POSTREAD);
4257			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4258			rxbuf->m_head->m_flags |= M_PKTHDR;
4259			m_freem(rxbuf->m_head);
4260		}
4261		if (rxbuf->m_pack != NULL) {
4262			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4263			    BUS_DMASYNC_POSTREAD);
4264			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4265			rxbuf->m_pack->m_flags |= M_PKTHDR;
4266			m_freem(rxbuf->m_pack);
4267		}
4268		rxbuf->m_head = NULL;
4269		rxbuf->m_pack = NULL;
4270	}
4271}
4272
4273
4274/*********************************************************************
4275 *
4276 *  Initialize a receive ring and its buffers.
4277 *
4278 **********************************************************************/
4279static int
4280igb_setup_receive_ring(struct rx_ring *rxr)
4281{
4282	struct	adapter		*adapter;
4283	struct  ifnet		*ifp;
4284	device_t		dev;
4285	struct igb_rx_buf	*rxbuf;
4286	bus_dma_segment_t	pseg[1], hseg[1];
4287	struct lro_ctrl		*lro = &rxr->lro;
4288	int			rsize, nsegs, error = 0;
4289#ifdef DEV_NETMAP
4290	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4291	struct netmap_slot *slot;
4292#endif /* DEV_NETMAP */
4293
4294	adapter = rxr->adapter;
4295	dev = adapter->dev;
4296	ifp = adapter->ifp;
4297
4298	/* Clear the ring contents */
4299	IGB_RX_LOCK(rxr);
4300#ifdef DEV_NETMAP
4301	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4302#endif /* DEV_NETMAP */
4303	rsize = roundup2(adapter->num_rx_desc *
4304	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4305	bzero((void *)rxr->rx_base, rsize);
4306
4307	/*
4308	** Free current RX buffer structures and their mbufs
4309	*/
4310	igb_free_receive_ring(rxr);
4311
4312	/* Configure for header split? */
4313	if (igb_header_split)
4314		rxr->hdr_split = TRUE;
4315
4316        /* Now replenish the ring mbufs */
4317	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4318		struct mbuf	*mh, *mp;
4319
4320		rxbuf = &rxr->rx_buffers[j];
4321#ifdef DEV_NETMAP
4322		if (slot) {
4323			/* slot sj is mapped to the i-th NIC-ring entry */
4324			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4325			uint64_t paddr;
4326			void *addr;
4327
4328			addr = PNMB(na, slot + sj, &paddr);
4329			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4330			/* Update descriptor */
4331			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4332			continue;
4333		}
4334#endif /* DEV_NETMAP */
4335		if (rxr->hdr_split == FALSE)
4336			goto skip_head;
4337
4338		/* First the header */
4339		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4340		if (rxbuf->m_head == NULL) {
4341			error = ENOBUFS;
4342                        goto fail;
4343		}
4344		m_adj(rxbuf->m_head, ETHER_ALIGN);
4345		mh = rxbuf->m_head;
4346		mh->m_len = mh->m_pkthdr.len = MHLEN;
4347		mh->m_flags |= M_PKTHDR;
4348		/* Get the memory mapping */
4349		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4350		    rxbuf->hmap, rxbuf->m_head, hseg,
4351		    &nsegs, BUS_DMA_NOWAIT);
4352		if (error != 0) /* Nothing elegant to do here */
4353                        goto fail;
4354		bus_dmamap_sync(rxr->htag,
4355		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4356		/* Update descriptor */
4357		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4358
4359skip_head:
4360		/* Now the payload cluster */
4361		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4362		    M_PKTHDR, adapter->rx_mbuf_sz);
4363		if (rxbuf->m_pack == NULL) {
4364			error = ENOBUFS;
4365                        goto fail;
4366		}
4367		mp = rxbuf->m_pack;
4368		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4369		/* Get the memory mapping */
4370		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4371		    rxbuf->pmap, mp, pseg,
4372		    &nsegs, BUS_DMA_NOWAIT);
4373		if (error != 0)
4374                        goto fail;
4375		bus_dmamap_sync(rxr->ptag,
4376		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4377		/* Update descriptor */
4378		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4379        }
4380
4381	/* Setup our descriptor indices */
4382	rxr->next_to_check = 0;
4383	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4384	rxr->lro_enabled = FALSE;
4385	rxr->rx_split_packets = 0;
4386	rxr->rx_bytes = 0;
4387
4388	rxr->fmp = NULL;
4389	rxr->lmp = NULL;
4390
4391	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4392	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4393
4394	/*
4395	** Now set up the LRO interface; we also
4396	** only do header split when LRO is enabled,
4397	** since header split on its own is usually
4398	** undesirable in these setups.
4399	*/
4400	if (ifp->if_capenable & IFCAP_LRO) {
4401		error = tcp_lro_init(lro);
4402		if (error) {
4403			device_printf(dev, "LRO Initialization failed!\n");
4404			goto fail;
4405		}
4406		INIT_DEBUGOUT("RX LRO Initialized\n");
4407		rxr->lro_enabled = TRUE;
4408		lro->ifp = adapter->ifp;
4409	}
4410
4411	IGB_RX_UNLOCK(rxr);
4412	return (0);
4413
4414fail:
4415	igb_free_receive_ring(rxr);
4416	IGB_RX_UNLOCK(rxr);
4417	return (error);
4418}
4419
4420
4421/*********************************************************************
4422 *
4423 *  Initialize all receive rings.
4424 *
4425 **********************************************************************/
4426static int
4427igb_setup_receive_structures(struct adapter *adapter)
4428{
4429	struct rx_ring *rxr = adapter->rx_rings;
4430	int i;
4431
4432	for (i = 0; i < adapter->num_queues; i++, rxr++)
4433		if (igb_setup_receive_ring(rxr))
4434			goto fail;
4435
4436	return (0);
4437fail:
4438	/*
4439	 * Free RX buffers allocated so far; we only handle
4440	 * the rings that completed, since the failing case
4441	 * cleaned up after itself. 'i' is the endpoint.
4442	 */
4443	for (int j = 0; j < i; ++j) {
4444		rxr = &adapter->rx_rings[j];
4445		IGB_RX_LOCK(rxr);
4446		igb_free_receive_ring(rxr);
4447		IGB_RX_UNLOCK(rxr);
4448	}
4449
4450	return (ENOBUFS);
4451}
4452
4453/*********************************************************************
4454 *
4455 *  Enable receive unit.
4456 *
4457 **********************************************************************/
4458static void
4459igb_initialize_receive_units(struct adapter *adapter)
4460{
4461	struct rx_ring	*rxr = adapter->rx_rings;
4462	struct ifnet	*ifp = adapter->ifp;
4463	struct e1000_hw *hw = &adapter->hw;
4464	u32		rctl, rxcsum, psize, srrctl = 0;
4465
4466	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4467
4468	/*
4469	 * Make sure receives are disabled while setting
4470	 * up the descriptor ring
4471	 */
4472	rctl = E1000_READ_REG(hw, E1000_RCTL);
4473	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4474
4475	/*
4476	** Set up for header split
4477	*/
4478	if (igb_header_split) {
4479		/* Use a standard mbuf for the header */
4480		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4481		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4482	} else
4483		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4484
4485	/*
4486	** Set up for jumbo frames
4487	*/
4488	if (ifp->if_mtu > ETHERMTU) {
4489		rctl |= E1000_RCTL_LPE;
4490		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4491			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4492			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4493		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4494			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4495			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4496		}
4497		/* Set maximum packet len */
4498		psize = adapter->max_frame_size;
4499		/* are we on a vlan? */
4500		if (adapter->ifp->if_vlantrunk != NULL)
4501			psize += VLAN_TAG_SIZE;
4502		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4503	} else {
4504		rctl &= ~E1000_RCTL_LPE;
4505		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4506		rctl |= E1000_RCTL_SZ_2048;
4507	}
4508
4509	/*
4510	 * If TX flow control is disabled and there's >1 queue defined,
4511	 * enable DROP.
4512	 *
4513	 * This drops frames rather than hanging the RX MAC for all queues.
4514	 */
4515	if ((adapter->num_queues > 1) &&
4516	    (adapter->fc == e1000_fc_none ||
4517	     adapter->fc == e1000_fc_rx_pause)) {
4518		srrctl |= E1000_SRRCTL_DROP_EN;
4519	}
4520
4521	/* Setup the Base and Length of the Rx Descriptor Rings */
4522	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4523		u64 bus_addr = rxr->rxdma.dma_paddr;
4524		u32 rxdctl;
4525
4526		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4527		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4528		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4529		    (uint32_t)(bus_addr >> 32));
4530		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4531		    (uint32_t)bus_addr);
4532		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4533		/* Enable this Queue */
4534		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4535		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4536		rxdctl &= 0xFFF00000;
4537		rxdctl |= IGB_RX_PTHRESH;
4538		rxdctl |= IGB_RX_HTHRESH << 8;
4539		rxdctl |= IGB_RX_WTHRESH << 16;
4540		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4541	}
4542
4543	/*
4544	** Setup for RX MultiQueue
4545	*/
4546	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4547	if (adapter->num_queues > 1) {
4548		u32 random[10], mrqc, shift = 0;
4549		union igb_reta {
4550			u32 dword;
4551			u8  bytes[4];
4552		} reta;
4553
4554		arc4rand(&random, sizeof(random), 0);
4555		if (adapter->hw.mac.type == e1000_82575)
4556			shift = 6;
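		/* The 82575 expects the queue number in the upper bits of each RETA entry, hence the shift */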
4557		/* Fill the 128-entry redirection table, four 8-bit entries per RETA register */
4558		for (int i = 0; i < 128; i++) {
4559			reta.bytes[i & 3] =
4560			    (i % adapter->num_queues) << shift;
4561			if ((i & 3) == 3)
4562				E1000_WRITE_REG(hw,
4563				    E1000_RETA(i >> 2), reta.dword);
4564		}
4565		/* Now fill in hash table */
4566		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4567		for (int i = 0; i < 10; i++)
4568			E1000_WRITE_REG_ARRAY(hw,
4569			    E1000_RSSRK(0), i, random[i]);
4570
4571		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4572		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4573		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4574		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4575		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4576		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4577		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4578		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4579
4580		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4581
4582		/*
4583		** NOTE: Receive Full-Packet Checksum Offload
4584		** is mutually exclusive with Multiqueue. However
4585		** this is not the same as TCP/IP checksum
4586		** offload, which still works.
4587		*/
4588		rxcsum |= E1000_RXCSUM_PCSD;
4589#if __FreeBSD_version >= 800000
4590		/* For SCTP Offload */
4591		if ((hw->mac.type != e1000_82575) &&
4592		    (ifp->if_capenable & IFCAP_RXCSUM))
4593			rxcsum |= E1000_RXCSUM_CRCOFL;
4594#endif
4595	} else {
4596		/* Non RSS setup */
4597		if (ifp->if_capenable & IFCAP_RXCSUM) {
4598			rxcsum |= E1000_RXCSUM_IPPCSE;
4599#if __FreeBSD_version >= 800000
4600			if (adapter->hw.mac.type != e1000_82575)
4601				rxcsum |= E1000_RXCSUM_CRCOFL;
4602#endif
4603		} else
4604			rxcsum &= ~E1000_RXCSUM_TUOFL;
4605	}
4606	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4607
4608	/* Setup the Receive Control Register */
4609	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4610	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4611		   E1000_RCTL_RDMTS_HALF |
4612		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4613	/* Strip CRC bytes. */
4614	rctl |= E1000_RCTL_SECRC;
4615	/* Make sure VLAN Filters are off */
4616	rctl &= ~E1000_RCTL_VFE;
4617	/* Don't store bad packets */
4618	rctl &= ~E1000_RCTL_SBP;
4619
4620	/* Enable Receives */
4621	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4622
4623	/*
4624	 * Setup the HW Rx Head and Tail Descriptor Pointers
4625	 *   - needs to be after enable
4626	 */
4627	for (int i = 0; i < adapter->num_queues; i++) {
4628		rxr = &adapter->rx_rings[i];
4629		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4630#ifdef DEV_NETMAP
4631		/*
4632		 * an init() while a netmap client is active must
4633		 * preserve the rx buffers passed to userspace.
4634		 * In this driver it means we adjust RDT to
4635		 * something different from next_to_refresh
4636		 * (which is not used in netmap mode).
4637		 */
4638		if (ifp->if_capenable & IFCAP_NETMAP) {
4639			struct netmap_adapter *na = NA(adapter->ifp);
4640			struct netmap_kring *kring = &na->rx_rings[i];
4641			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4642
4643			if (t >= adapter->num_rx_desc)
4644				t -= adapter->num_rx_desc;
4645			else if (t < 0)
4646				t += adapter->num_rx_desc;
4647			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4648		} else
4649#endif /* DEV_NETMAP */
4650		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4651	}
4652	return;
4653}
4654
4655/*********************************************************************
4656 *
4657 *  Free receive rings.
4658 *
4659 **********************************************************************/
4660static void
4661igb_free_receive_structures(struct adapter *adapter)
4662{
4663	struct rx_ring *rxr = adapter->rx_rings;
4664
4665	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4666		struct lro_ctrl	*lro = &rxr->lro;
4667		igb_free_receive_buffers(rxr);
4668		tcp_lro_free(lro);
4669		igb_dma_free(adapter, &rxr->rxdma);
4670	}
4671
4672	free(adapter->rx_rings, M_DEVBUF);
4673}
4674
4675/*********************************************************************
4676 *
4677 *  Free receive ring data structures.
4678 *
4679 **********************************************************************/
4680static void
4681igb_free_receive_buffers(struct rx_ring *rxr)
4682{
4683	struct adapter		*adapter = rxr->adapter;
4684	struct igb_rx_buf	*rxbuf;
4685	int i;
4686
4687	INIT_DEBUGOUT("free_receive_structures: begin");
4688
4689	/* Cleanup any existing buffers */
4690	if (rxr->rx_buffers != NULL) {
4691		for (i = 0; i < adapter->num_rx_desc; i++) {
4692			rxbuf = &rxr->rx_buffers[i];
4693			if (rxbuf->m_head != NULL) {
4694				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4695				    BUS_DMASYNC_POSTREAD);
4696				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4697				rxbuf->m_head->m_flags |= M_PKTHDR;
4698				m_freem(rxbuf->m_head);
4699			}
4700			if (rxbuf->m_pack != NULL) {
4701				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4702				    BUS_DMASYNC_POSTREAD);
4703				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4704				rxbuf->m_pack->m_flags |= M_PKTHDR;
4705				m_freem(rxbuf->m_pack);
4706			}
4707			rxbuf->m_head = NULL;
4708			rxbuf->m_pack = NULL;
4709			if (rxbuf->hmap != NULL) {
4710				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4711				rxbuf->hmap = NULL;
4712			}
4713			if (rxbuf->pmap != NULL) {
4714				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4715				rxbuf->pmap = NULL;
4716			}
4717		}
4718		if (rxr->rx_buffers != NULL) {
4719			free(rxr->rx_buffers, M_DEVBUF);
4720			rxr->rx_buffers = NULL;
4721		}
4722	}
4723
4724	if (rxr->htag != NULL) {
4725		bus_dma_tag_destroy(rxr->htag);
4726		rxr->htag = NULL;
4727	}
4728	if (rxr->ptag != NULL) {
4729		bus_dma_tag_destroy(rxr->ptag);
4730		rxr->ptag = NULL;
4731	}
4732}
4733
4734static __inline void
4735igb_rx_discard(struct rx_ring *rxr, int i)
4736{
4737	struct igb_rx_buf	*rbuf;
4738
4739	rbuf = &rxr->rx_buffers[i];
4740
4741	/* Partially received? Free the chain */
4742	if (rxr->fmp != NULL) {
4743		rxr->fmp->m_flags |= M_PKTHDR;
4744		m_freem(rxr->fmp);
4745		rxr->fmp = NULL;
4746		rxr->lmp = NULL;
4747	}
4748
4749	/*
4750	** With advanced descriptors the writeback
4751	** clobbers the buffer addrs, so it's easier
4752	** to just free the existing mbufs and take
4753	** the normal refresh path to get new buffers
4754	** and mapping.
4755	*/
4756	if (rbuf->m_head) {
4757		m_free(rbuf->m_head);
4758		rbuf->m_head = NULL;
4759		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4760	}
4761
4762	if (rbuf->m_pack) {
4763		m_free(rbuf->m_pack);
4764		rbuf->m_pack = NULL;
4765		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4766	}
4767
4768	return;
4769}
4770
4771static __inline void
4772igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4773{
4774
4775	/*
4776	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4777	 * has been verified by hardware, and which have no VLAN tag in the
4778	 * ethernet header.
4779	 */
4780	if (rxr->lro_enabled &&
4781	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4782	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4783	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4784	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4785	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4786	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4787		/*
4788		 * Send to the stack if:
4789		 **  - LRO not enabled, or
4790		 **  - no LRO resources, or
4791		 **  - lro enqueue fails
4792		 */
4793		if (rxr->lro.lro_cnt != 0)
4794			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4795				return;
4796	}
4797	IGB_RX_UNLOCK(rxr);
4798	(*ifp->if_input)(ifp, m);
4799	IGB_RX_LOCK(rxr);
4800}
4801
4802/*********************************************************************
4803 *
4804 *  This routine executes in interrupt context. It replenishes
4805 *  the mbufs in the descriptor and sends data which has been
4806 *  dma'ed into host memory to upper layer.
4807 *
4808 *  We loop at most count times if count is > 0, or until done if
4809 *  count < 0.
4810 *
4811 *  Return TRUE if more to clean, FALSE otherwise
4812 *********************************************************************/
4813static bool
4814igb_rxeof(struct igb_queue *que, int count, int *done)
4815{
4816	struct adapter		*adapter = que->adapter;
4817	struct rx_ring		*rxr = que->rxr;
4818	struct ifnet		*ifp = adapter->ifp;
4819	struct lro_ctrl		*lro = &rxr->lro;
4820	struct lro_entry	*queued;
4821	int			i, processed = 0, rxdone = 0;
4822	u32			ptype, staterr = 0;
4823	union e1000_adv_rx_desc	*cur;
4824
4825	IGB_RX_LOCK(rxr);
4826	/* Sync the ring. */
4827	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4828	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4829
4830#ifdef DEV_NETMAP
4831	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4832		IGB_RX_UNLOCK(rxr);
4833		return (FALSE);
4834	}
4835#endif /* DEV_NETMAP */
4836
4837	/* Main clean loop */
4838	for (i = rxr->next_to_check; count != 0;) {
4839		struct mbuf		*sendmp, *mh, *mp;
4840		struct igb_rx_buf	*rxbuf;
4841		u16			hlen, plen, hdr, vtag;
4842		bool			eop = FALSE;
4843
4844		cur = &rxr->rx_base[i];
4845		staterr = le32toh(cur->wb.upper.status_error);
4846		if ((staterr & E1000_RXD_STAT_DD) == 0)
4847			break;
4848		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4849			break;
4850		count--;
4851		sendmp = mh = mp = NULL;
4852		cur->wb.upper.status_error = 0;
4853		rxbuf = &rxr->rx_buffers[i];
4854		plen = le16toh(cur->wb.upper.length);
4855		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
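		/* i350/i354 report the VLAN field of loopback (LB) packets in big-endian order */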
4856		if (((adapter->hw.mac.type == e1000_i350) ||
4857		    (adapter->hw.mac.type == e1000_i354)) &&
4858		    (staterr & E1000_RXDEXT_STATERR_LB))
4859			vtag = be16toh(cur->wb.upper.vlan);
4860		else
4861			vtag = le16toh(cur->wb.upper.vlan);
4862		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4863		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4864
4865		/*
4866		 * Free the frame (all segments) if we're at EOP and
4867		 * it's an error.
4868		 *
4869		 * The datasheet states that EOP + status is only valid for
4870		 * the final segment in a multi-segment frame.
4871		 */
4872		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4873			adapter->dropped_pkts++;
4874			++rxr->rx_discarded;
4875			igb_rx_discard(rxr, i);
4876			goto next_desc;
4877		}
4878
4879		/*
4880		** The way the hardware is configured to
4881		** split, it will ONLY use the header buffer
4882		** when header split is enabled; otherwise we
4883		** get normal behavior, i.e. both header and
4884		** payload are DMA'd into the payload buffer.
4885		**
4886		** The fmp test catches the case where a
4887		** packet spans multiple descriptors; in that
4888		** case only the first header is valid.
4889		*/
4890		if (rxr->hdr_split && rxr->fmp == NULL) {
4891			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4892			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4893			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4894			if (hlen > IGB_HDR_BUF)
4895				hlen = IGB_HDR_BUF;
4896			mh = rxr->rx_buffers[i].m_head;
4897			mh->m_len = hlen;
4898			/* clear buf pointer for refresh */
4899			rxbuf->m_head = NULL;
4900			/*
4901			** Get the payload length; this
4902			** could be zero if it's a small
4903			** packet.
4904			*/
4905			if (plen > 0) {
4906				mp = rxr->rx_buffers[i].m_pack;
4907				mp->m_len = plen;
4908				mh->m_next = mp;
4909				/* clear buf pointer */
4910				rxbuf->m_pack = NULL;
4911				rxr->rx_split_packets++;
4912			}
4913		} else {
4914			/*
4915			** Either no header split, or a
4916			** secondary piece of a fragmented
4917			** split packet.
4918			*/
4919			mh = rxr->rx_buffers[i].m_pack;
4920			mh->m_len = plen;
4921			/* clear buf info for refresh */
4922			rxbuf->m_pack = NULL;
4923		}
4924		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4925
4926		++processed; /* So we know when to refresh */
4927
4928		/* Initial frame - setup */
4929		if (rxr->fmp == NULL) {
4930			mh->m_pkthdr.len = mh->m_len;
4931			/* Save the head of the chain */
4932			rxr->fmp = mh;
4933			rxr->lmp = mh;
4934			if (mp != NULL) {
4935				/* Add payload if split */
4936				mh->m_pkthdr.len += mp->m_len;
4937				rxr->lmp = mh->m_next;
4938			}
4939		} else {
4940			/* Chain mbufs together */
4941			rxr->lmp->m_next = mh;
4942			rxr->lmp = rxr->lmp->m_next;
4943			rxr->fmp->m_pkthdr.len += mh->m_len;
4944		}
4945
4946		if (eop) {
4947			rxr->fmp->m_pkthdr.rcvif = ifp;
4948			ifp->if_ipackets++;
4949			rxr->rx_packets++;
4950			/* capture data for AIM */
4951			rxr->packets++;
4952			rxr->bytes += rxr->fmp->m_pkthdr.len;
4953			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4954
4955			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4956				igb_rx_checksum(staterr, rxr->fmp, ptype);
4957
4958			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4959			    (staterr & E1000_RXD_STAT_VP) != 0) {
4960				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4961				rxr->fmp->m_flags |= M_VLANTAG;
4962			}
4963
4964			/*
4965			 * In case of multiqueue, we have RXCSUM.PCSD bit set
4966			 * and never cleared. This means we have RSS hash
4967			 * available to be used.
4968			 */
4969			if (adapter->num_queues > 1) {
4970				rxr->fmp->m_pkthdr.flowid =
4971				    le32toh(cur->wb.lower.hi_dword.rss);
4972				/*
4973				 * Full RSS support is not available in
4974				 * FreeBSD 10, so set the hash type to
4975				 * OPAQUE.
4976				 */
4977				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4978			} else {
4979#ifndef IGB_LEGACY_TX
4980				rxr->fmp->m_pkthdr.flowid = que->msix;
4981				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
4982#endif
4983			}
4984			sendmp = rxr->fmp;
4985			/* Make sure to set M_PKTHDR. */
4986			sendmp->m_flags |= M_PKTHDR;
4987			rxr->fmp = NULL;
4988			rxr->lmp = NULL;
4989		}
4990
4991next_desc:
4992		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4993		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4994
4995		/* Advance our pointers to the next descriptor. */
4996		if (++i == adapter->num_rx_desc)
4997			i = 0;
4998		/*
4999		** Send to the stack or LRO
5000		*/
5001		if (sendmp != NULL) {
5002			rxr->next_to_check = i;
5003			igb_rx_input(rxr, ifp, sendmp, ptype);
5004			i = rxr->next_to_check;
5005			rxdone++;
5006		}
5007
5008		/* Every 8 descriptors we go to refresh mbufs */
5009		if (processed == 8) {
5010                        igb_refresh_mbufs(rxr, i);
5011                        processed = 0;
5012		}
5013	}
5014
5015	/* Catch any remainders */
5016	if (igb_rx_unrefreshed(rxr))
5017		igb_refresh_mbufs(rxr, i);
5018
5019	rxr->next_to_check = i;
5020
5021	/*
5022	 * Flush any outstanding LRO work
5023	 */
5024	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5025		SLIST_REMOVE_HEAD(&lro->lro_active, next);
5026		tcp_lro_flush(lro, queued);
5027	}
5028
5029	if (done != NULL)
5030		*done += rxdone;
5031
5032	IGB_RX_UNLOCK(rxr);
5033	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5034}
5035
5036/*********************************************************************
5037 *
5038 *  Verify that the hardware indicated that the checksum is valid.
5039 *  Inform the stack about the status of the checksum so that the
5040 *  stack doesn't spend time verifying it again.
5041 *
5042 *********************************************************************/
5043static void
5044igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5045{
5046	u16 status = (u16)staterr;
5047	u8  errors = (u8) (staterr >> 24);
5048	int sctp;
5049
5050	/* The Ignore Checksum (IXSM) bit is set; report nothing to the stack */
5051	if (status & E1000_RXD_STAT_IXSM) {
5052		mp->m_pkthdr.csum_flags = 0;
5053		return;
5054	}
5055
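	/*
	 * SCTP frames carry a CRC rather than a pseudo-header checksum,
	 * so csum_data is left unset for them below.
	 */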
5056	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5057	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5058		sctp = 1;
5059	else
5060		sctp = 0;
5061	if (status & E1000_RXD_STAT_IPCS) {
5062		/* Did it pass? */
5063		if (!(errors & E1000_RXD_ERR_IPE)) {
5064			/* IP Checksum Good */
5065			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5066			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5067		} else
5068			mp->m_pkthdr.csum_flags = 0;
5069	}
5070
5071	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5072		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5073#if __FreeBSD_version >= 800000
5074		if (sctp) /* reassign */
5075			type = CSUM_SCTP_VALID;
5076#endif
5077		/* Did it pass? */
5078		if (!(errors & E1000_RXD_ERR_TCPE)) {
5079			mp->m_pkthdr.csum_flags |= type;
5080			if (sctp == 0)
5081				mp->m_pkthdr.csum_data = htons(0xffff);
5082		}
5083	}
5084	return;
5085}
5086
5087/*
5088 * This routine is run via a vlan
5089 * config EVENT
5090 */
5091static void
5092igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5093{
5094	struct adapter	*adapter = ifp->if_softc;
5095	u32		index, bit;
5096
5097	if (ifp->if_softc !=  arg)   /* Not our event */
5098		return;
5099
5100	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5101                return;
5102
5103	IGB_CORE_LOCK(adapter);
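	/*
	 * The shadow VFTA is an array of 128 32-bit words: bits 11:5
	 * of the tag select the word, bits 4:0 the bit within it.
	 */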
5104	index = (vtag >> 5) & 0x7F;
5105	bit = vtag & 0x1F;
5106	adapter->shadow_vfta[index] |= (1 << bit);
5107	++adapter->num_vlans;
5108	/* Change hw filter setting */
5109	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5110		igb_setup_vlan_hw_support(adapter);
5111	IGB_CORE_UNLOCK(adapter);
5112}
5113
5114/*
5115 * This routine is run via a vlan
5116 * unconfig EVENT
5117 */
5118static void
5119igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5120{
5121	struct adapter	*adapter = ifp->if_softc;
5122	u32		index, bit;
5123
5124	if (ifp->if_softc !=  arg)
5125		return;
5126
5127	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5128                return;
5129
5130	IGB_CORE_LOCK(adapter);
5131	index = (vtag >> 5) & 0x7F;
5132	bit = vtag & 0x1F;
5133	adapter->shadow_vfta[index] &= ~(1 << bit);
5134	--adapter->num_vlans;
5135	/* Change hw filter setting */
5136	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5137		igb_setup_vlan_hw_support(adapter);
5138	IGB_CORE_UNLOCK(adapter);
5139}
5140
5141static void
5142igb_setup_vlan_hw_support(struct adapter *adapter)
5143{
5144	struct e1000_hw *hw = &adapter->hw;
5145	struct ifnet	*ifp = adapter->ifp;
5146	u32             reg;
5147
5148	if (adapter->vf_ifp) {
5149		e1000_rlpml_set_vf(hw,
5150		    adapter->max_frame_size + VLAN_TAG_SIZE);
5151		return;
5152	}
5153
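	/* Turn on VLAN tag handling in hardware (CTRL.VME) */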
5154	reg = E1000_READ_REG(hw, E1000_CTRL);
5155	reg |= E1000_CTRL_VME;
5156	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5157
5158	/* Enable the Filter Table */
5159	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5160		reg = E1000_READ_REG(hw, E1000_RCTL);
5161		reg &= ~E1000_RCTL_CFIEN;
5162		reg |= E1000_RCTL_VFE;
5163		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5164	}
5165
5166	/* Update the frame size */
5167	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5168	    adapter->max_frame_size + VLAN_TAG_SIZE);
5169
5170	/* Don't bother with table if no vlans */
5171	if ((adapter->num_vlans == 0) ||
5172	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5173                return;
5174	/*
5175	** A soft reset zeroes out the VFTA, so
5176	** we need to repopulate it now.
5177	*/
5178	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5179                if (adapter->shadow_vfta[i] != 0) {
5180			if (adapter->vf_ifp)
5181				e1000_vfta_set_vf(hw,
5182				    adapter->shadow_vfta[i], TRUE);
5183			else
5184				e1000_write_vfta(hw,
5185				    i, adapter->shadow_vfta[i]);
5186		}
5187}
5188
5189static void
5190igb_enable_intr(struct adapter *adapter)
5191{
5192	/* With MSI-X, set which vectors auto-clear (EIAC) and auto-mask (EIAM), then enable them */
5193	if (adapter->msix_mem) {
5194		u32 mask = (adapter->que_mask | adapter->link_mask);
5195		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5196		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5197		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5198		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5199		    E1000_IMS_LSC);
5200	} else {
5201		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5202		    IMS_ENABLE_MASK);
5203	}
5204	E1000_WRITE_FLUSH(&adapter->hw);
5205
5206	return;
5207}
5208
5209static void
5210igb_disable_intr(struct adapter *adapter)
5211{
5212	if (adapter->msix_mem) {
5213		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5214		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5215	}
5216	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5217	E1000_WRITE_FLUSH(&adapter->hw);
5218	return;
5219}
5220
5221/*
5222 * Bit of a misnomer: what this really means is
5223 * to enable OS management of the system, i.e.
5224 * to disable special hardware management features.
5225 */
5226static void
5227igb_init_manageability(struct adapter *adapter)
5228{
5229	if (adapter->has_manage) {
5230		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5231		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5232
5233		/* disable hardware interception of ARP */
5234		manc &= ~(E1000_MANC_ARP_EN);
5235
5236                /* enable receiving management packets to the host */
5237		manc |= E1000_MANC_EN_MNG2HOST;
5238		manc2h |= 1 << 5;  /* Mng Port 623 */
5239		manc2h |= 1 << 6;  /* Mng Port 664 */
5240		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5241		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5242	}
5243}
5244
5245/*
5246 * Give control back to hardware management
5247 * controller if there is one.
5248 */
5249static void
5250igb_release_manageability(struct adapter *adapter)
5251{
5252	if (adapter->has_manage) {
5253		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5254
5255		/* re-enable hardware interception of ARP */
5256		manc |= E1000_MANC_ARP_EN;
5257		manc &= ~E1000_MANC_EN_MNG2HOST;
5258
5259		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5260	}
5261}
5262
5263/*
5264 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5265 * For ASF and Pass Through versions of f/w this means that
5266 * the driver is loaded.
5267 *
5268 */
5269static void
5270igb_get_hw_control(struct adapter *adapter)
5271{
5272	u32 ctrl_ext;
5273
5274	if (adapter->vf_ifp)
5275		return;
5276
5277	/* Let firmware know the driver has taken over */
5278	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5279	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5280	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5281}
5282
5283/*
5284 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5285 * For ASF and Pass Through versions of f/w this means that the
5286 * driver is no longer loaded.
5287 *
5288 */
5289static void
5290igb_release_hw_control(struct adapter *adapter)
5291{
5292	u32 ctrl_ext;
5293
5294	if (adapter->vf_ifp)
5295		return;
5296
5297	/* Let firmware take over control of the h/w */
5298	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5299	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5300	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5301}
5302
5303static int
5304igb_is_valid_ether_addr(uint8_t *addr)
5305{
5306	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5307
5308	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5309		return (FALSE);
5310	}
5311
5312	return (TRUE);
5313}
5314
5315
5316/*
5317 * Enable PCI Wake On Lan capability
5318 */
5319static void
5320igb_enable_wakeup(device_t dev)
5321{
5322	struct adapter	*adapter = device_get_softc(dev);
5323	struct ifnet	*ifp = adapter->ifp;
5324	u32		pmc, ctrl, ctrl_ext, rctl, wuc;
5325	u16		status;
5326
5327	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5328		return;
5329
5330	adapter->wol = E1000_READ_REG(&adapter->hw, E1000_WUFC);
5331	if (ifp->if_capenable & IFCAP_WOL_MAGIC)
5332		adapter->wol |=  E1000_WUFC_MAG;
5333	else
5334		adapter->wol &= ~E1000_WUFC_MAG;
5335
5336	if (ifp->if_capenable & IFCAP_WOL_MCAST) {
5337		adapter->wol |=  E1000_WUFC_MC;
5338		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5339		rctl |= E1000_RCTL_MPE;
5340		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5341	} else
5342		adapter->wol &= ~E1000_WUFC_MC;
5343
5344	if (ifp->if_capenable & IFCAP_WOL_UCAST)
5345		adapter->wol |=  E1000_WUFC_EX;
5346	else
5347		adapter->wol &= ~E1000_WUFC_EX;
5348
5349	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5350		goto pme;
5351
5352	/* Advertise the wakeup capability */
5353	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5354	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5355	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5356
5357	/* Keep the laser running on Fiber adapters */
5358	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5359	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5360		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5361		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5362		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5363	}
5364
5365	/* Enable wakeup by the MAC */
5366	wuc = E1000_READ_REG(&adapter->hw, E1000_WUC);
5367	wuc |= E1000_WUC_PME_EN | E1000_WUC_APME;
5368	E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc);
5369	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5370
5371pme:
5372	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5373	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5374	if (ifp->if_capenable & IFCAP_WOL)
5375		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5376	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5377}
5378
5379static void
5380igb_led_func(void *arg, int onoff)
5381{
5382	struct adapter	*adapter = arg;
5383
5384	IGB_CORE_LOCK(adapter);
5385	if (onoff) {
5386		e1000_setup_led(&adapter->hw);
5387		e1000_led_on(&adapter->hw);
5388	} else {
5389		e1000_led_off(&adapter->hw);
5390		e1000_cleanup_led(&adapter->hw);
5391	}
5392	IGB_CORE_UNLOCK(adapter);
5393}
5394
5395/**********************************************************************
5396 *
5397 *  Update the board statistics counters.
5398 *
5399 **********************************************************************/
5400static void
5401igb_update_stats_counters(struct adapter *adapter)
5402{
5403	struct ifnet		*ifp;
5404        struct e1000_hw		*hw = &adapter->hw;
5405	struct e1000_hw_stats	*stats;
5406
5407	/*
5408	** The virtual function adapter has only a
5409	** small, controlled set of stats, so do only
5410	** those and return.
5411	*/
5412	if (adapter->vf_ifp) {
5413		igb_update_vf_stats_counters(adapter);
5414		return;
5415	}
5416
5417	stats = (struct e1000_hw_stats	*)adapter->stats;
5418
5419	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5420	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5421		stats->symerrs +=
5422		    E1000_READ_REG(hw,E1000_SYMERRS);
5423		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5424	}

	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
	stats->scc += E1000_READ_REG(hw, E1000_SCC);
	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);

	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
	stats->colc += E1000_READ_REG(hw, E1000_COLC);
	stats->dc += E1000_READ_REG(hw, E1000_DC);
	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
	/*
	** For watchdog management we need to know if we have been
	** paused during the last interval, so capture that here.
	*/
	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	stats->xoffrxc += adapter->pause_frames;
	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);

	/*
	** For the 64-bit byte counters the low dword must be read first;
	** both registers clear on the read of the high dword.
	*/
	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);

	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
	stats->roc += E1000_READ_REG(hw, E1000_ROC);
	stats->rjc += E1000_READ_REG(hw, E1000_RJC);

	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);

	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);

	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);

	/* Interrupt Counts */

	stats->iac += E1000_READ_REG(hw, E1000_IAC);
	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);

	/* Host to Card Statistics */

	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);

	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);

	ifp = adapter->ifp;
	ifp->if_collisions = stats->colc;

	/* Rx Errors */
	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
	    stats->crcerrs + stats->algnerrc +
	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;

	/* Tx Errors */
	ifp->if_oerrors = stats->ecol +
	    stats->latecol + adapter->watchdog_events;

	/* Driver specific counters */
	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
	adapter->packet_buf_alloc_tx =
	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
	adapter->packet_buf_alloc_rx =
	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
}


/**********************************************************************
 *
 *  Initialize the VF board statistics counters.
 *
 **********************************************************************/
static void
igb_vf_init_stats(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_vf_stats	*stats;

	stats = (struct e1000_vf_stats *)adapter->stats;
	if (stats == NULL)
		return;
	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
}

/**********************************************************************
 *
 *  Update the VF board statistics counters.
 *
 **********************************************************************/
static void
igb_update_vf_stats_counters(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_vf_stats	*stats;

	if (adapter->link_speed == 0)
		return;

	stats = (struct e1000_vf_stats *)adapter->stats;

	UPDATE_VF_REG(E1000_VFGPRC,
	    stats->last_gprc, stats->gprc);
	UPDATE_VF_REG(E1000_VFGORC,
	    stats->last_gorc, stats->gorc);
	UPDATE_VF_REG(E1000_VFGPTC,
	    stats->last_gptc, stats->gptc);
	UPDATE_VF_REG(E1000_VFGOTC,
	    stats->last_gotc, stats->gotc);
	UPDATE_VF_REG(E1000_VFMPRC,
	    stats->last_mprc, stats->mprc);
}
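/*
 * Note on the accumulation above: the VF statistics registers are 32-bit
 * rolling counters, so UPDATE_VF_REG() is expected to fold only the delta
 * since the last sample into the 64-bit software counter and to tolerate
 * wrap-around.  A minimal sketch of that idea (illustrative only; the real
 * macro lives in the driver header and may differ in detail):
 *
 *	u32 cur = E1000_READ_REG(hw, reg);
 *	count += (u32)(cur - last);	// unsigned subtraction handles wrap
 *	last = cur;
 */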

/* Export a single 32-bit register via a read-only sysctl. */
static int
igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}
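/*
 * Note (illustrative): handlers of this form receive the softc in oid_arg1
 * and the register offset in oid_arg2, so one handler serves any number of
 * registers; see the txd_head/txd_tail and rxd_head/rxd_tail OIDs created
 * in igb_add_hw_stats() below, which pass the E1000_TDH()/E1000_TDT() and
 * E1000_RDH()/E1000_RDT() offsets respectively.
 */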

/*
**  Tuneable interrupt rate handler
*/
static int
igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
{
	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
	int			error;
	u32			reg, usec, rate;

	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
	usec = ((reg & 0x7FFC) >> 2);
	if (usec > 0)
		rate = 1000000 / usec;
	else
		rate = 0;
	error = sysctl_handle_int(oidp, &rate, 0, req);
	if (error || !req->newptr)
		return (error);
	return (0);
}
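/*
 * Worked example for the conversion above (illustrative): with the EITR
 * interval field holding 125, reg & 0x7FFC yields 500, usec = 500 >> 2
 * = 125, and the reported rate is 1000000 / 125 = 8000 interrupts per
 * second.  An interval of 0 means moderation is effectively off and a
 * rate of 0 is reported.
 */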

/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
igb_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
			CTLFLAG_RD, &adapter->dropped_pkts,
			"Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
			CTLFLAG_RD, &adapter->link_irq,
			"Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
			"Defragmenting mbuf chain failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
			CTLFLAG_RD, &adapter->no_tx_dma_setup,
			"Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
			CTLFLAG_RD, &adapter->rx_overruns,
			"RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
			CTLFLAG_RD, &adapter->watchdog_events,
			"Watchdog timeouts");

	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
			CTLFLAG_RD, &adapter->device_control,
			"Device Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
			CTLFLAG_RD, &adapter->rx_control,
			"Receiver Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
			CTLFLAG_RD, &adapter->int_mask,
			"Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
			CTLFLAG_RD, &adapter->eint_mask,
			"Extended Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
			"Transmit Buffer Packet Allocation");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
			"Receive Buffer Packet Allocation");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
			"Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
			"Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		struct lro_ctrl *lro = &rxr->lro;

		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
					    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
				sizeof(&adapter->queues[i]),
				igb_sysctl_interrupt_rate_handler,
				"IU", "Interrupt Rate");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
				igb_sysctl_reg_handler, "IU",
				"Transmit Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
				CTLFLAG_RD, &txr->no_desc_avail,
				"Queue No Descriptor Available");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
				CTLFLAG_RD, &txr->total_packets,
				"Queue Packets Transmitted");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
				igb_sysctl_reg_handler, "IU",
				"Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
				igb_sysctl_reg_handler, "IU",
				"Receive Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
				CTLFLAG_RD, &rxr->rx_packets,
				"Queue Packets Received");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
				CTLFLAG_RD, &rxr->rx_bytes,
				"Queue Bytes Received");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
				CTLFLAG_RD, &lro->lro_queued, 0,
				"LRO Queued");
		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
				CTLFLAG_RD, &lro->lro_flushed, 0,
				"LRO Flushed");
	}

	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
				    CTLFLAG_RD, NULL, "MAC Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	/*
	** The VF adapter has a very limited set of stats
	** since it's not managing the hardware directly.
	*/
	if (adapter->vf_ifp) {
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
				CTLFLAG_RD, &stats->gprc,
				"Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
				CTLFLAG_RD, &stats->gptc,
				"Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
				CTLFLAG_RD, &stats->gorc,
				"Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
				CTLFLAG_RD, &stats->gotc,
				"Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
				CTLFLAG_RD, &stats->mprc,
				"Multicast Packets Received");
		return;
	}

	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &stats->symerrs,
			"Symbol Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &stats->sec,
			"Sequence Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &stats->dc,
			"Defer Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &stats->mpc,
			"Missed Packets");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
			CTLFLAG_RD, &stats->rlec,
			"Receive Length Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &stats->rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &stats->ruc,
			"Receive Undersize");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &stats->roc,
			"Oversized Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &stats->rxerrc,
			"Receive Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &stats->crcerrs,
			"CRC errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &stats->algnerrc,
			"Alignment Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
			CTLFLAG_RD, &stats->tncrs,
			"Transmit with No CRS");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &stats->cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &stats->xonrxc,
			"XON Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &stats->xontxc,
			"XON Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &stats->xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &stats->xofftxc,
			"XOFF Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
			CTLFLAG_RD, &stats->fcruc,
			"Unsupported Flow Control Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
			CTLFLAG_RD, &stats->mgprc,
			"Management Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
			CTLFLAG_RD, &stats->mgpdc,
			"Management Packets Dropped");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
			CTLFLAG_RD, &stats->mgptc,
			"Management Packets Transmitted");
	/* Packet Reception Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &stats->gprc,
			"Good Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &stats->bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &stats->mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &stats->prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &stats->prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &stats->prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &stats->prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
			CTLFLAG_RD, &stats->tor,
			"Total Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
			CTLFLAG_RD, &stats->tot,
			"Total Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &stats->tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &stats->gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &stats->bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &stats->mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &stats->ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &stats->ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &stats->ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &stats->ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &stats->ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &stats->tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &stats->tsctfc,
			"TSO Contexts Failed");


	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &stats->iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &stats->icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &stats->icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &stats->ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &stats->ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &stats->ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &stats->ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &stats->icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &stats->icrxoc,
			"Interrupt Cause Receiver Overrun Count");

	/* Host to Card Stats */

	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
				    CTLFLAG_RD, NULL,
				    "Host to Card Statistics");

	host_list = SYSCTL_CHILDREN(host_node);

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
			CTLFLAG_RD, &stats->cbtmpc,
			"Circuit Breaker Tx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
			CTLFLAG_RD, &stats->htdpmc,
			"Host Transmit Discarded Packets");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
			CTLFLAG_RD, &stats->rpthc,
			"Rx Packets To Host");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
			CTLFLAG_RD, &stats->cbrmpc,
			"Circuit Breaker Rx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
			CTLFLAG_RD, &stats->cbrdpc,
			"Circuit Breaker Rx Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
			CTLFLAG_RD, &stats->hgptc,
			"Host Good Packets Tx Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
			CTLFLAG_RD, &stats->htcbdpc,
			"Host Tx Circuit Breaker Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
			CTLFLAG_RD, &stats->hgorc,
			"Host Good Octets Received Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
			CTLFLAG_RD, &stats->hgotc,
			"Host Good Octets Transmit Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
			CTLFLAG_RD, &stats->lenerrs,
			"Length Errors");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
			CTLFLAG_RD, &stats->scvpc,
			"SerDes/SGMII Code Violation Pkt Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
			CTLFLAG_RD, &stats->hrmpc,
			"Header Redirection Missed Packet Count");
}
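/*
 * Usage sketch (illustrative, not part of the driver): every counter added
 * above lives under the device's sysctl tree and can be read from userland
 * with sysctl(8) or sysctlbyname(3).  A minimal reader, assuming unit 0:
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <stdint.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		uint64_t gprc;
 *		size_t len = sizeof(gprc);
 *
 *		if (sysctlbyname("dev.igb.0.mac_stats.good_pkts_recvd",
 *		    &gprc, &len, NULL, 0) == 0)
 *			printf("good packets received: %ju\n",
 *			    (uintmax_t)gprc);
 *		return (0);
 *	}
 */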


/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool.  Only the first 32 words are
 *  dumped; the data that matters lies within that range.
 *
 **********************************************************************/
static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * Writing a value of 1 causes a hex dump of the
	 * first 32 16-bit words of the EEPROM to the
	 * console.
	 */
	if (result == 1) {
		adapter = (struct adapter *)arg1;
		igb_print_nvm_info(adapter);
	}

	return (error);
}
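/*
 * Usage sketch (illustrative): writing 1 to the OID this handler is
 * attached to triggers the dump.  Assuming the handler is registered as
 * "nvm" under the device node for unit 0, that would be:
 *
 *	# sysctl dev.igb.0.nvm=1
 *
 * Any other value is accepted but ignored.
 */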

static void
igb_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static void
igb_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
** 	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int		error;
	static int	input = 3; /* default is full */
	struct adapter	*adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (input) {
		case e1000_fc_rx_pause:
		case e1000_fc_tx_pause:
		case e1000_fc_full:
		case e1000_fc_none:
			adapter->hw.fc.requested_mode = input;
			adapter->fc = input;
			break;
		default:
			/* Do nothing */
			return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
	return (error);
}
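/*
 * Usage sketch (illustrative): assuming this handler is registered as the
 * "fc" OID under the device node, full flow control on unit 0 would be
 * selected with:
 *
 *	# sysctl dev.igb.0.fc=3
 *
 * The new mode is forced on the MAC immediately; values outside 0-3 are
 * silently ignored.
 */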

/*
** Manage DMA Coalesce:
** Control values:
** 	0/1 - off/on
**	Legal timer values are:
**	250, 500, and 1000-10000 in steps of 1000
*/
static int
igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error;

	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->dmac) {
		case 0:
			/* Disabling */
			break;
		case 1: /* Just enable and use default */
			adapter->dmac = 1000;
			break;
		case 250:
		case 500:
		case 1000:
		case 2000:
		case 3000:
		case 4000:
		case 5000:
		case 6000:
		case 7000:
		case 8000:
		case 9000:
		case 10000:
			/* Legal values - allow */
			break;
		default:
			/* Do nothing, illegal value */
			adapter->dmac = 0;
			return (EINVAL);
	}
	/* Reinit the interface */
	igb_init(adapter);
	return (error);
}
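/*
 * Usage sketch (illustrative): assuming this handler is registered as the
 * "dmac" OID under the device node, DMA coalescing on unit 0 could be
 * enabled with the default timer value or tuned explicitly:
 *
 *	# sysctl dev.igb.0.dmac=1	(enable, default timer value of 1000)
 *	# sysctl dev.igb.0.dmac=250	(enable, timer value 250)
 *	# sysctl dev.igb.0.dmac=0	(disable)
 *
 * Any other value is rejected with EINVAL; a successful change re-inits
 * the interface.
 */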

/*
** Manage Energy Efficient Ethernet:
** Control values:
**     0/1 - enabled/disabled
**     (the value mirrors eee_disable, so writing 1 turns EEE off)
*/
static int
igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter	*adapter = (struct adapter *) arg1;
	int		error, value;

	value = adapter->hw.dev_spec._82575.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	IGB_CORE_LOCK(adapter);
	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
	return (0);
}

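/*
 * Usage sketch (illustrative): assuming this handler is registered as the
 * "eee_disabled" OID under the device node, EEE on unit 0 would be turned
 * off (and the interface re-initialized) with:
 *
 *	# sysctl dev.igb.0.eee_disabled=1
 */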