if_igb.c revision 314281
1/******************************************************************************
2
3  Copyright (c) 2001-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/11/sys/dev/e1000/if_igb.c 314281 2017-02-25 20:21:39Z loos $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include "if_igb.h"
46
47/*********************************************************************
48 *  Driver version:
49 *********************************************************************/
50char igb_driver_version[] = "2.5.3-k";
51
52
53/*********************************************************************
54 *  PCI Device ID Table
55 *
56 *  Used by probe to select which devices to attach to
57 *  Last field stores an index into igb_strings
58 *  Last entry must be all 0s
59 *
60 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61 *********************************************************************/
62
63static igb_vendor_info_t igb_vendor_info_array[] =
64{
65	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER,	0, 0, 0},
72	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER,	0, 0, 0},
79	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII,	0, 0, 0},
81	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER,	0, 0, 0},
88	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,	0, 0, 0},
89	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES,	0, 0, 0},
90	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,	0, 0, 0},
91	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER,	0, 0, 0},
93	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,	0, 0, 0},
98	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES,	0, 0, 0},
99	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,	0, 0, 0},
100	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER,	0, 0, 0},
101	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,	0, 0, 0},
104	/* required last entry */
105	{0, 0, 0, 0, 0}
106};
107
108/*********************************************************************
109 *  Table of branding strings for all supported NICs.
110 *********************************************************************/
111
112static char *igb_strings[] = {
113	"Intel(R) PRO/1000 Network Connection"
114};
115
116/*********************************************************************
117 *  Function prototypes
118 *********************************************************************/
119static int	igb_probe(device_t);
120static int	igb_attach(device_t);
121static int	igb_detach(device_t);
122static int	igb_shutdown(device_t);
123static int	igb_suspend(device_t);
124static int	igb_resume(device_t);
125#ifndef IGB_LEGACY_TX
126static int	igb_mq_start(struct ifnet *, struct mbuf *);
127static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128static void	igb_qflush(struct ifnet *);
129static void	igb_deferred_mq_start(void *, int);
130#else
131static void	igb_start(struct ifnet *);
132static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133#endif
134static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
135static uint64_t	igb_get_counter(if_t, ift_counter);
136static void	igb_init(void *);
137static void	igb_init_locked(struct adapter *);
138static void	igb_stop(void *);
139static void	igb_media_status(struct ifnet *, struct ifmediareq *);
140static int	igb_media_change(struct ifnet *);
141static void	igb_identify_hardware(struct adapter *);
142static int	igb_allocate_pci_resources(struct adapter *);
143static int	igb_allocate_msix(struct adapter *);
144static int	igb_allocate_legacy(struct adapter *);
145static int	igb_setup_msix(struct adapter *);
146static void	igb_free_pci_resources(struct adapter *);
147static void	igb_local_timer(void *);
148static void	igb_reset(struct adapter *);
149static int	igb_setup_interface(device_t, struct adapter *);
150static int	igb_allocate_queues(struct adapter *);
151static void	igb_configure_queues(struct adapter *);
152
153static int	igb_allocate_transmit_buffers(struct tx_ring *);
154static void	igb_setup_transmit_structures(struct adapter *);
155static void	igb_setup_transmit_ring(struct tx_ring *);
156static void	igb_initialize_transmit_units(struct adapter *);
157static void	igb_free_transmit_structures(struct adapter *);
158static void	igb_free_transmit_buffers(struct tx_ring *);
159
160static int	igb_allocate_receive_buffers(struct rx_ring *);
161static int	igb_setup_receive_structures(struct adapter *);
162static int	igb_setup_receive_ring(struct rx_ring *);
163static void	igb_initialize_receive_units(struct adapter *);
164static void	igb_free_receive_structures(struct adapter *);
165static void	igb_free_receive_buffers(struct rx_ring *);
166static void	igb_free_receive_ring(struct rx_ring *);
167
168static void	igb_enable_intr(struct adapter *);
169static void	igb_disable_intr(struct adapter *);
170static void	igb_update_stats_counters(struct adapter *);
171static bool	igb_txeof(struct tx_ring *);
172
173static __inline	void igb_rx_discard(struct rx_ring *, int);
174static __inline void igb_rx_input(struct rx_ring *,
175		    struct ifnet *, struct mbuf *, u32);
176
177static bool	igb_rxeof(struct igb_queue *, int, int *);
178static void	igb_rx_checksum(u32, struct mbuf *, u32);
179static int	igb_tx_ctx_setup(struct tx_ring *,
180		    struct mbuf *, u32 *, u32 *);
181static int	igb_tso_setup(struct tx_ring *,
182		    struct mbuf *, u32 *, u32 *);
183static void	igb_set_promisc(struct adapter *);
184static void	igb_disable_promisc(struct adapter *);
185static void	igb_set_multi(struct adapter *);
186static void	igb_update_link_status(struct adapter *);
187static void	igb_refresh_mbufs(struct rx_ring *, int);
188
189static void	igb_register_vlan(void *, struct ifnet *, u16);
190static void	igb_unregister_vlan(void *, struct ifnet *, u16);
191static void	igb_setup_vlan_hw_support(struct adapter *);
192
193static int	igb_xmit(struct tx_ring *, struct mbuf **);
194static int	igb_dma_malloc(struct adapter *, bus_size_t,
195		    struct igb_dma_alloc *, int);
196static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198static void	igb_print_nvm_info(struct adapter *);
199static int 	igb_is_valid_ether_addr(u8 *);
200static void     igb_add_hw_stats(struct adapter *);
201
202static void	igb_vf_init_stats(struct adapter *);
203static void	igb_update_vf_stats_counters(struct adapter *);
204
205/* Management and WOL Support */
206static void	igb_init_manageability(struct adapter *);
207static void	igb_release_manageability(struct adapter *);
208static void     igb_get_hw_control(struct adapter *);
209static void     igb_release_hw_control(struct adapter *);
210static void     igb_enable_wakeup(device_t);
211static void     igb_led_func(void *, int);
212
213static int	igb_irq_fast(void *);
214static void	igb_msix_que(void *);
215static void	igb_msix_link(void *);
216static void	igb_handle_que(void *context, int pending);
217static void	igb_handle_link(void *context, int pending);
218static void	igb_handle_link_locked(struct adapter *);
219
220static void	igb_set_sysctl_value(struct adapter *, const char *,
221		    const char *, int *, int);
222static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225
226#ifdef DEVICE_POLLING
227static poll_handler_t igb_poll;
228#endif /* DEVICE_POLLING */
229
230/*********************************************************************
231 *  FreeBSD Device Interface Entry Points
232 *********************************************************************/
233
234static device_method_t igb_methods[] = {
235	/* Device interface */
236	DEVMETHOD(device_probe, igb_probe),
237	DEVMETHOD(device_attach, igb_attach),
238	DEVMETHOD(device_detach, igb_detach),
239	DEVMETHOD(device_shutdown, igb_shutdown),
240	DEVMETHOD(device_suspend, igb_suspend),
241	DEVMETHOD(device_resume, igb_resume),
242	DEVMETHOD_END
243};
244
245static driver_t igb_driver = {
246	"igb", igb_methods, sizeof(struct adapter),
247};
248
249static devclass_t igb_devclass;
250DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251MODULE_DEPEND(igb, pci, 1, 1, 1);
252MODULE_DEPEND(igb, ether, 1, 1, 1);
253#ifdef DEV_NETMAP
254MODULE_DEPEND(igb, netmap, 1, 1, 1);
255#endif /* DEV_NETMAP */
256
257/*********************************************************************
258 *  Tunable default values.
259 *********************************************************************/
260
261static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262
263/* Descriptor defaults */
264static int igb_rxd = IGB_DEFAULT_RXD;
265static int igb_txd = IGB_DEFAULT_TXD;
266SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267    "Number of receive descriptors per queue");
268SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269    "Number of transmit descriptors per queue");
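/*
** Usage note (illustrative): these are boot-time tunables (CTLFLAG_RDTUN),
** so they are normally set from /boot/loader.conf, for example:
**	hw.igb.rxd="2048"
**	hw.igb.txd="2048"
** Values are checked against the IGB_MIN/IGB_MAX limits and the
** descriptor alignment rule in igb_attach() below.
*/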
270
271/*
272** AIM: Adaptive Interrupt Moderation
273** which means that the interrupt rate
274** is varied over time based on the
275** traffic for that interrupt vector
276*/
277static int igb_enable_aim = TRUE;
278SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279    "Enable adaptive interrupt moderation");
280
281/*
282 * MSIX should be the default for best performance,
283 * but this allows it to be forced off for testing.
284 */
285static int igb_enable_msix = 1;
286SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287    "Enable MSI-X interrupts");
288
289/*
290** Tuneable Interrupt rate
291*/
292static int igb_max_interrupt_rate = 8000;
293SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
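/*
** Illustrative arithmetic: a cap of 8000 interrupts per second per vector
** corresponds to an interrupt interval of roughly 125 microseconds
** (1/8000 s); this value is consulted when the per-queue EITR (interrupt
** throttle) registers are programmed.
*/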
295
296#ifndef IGB_LEGACY_TX
297/*
298** Tuneable number of buffers in the buf-ring (drbr_xxx)
299*/
300static int igb_buf_ring_size = IGB_BR_SIZE;
301SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302    &igb_buf_ring_size, 0, "Size of the bufring");
303#endif
304
305/*
306** Header split causes the packet header to be
307** DMA'd into a separate mbuf from the payload.
308** This can have memory alignment benefits, and
309** small packets often fit entirely in the header
310** mbuf and thus need no cluster. It is a very
311** workload-dependent feature.
312*/
313static int igb_header_split = FALSE;
314SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315    "Enable receive mbuf header split");
316
317/*
318** This will autoconfigure based on the
319** number of CPUs and max supported
320** MSIX messages if left at 0.
321*/
322static int igb_num_queues = 0;
323SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324    "Number of queues to configure, 0 indicates autoconfigure");
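/*
** Example (illustrative): setting hw.igb.num_queues="2" in loader.conf pins
** the driver to two queues, while the default of 0 derives the queue count
** from the number of CPUs and the MSI-X vectors the device supports.
*/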
325
326/*
327** Global variable to store last used CPU when binding queues
328** to CPUs in igb_allocate_msix.  It is set to CPU_FIRST on first use and
329** advanced each time a queue is bound to a CPU.
330*/
331static int igb_last_bind_cpu = -1;
332
333/* How many packets rxeof tries to clean at a time */
334static int igb_rx_process_limit = 100;
335SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336    &igb_rx_process_limit, 0,
337    "Maximum number of received packets to process at a time, -1 means unlimited");
338
339/* How many packets txeof tries to clean at a time */
340static int igb_tx_process_limit = -1;
341SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342    &igb_tx_process_limit, 0,
343    "Maximum number of sent packets to process at a time, -1 means unlimited");
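/*
** Note: these limits are per-pass budgets for the queue taskqueues; with
** rx_process_limit at its default of 100, a single igb_rxeof() pass handles
** at most 100 received packets and, if more work remains, the queue task is
** rescheduled (see igb_handle_que() below).
*/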
344
345#ifdef DEV_NETMAP	/* see ixgbe.c for details */
346#include <dev/netmap/if_igb_netmap.h>
347#endif /* DEV_NETMAP */
348/*********************************************************************
349 *  Device identification routine
350 *
351 *  igb_probe determines if the driver should be loaded on an
352 *  adapter based on the PCI vendor/device ID of that adapter.
353 *
354 *  return BUS_PROBE_DEFAULT on success, positive on failure
355 *********************************************************************/
356
357static int
358igb_probe(device_t dev)
359{
360	char		adapter_name[256];
361	uint16_t	pci_vendor_id = 0;
362	uint16_t	pci_device_id = 0;
363	uint16_t	pci_subvendor_id = 0;
364	uint16_t	pci_subdevice_id = 0;
365	igb_vendor_info_t *ent;
366
367	INIT_DEBUGOUT("igb_probe: begin");
368
369	pci_vendor_id = pci_get_vendor(dev);
370	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371		return (ENXIO);
372
373	pci_device_id = pci_get_device(dev);
374	pci_subvendor_id = pci_get_subvendor(dev);
375	pci_subdevice_id = pci_get_subdevice(dev);
376
377	ent = igb_vendor_info_array;
378	while (ent->vendor_id != 0) {
379		if ((pci_vendor_id == ent->vendor_id) &&
380		    (pci_device_id == ent->device_id) &&
381
382		    ((pci_subvendor_id == ent->subvendor_id) ||
383		    (ent->subvendor_id == 0)) &&
384
385		    ((pci_subdevice_id == ent->subdevice_id) ||
386		    (ent->subdevice_id == 0))) {
387			snprintf(adapter_name, sizeof(adapter_name),
388				"%s, Version - %s", igb_strings[ent->index],
389				igb_driver_version);
390			device_set_desc_copy(dev, adapter_name);
391			return (BUS_PROBE_DEFAULT);
392		}
393		ent++;
394	}
395	return (ENXIO);
396}
397
398/*********************************************************************
399 *  Device initialization routine
400 *
401 *  The attach entry point is called when the driver is being loaded.
402 *  This routine identifies the type of hardware, allocates all resources
403 *  and initializes the hardware.
404 *
405 *  return 0 on success, positive on failure
406 *********************************************************************/
407
408static int
409igb_attach(device_t dev)
410{
411	struct adapter	*adapter;
412	int		error = 0;
413	u16		eeprom_data;
414
415	INIT_DEBUGOUT("igb_attach: begin");
416
417	if (resource_disabled("igb", device_get_unit(dev))) {
418		device_printf(dev, "Disabled by device hint\n");
419		return (ENXIO);
420	}
421
422	adapter = device_get_softc(dev);
423	adapter->dev = adapter->osdep.dev = dev;
424	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425
426	/* SYSCTLs */
427	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430	    igb_sysctl_nvm_info, "I", "NVM Information");
431
432	igb_set_sysctl_value(adapter, "enable_aim",
433	    "Interrupt Moderation", &adapter->enable_aim,
434	    igb_enable_aim);
435
436	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440
441	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442
443	/* Determine hardware and mac info */
444	igb_identify_hardware(adapter);
445
446	/* Setup PCI resources */
447	if (igb_allocate_pci_resources(adapter)) {
448		device_printf(dev, "Allocation of PCI resources failed\n");
449		error = ENXIO;
450		goto err_pci;
451	}
452
453	/* Do Shared Code initialization */
454	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455		device_printf(dev, "Setup of Shared code failed\n");
456		error = ENXIO;
457		goto err_pci;
458	}
459
460	e1000_get_bus_info(&adapter->hw);
461
462	/* Sysctls for limiting the amount of work done in the taskqueues */
463	igb_set_sysctl_value(adapter, "rx_processing_limit",
464	    "max number of rx packets to process",
465	    &adapter->rx_process_limit, igb_rx_process_limit);
466
467	igb_set_sysctl_value(adapter, "tx_processing_limit",
468	    "max number of tx packets to process",
469	    &adapter->tx_process_limit, igb_tx_process_limit);
470
471	/*
472	 * Validate the number of transmit and receive descriptors. It
473	 * must not exceed the hardware maximum, and must be a multiple
474	 * of IGB_DBA_ALIGN.
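	 * For example (illustrative sizes): with 16-byte descriptors and a
	 * 128-byte IGB_DBA_ALIGN, the ring size must be a multiple of eight
	 * descriptors; the IGB_DEFAULT_* fallbacks used below satisfy this.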
475	 */
476	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479		    IGB_DEFAULT_TXD, igb_txd);
480		adapter->num_tx_desc = IGB_DEFAULT_TXD;
481	} else
482		adapter->num_tx_desc = igb_txd;
483	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486		    IGB_DEFAULT_RXD, igb_rxd);
487		adapter->num_rx_desc = IGB_DEFAULT_RXD;
488	} else
489		adapter->num_rx_desc = igb_rxd;
490
491	adapter->hw.mac.autoneg = DO_AUTO_NEG;
492	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494
495	/* Copper options */
496	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497		adapter->hw.phy.mdix = AUTO_ALL_MODES;
498		adapter->hw.phy.disable_polarity_correction = FALSE;
499		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500	}
501
502	/*
503	 * Set the frame limits assuming
504	 * standard ethernet sized frames.
505	 */
506	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
507
508	/*
509	** Allocate and Setup Queues
510	*/
511	if (igb_allocate_queues(adapter)) {
512		error = ENOMEM;
513		goto err_pci;
514	}
515
516	/* Allocate the appropriate stats memory */
517	if (adapter->vf_ifp) {
518		adapter->stats =
519		    (struct e1000_vf_stats *)malloc(sizeof \
520		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
521		igb_vf_init_stats(adapter);
522	} else
523		adapter->stats =
524		    (struct e1000_hw_stats *)malloc(sizeof \
525		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526	if (adapter->stats == NULL) {
527		device_printf(dev, "Can not allocate stats memory\n");
528		error = ENOMEM;
529		goto err_late;
530	}
531
532	/* Allocate multicast array memory. */
533	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535	if (adapter->mta == NULL) {
536		device_printf(dev, "Can not allocate multicast setup array\n");
537		error = ENOMEM;
538		goto err_late;
539	}
540
541	/* Some adapter-specific advanced features */
542	if (adapter->hw.mac.type >= e1000_i350) {
543		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550		    adapter, 0, igb_sysctl_eee, "I",
551		    "Disable Energy Efficient Ethernet");
552		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553			if (adapter->hw.mac.type == e1000_i354)
554				e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
555			else
556				e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
557		}
558	}
559
560	/*
561	** Start from a known state; this is
562	** important for reading the NVM and
563	** MAC address from it.
564	*/
565	e1000_reset_hw(&adapter->hw);
566
567	/* Make sure we have a good EEPROM before we read from it */
568	if (((adapter->hw.mac.type != e1000_i210) &&
569	    (adapter->hw.mac.type != e1000_i211)) &&
570	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571		/*
572		** Some PCI-E parts fail the first check due to
573		** the link being in a sleep state; call it again and,
574		** if it fails a second time, it's a real issue.
575		*/
576		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577			device_printf(dev,
578			    "The EEPROM Checksum Is Not Valid\n");
579			error = EIO;
580			goto err_late;
581		}
582	}
583
584	/*
585	** Copy the permanent MAC address out of the EEPROM
586	*/
587	if (e1000_read_mac_addr(&adapter->hw) < 0) {
588		device_printf(dev, "EEPROM read error while reading MAC"
589		    " address\n");
590		error = EIO;
591		goto err_late;
592	}
593	/* Check its sanity */
594	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595		device_printf(dev, "Invalid MAC address\n");
596		error = EIO;
597		goto err_late;
598	}
599
600	/* Setup OS specific network interface */
601	if (igb_setup_interface(dev, adapter) != 0)
602		goto err_late;
603
604	/* Now get a good starting state */
605	igb_reset(adapter);
606
607	/* Initialize statistics */
608	igb_update_stats_counters(adapter);
609
610	adapter->hw.mac.get_link_status = 1;
611	igb_update_link_status(adapter);
612
613	/* Indicate SOL/IDER usage */
614	if (e1000_check_reset_block(&adapter->hw))
615		device_printf(dev,
616		    "PHY reset is blocked due to SOL/IDER session.\n");
617
618	/* Determine if we have to control management hardware */
619	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620
621	/*
622	 * Setup Wake-on-Lan
623	 */
624	/* APME bit in EEPROM is mapped to WUC.APME */
625	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626	if (eeprom_data)
627		adapter->wol = E1000_WUFC_MAG;
628
629	/* Register for VLAN events */
630	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634
635	igb_add_hw_stats(adapter);
636
637	/* Tell the stack that the interface is not active */
638	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
640
641	adapter->led_dev = led_create(igb_led_func, adapter,
642	    device_get_nameunit(dev));
643
644	/*
645	** Configure Interrupts
646	*/
647	if ((adapter->msix > 1) && (igb_enable_msix))
648		error = igb_allocate_msix(adapter);
649	else /* MSI or Legacy */
650		error = igb_allocate_legacy(adapter);
651	if (error)
652		goto err_late;
653
654#ifdef DEV_NETMAP
655	igb_netmap_attach(adapter);
656#endif /* DEV_NETMAP */
657	INIT_DEBUGOUT("igb_attach: end");
658
659	return (0);
660
661err_late:
662	if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
663		return(error);
664	igb_free_transmit_structures(adapter);
665	igb_free_receive_structures(adapter);
666	igb_release_hw_control(adapter);
667err_pci:
668	igb_free_pci_resources(adapter);
669	if (adapter->ifp != NULL)
670		if_free(adapter->ifp);
671	free(adapter->mta, M_DEVBUF);
672	IGB_CORE_LOCK_DESTROY(adapter);
673
674	return (error);
675}
676
677/*********************************************************************
678 *  Device removal routine
679 *
680 *  The detach entry point is called when the driver is being removed.
681 *  This routine stops the adapter and deallocates all the resources
682 *  that were allocated for driver operation.
683 *
684 *  return 0 on success, positive on failure
685 *********************************************************************/
686
687static int
688igb_detach(device_t dev)
689{
690	struct adapter	*adapter = device_get_softc(dev);
691	struct ifnet	*ifp = adapter->ifp;
692
693	INIT_DEBUGOUT("igb_detach: begin");
694
695	/* Make sure VLANS are not using driver */
696	if (adapter->ifp->if_vlantrunk != NULL) {
697		device_printf(dev, "VLAN in use, detach first\n");
698		return (EBUSY);
699	}
700
701	ether_ifdetach(adapter->ifp);
702
703	if (adapter->led_dev != NULL)
704		led_destroy(adapter->led_dev);
705
706#ifdef DEVICE_POLLING
707	if (ifp->if_capenable & IFCAP_POLLING)
708		ether_poll_deregister(ifp);
709#endif
710
711	IGB_CORE_LOCK(adapter);
712	adapter->in_detach = 1;
713	igb_stop(adapter);
714	IGB_CORE_UNLOCK(adapter);
715
716	e1000_phy_hw_reset(&adapter->hw);
717
718	/* Give control back to firmware */
719	igb_release_manageability(adapter);
720	igb_release_hw_control(adapter);
721
722	if (adapter->wol) {
723		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
724		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
725		igb_enable_wakeup(dev);
726	}
727
728	/* Unregister VLAN events */
729	if (adapter->vlan_attach != NULL)
730		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
731	if (adapter->vlan_detach != NULL)
732		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
733
734	callout_drain(&adapter->timer);
735
736#ifdef DEV_NETMAP
737	netmap_detach(adapter->ifp);
738#endif /* DEV_NETMAP */
739	igb_free_pci_resources(adapter);
740	bus_generic_detach(dev);
741	if_free(ifp);
742
743	igb_free_transmit_structures(adapter);
744	igb_free_receive_structures(adapter);
745	if (adapter->mta != NULL)
746		free(adapter->mta, M_DEVBUF);
747
748	IGB_CORE_LOCK_DESTROY(adapter);
749
750	return (0);
751}
752
753/*********************************************************************
754 *
755 *  Shutdown entry point
756 *
757 **********************************************************************/
758
759static int
760igb_shutdown(device_t dev)
761{
762	return igb_suspend(dev);
763}
764
765/*
766 * Suspend/resume device methods.
767 */
768static int
769igb_suspend(device_t dev)
770{
771	struct adapter *adapter = device_get_softc(dev);
772
773	IGB_CORE_LOCK(adapter);
774
775	igb_stop(adapter);
776
777	igb_release_manageability(adapter);
778	igb_release_hw_control(adapter);
779
780	if (adapter->wol) {
781		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
782		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
783		igb_enable_wakeup(dev);
784	}
785
786	IGB_CORE_UNLOCK(adapter);
787
788	return bus_generic_suspend(dev);
789}
790
791static int
792igb_resume(device_t dev)
793{
794	struct adapter *adapter = device_get_softc(dev);
795	struct tx_ring	*txr = adapter->tx_rings;
796	struct ifnet *ifp = adapter->ifp;
797
798	IGB_CORE_LOCK(adapter);
799	igb_init_locked(adapter);
800	igb_init_manageability(adapter);
801
802	if ((ifp->if_flags & IFF_UP) &&
803	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
804		for (int i = 0; i < adapter->num_queues; i++, txr++) {
805			IGB_TX_LOCK(txr);
806#ifndef IGB_LEGACY_TX
807			/* Process the stack queue only if not depleted */
808			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
809			    !drbr_empty(ifp, txr->br))
810				igb_mq_start_locked(ifp, txr);
811#else
812			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
813				igb_start_locked(txr, ifp);
814#endif
815			IGB_TX_UNLOCK(txr);
816		}
817	}
818	IGB_CORE_UNLOCK(adapter);
819
820	return bus_generic_resume(dev);
821}
822
823
824#ifdef IGB_LEGACY_TX
825
826/*********************************************************************
827 *  Transmit entry point
828 *
829 *  igb_start is called by the stack to initiate a transmit.
830 *  The driver will remain in this routine as long as there are
831 *  packets to transmit and transmit resources are available.
832 *  If resources are not available, the stack is notified and
833 *  the packet is requeued.
834 **********************************************************************/
835
836static void
837igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
838{
839	struct adapter	*adapter = ifp->if_softc;
840	struct mbuf	*m_head;
841
842	IGB_TX_LOCK_ASSERT(txr);
843
844	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
845	    IFF_DRV_RUNNING)
846		return;
847	if (!adapter->link_active)
848		return;
849
850	/* Call cleanup if number of TX descriptors low */
851	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
852		igb_txeof(txr);
853
854	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
855		if (txr->tx_avail <= IGB_MAX_SCATTER) {
856			txr->queue_status |= IGB_QUEUE_DEPLETED;
857			break;
858		}
859		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
860		if (m_head == NULL)
861			break;
862		/*
863		 *  Encapsulation can modify our pointer, and/or make it
864		 *  NULL on failure.  In that event, we can't requeue.
865		 */
866		if (igb_xmit(txr, &m_head)) {
867			if (m_head != NULL)
868				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
869			if (txr->tx_avail <= IGB_MAX_SCATTER)
870				txr->queue_status |= IGB_QUEUE_DEPLETED;
871			break;
872		}
873
874		/* Send a copy of the frame to the BPF listener */
875		ETHER_BPF_MTAP(ifp, m_head);
876
877		/* Set watchdog on */
878		txr->watchdog_time = ticks;
879		txr->queue_status |= IGB_QUEUE_WORKING;
880	}
881}
882
883/*
884 * Legacy TX driver routine, called from the
885 * stack; it always uses the first TX ring and contends for its lock.
886 * It should not be used with multiqueue TX.
887 */
888static void
889igb_start(struct ifnet *ifp)
890{
891	struct adapter	*adapter = ifp->if_softc;
892	struct tx_ring	*txr = adapter->tx_rings;
893
894	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
895		IGB_TX_LOCK(txr);
896		igb_start_locked(txr, ifp);
897		IGB_TX_UNLOCK(txr);
898	}
899	return;
900}
901
902#else /* ~IGB_LEGACY_TX */
903
904/*
905** Multiqueue Transmit Entry:
906**  quick turnaround to the stack
907**
908*/
909static int
910igb_mq_start(struct ifnet *ifp, struct mbuf *m)
911{
912	struct adapter		*adapter = ifp->if_softc;
913	struct igb_queue	*que;
914	struct tx_ring		*txr;
915	int 			i, err = 0;
916#ifdef	RSS
917	uint32_t		bucket_id;
918#endif
919
920	/* Which queue to use */
921	/*
922	 * When doing RSS, map it to the same outbound queue
923	 * as the incoming flow would be mapped to.
924	 *
925	 * If everything is set up correctly, this should be the
926	 * same bucket that the current CPU is assigned to.
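	 *
	 * Illustrative arithmetic: with four queues configured, a flow hash
	 * or RSS bucket of 13 selects ring 13 % 4 == 1.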
927	 */
928	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
929#ifdef	RSS
930		if (rss_hash2bucket(m->m_pkthdr.flowid,
931		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
932			/* XXX TODO: spit out something if bucket_id > num_queues? */
933			i = bucket_id % adapter->num_queues;
934		} else {
935#endif
936			i = m->m_pkthdr.flowid % adapter->num_queues;
937#ifdef	RSS
938		}
939#endif
940	} else {
941		i = curcpu % adapter->num_queues;
942	}
943	txr = &adapter->tx_rings[i];
944	que = &adapter->queues[i];
945
946	err = drbr_enqueue(ifp, txr->br, m);
947	if (err)
948		return (err);
949	if (IGB_TX_TRYLOCK(txr)) {
950		igb_mq_start_locked(ifp, txr);
951		IGB_TX_UNLOCK(txr);
952	} else
953		taskqueue_enqueue(que->tq, &txr->txq_task);
954
955	return (0);
956}
957
958static int
959igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
960{
961	struct adapter  *adapter = txr->adapter;
962	struct mbuf	*next;
963	int		err = 0, enq = 0;
964
965	IGB_TX_LOCK_ASSERT(txr);
966
967	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
968	    adapter->link_active == 0)
969		return (ENETDOWN);
970
971	/* Process the queue */
972	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
973		if ((err = igb_xmit(txr, &next)) != 0) {
974			if (next == NULL) {
975				/* It was freed, move forward */
976				drbr_advance(ifp, txr->br);
977			} else {
978				/*
979				 * Still have one left, it may not be
980				 * the same since the transmit function
981				 * may have changed it.
982				 */
983				drbr_putback(ifp, txr->br, next);
984			}
985			break;
986		}
987		drbr_advance(ifp, txr->br);
988		enq++;
989		if (next->m_flags & M_MCAST && adapter->vf_ifp)
990			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
991		ETHER_BPF_MTAP(ifp, next);
992		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
993			break;
994	}
995	if (enq > 0) {
996		/* Set the watchdog */
997		txr->queue_status |= IGB_QUEUE_WORKING;
998		txr->watchdog_time = ticks;
999	}
1000	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1001		igb_txeof(txr);
1002	if (txr->tx_avail <= IGB_MAX_SCATTER)
1003		txr->queue_status |= IGB_QUEUE_DEPLETED;
1004	return (err);
1005}
1006
1007/*
1008 * Called from a taskqueue to drain queued transmit packets.
1009 */
1010static void
1011igb_deferred_mq_start(void *arg, int pending)
1012{
1013	struct tx_ring *txr = arg;
1014	struct adapter *adapter = txr->adapter;
1015	struct ifnet *ifp = adapter->ifp;
1016
1017	IGB_TX_LOCK(txr);
1018	if (!drbr_empty(ifp, txr->br))
1019		igb_mq_start_locked(ifp, txr);
1020	IGB_TX_UNLOCK(txr);
1021}
1022
1023/*
1024** Flush all ring buffers
1025*/
1026static void
1027igb_qflush(struct ifnet *ifp)
1028{
1029	struct adapter	*adapter = ifp->if_softc;
1030	struct tx_ring	*txr = adapter->tx_rings;
1031	struct mbuf	*m;
1032
1033	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1034		IGB_TX_LOCK(txr);
1035		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1036			m_freem(m);
1037		IGB_TX_UNLOCK(txr);
1038	}
1039	if_qflush(ifp);
1040}
1041#endif /* ~IGB_LEGACY_TX */
1042
1043/*********************************************************************
1044 *  Ioctl entry point
1045 *
1046 *  igb_ioctl is called when the user wants to configure the
1047 *  interface.
1048 *
1049 *  return 0 on success, positive on failure
1050 **********************************************************************/
1051
1052static int
1053igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1054{
1055	struct adapter	*adapter = ifp->if_softc;
1056	struct ifreq	*ifr = (struct ifreq *)data;
1057#if defined(INET) || defined(INET6)
1058	struct ifaddr	*ifa = (struct ifaddr *)data;
1059#endif
1060	bool		avoid_reset = FALSE;
1061	int		error = 0;
1062
1063	if (adapter->in_detach)
1064		return (error);
1065
1066	switch (command) {
1067	case SIOCSIFADDR:
1068#ifdef INET
1069		if (ifa->ifa_addr->sa_family == AF_INET)
1070			avoid_reset = TRUE;
1071#endif
1072#ifdef INET6
1073		if (ifa->ifa_addr->sa_family == AF_INET6)
1074			avoid_reset = TRUE;
1075#endif
1076		/*
1077		** Calling init results in link renegotiation,
1078		** so we avoid doing it when possible.
1079		*/
1080		if (avoid_reset) {
1081			ifp->if_flags |= IFF_UP;
1082			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1083				igb_init(adapter);
1084#ifdef INET
1085			if (!(ifp->if_flags & IFF_NOARP))
1086				arp_ifinit(ifp, ifa);
1087#endif
1088		} else
1089			error = ether_ioctl(ifp, command, data);
1090		break;
1091	case SIOCSIFMTU:
1092	    {
1093		int max_frame_size;
1094
1095		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1096
1097		IGB_CORE_LOCK(adapter);
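		/* 9234 = 9216-byte jumbo MTU + ETHER_HDR_LEN + ETHER_CRC_LEN */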
1098		max_frame_size = 9234;
1099		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1100		    ETHER_CRC_LEN) {
1101			IGB_CORE_UNLOCK(adapter);
1102			error = EINVAL;
1103			break;
1104		}
1105
1106		ifp->if_mtu = ifr->ifr_mtu;
1107		adapter->max_frame_size =
1108		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1109		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
1110			igb_init_locked(adapter);
1111		IGB_CORE_UNLOCK(adapter);
1112		break;
1113	    }
1114	case SIOCSIFFLAGS:
1115		IOCTL_DEBUGOUT(
1116		    "ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1117		IGB_CORE_LOCK(adapter);
1118		if (ifp->if_flags & IFF_UP) {
1119			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1120				if ((ifp->if_flags ^ adapter->if_flags) &
1121				    (IFF_PROMISC | IFF_ALLMULTI)) {
1122					igb_disable_promisc(adapter);
1123					igb_set_promisc(adapter);
1124				}
1125			} else
1126				igb_init_locked(adapter);
1127		} else
1128			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1129				igb_stop(adapter);
1130		adapter->if_flags = ifp->if_flags;
1131		IGB_CORE_UNLOCK(adapter);
1132		break;
1133	case SIOCADDMULTI:
1134	case SIOCDELMULTI:
1135		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1136		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1137			IGB_CORE_LOCK(adapter);
1138			igb_disable_intr(adapter);
1139			igb_set_multi(adapter);
1140#ifdef DEVICE_POLLING
1141			if (!(ifp->if_capenable & IFCAP_POLLING))
1142#endif
1143				igb_enable_intr(adapter);
1144			IGB_CORE_UNLOCK(adapter);
1145		}
1146		break;
1147	case SIOCSIFMEDIA:
1148		/* Check SOL/IDER usage */
1149		IGB_CORE_LOCK(adapter);
1150		if (e1000_check_reset_block(&adapter->hw)) {
1151			IGB_CORE_UNLOCK(adapter);
1152			device_printf(adapter->dev, "Media change is"
1153			    " blocked due to SOL/IDER session.\n");
1154			break;
1155		}
1156		IGB_CORE_UNLOCK(adapter);
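		/* FALLTHROUGH */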
1157	case SIOCGIFMEDIA:
1158		IOCTL_DEBUGOUT(
1159		    "ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
1160		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1161		break;
1162	case SIOCSIFCAP:
1163	    {
1164		int mask, reinit;
1165
1166		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1167		reinit = 0;
1168		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1169#ifdef DEVICE_POLLING
1170		if (mask & IFCAP_POLLING) {
1171			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1172				error = ether_poll_register(igb_poll, ifp);
1173				if (error)
1174					return (error);
1175				IGB_CORE_LOCK(adapter);
1176				igb_disable_intr(adapter);
1177				ifp->if_capenable |= IFCAP_POLLING;
1178				IGB_CORE_UNLOCK(adapter);
1179			} else {
1180				error = ether_poll_deregister(ifp);
1181				/* Enable interrupt even in error case */
1182				IGB_CORE_LOCK(adapter);
1183				igb_enable_intr(adapter);
1184				ifp->if_capenable &= ~IFCAP_POLLING;
1185				IGB_CORE_UNLOCK(adapter);
1186			}
1187		}
1188#endif
1189#if __FreeBSD_version >= 1000000
1190		/* HW cannot turn these on/off separately */
1191		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1192			ifp->if_capenable ^= IFCAP_RXCSUM;
1193			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1194			reinit = 1;
1195		}
1196		if (mask & IFCAP_TXCSUM) {
1197			ifp->if_capenable ^= IFCAP_TXCSUM;
1198			reinit = 1;
1199		}
1200		if (mask & IFCAP_TXCSUM_IPV6) {
1201			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1202			reinit = 1;
1203		}
1204#else
1205		if (mask & IFCAP_HWCSUM) {
1206			ifp->if_capenable ^= IFCAP_HWCSUM;
1207			reinit = 1;
1208		}
1209#endif
1210		if (mask & IFCAP_TSO4) {
1211			ifp->if_capenable ^= IFCAP_TSO4;
1212			reinit = 1;
1213		}
1214		if (mask & IFCAP_TSO6) {
1215			ifp->if_capenable ^= IFCAP_TSO6;
1216			reinit = 1;
1217		}
1218		if (mask & IFCAP_VLAN_HWTAGGING) {
1219			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1220			reinit = 1;
1221		}
1222		if (mask & IFCAP_VLAN_HWFILTER) {
1223			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1224			reinit = 1;
1225		}
1226		if (mask & IFCAP_VLAN_HWTSO) {
1227			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1228			reinit = 1;
1229		}
1230		if (mask & IFCAP_LRO) {
1231			ifp->if_capenable ^= IFCAP_LRO;
1232			reinit = 1;
1233		}
1234		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1235			igb_init(adapter);
1236		VLAN_CAPABILITIES(ifp);
1237		break;
1238	    }
1239
1240	default:
1241		error = ether_ioctl(ifp, command, data);
1242		break;
1243	}
1244
1245	return (error);
1246}
1247
1248
1249/*********************************************************************
1250 *  Init entry point
1251 *
1252 *  This routine is used in two ways. It is used by the stack as the
1253 *  init entry point in the network interface structure. It is also used
1254 *  by the driver as a hw/sw initialization routine to get to a
1255 *  consistent state.
1256 *
1257 *  This routine returns nothing; errors are reported with device_printf.
1258 **********************************************************************/
1259
1260static void
1261igb_init_locked(struct adapter *adapter)
1262{
1263	struct ifnet	*ifp = adapter->ifp;
1264	device_t	dev = adapter->dev;
1265
1266	INIT_DEBUGOUT("igb_init: begin");
1267
1268	IGB_CORE_LOCK_ASSERT(adapter);
1269
1270	igb_disable_intr(adapter);
1271	callout_stop(&adapter->timer);
1272
1273	/* Get the latest mac address, User can use a LAA */
1274        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1275              ETHER_ADDR_LEN);
1276
1277	/* Put the address into the Receive Address Array */
1278	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1279
1280	igb_reset(adapter);
1281	igb_update_link_status(adapter);
1282
1283	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1284
1285	/* Set hardware offload abilities */
1286	ifp->if_hwassist = 0;
1287	if (ifp->if_capenable & IFCAP_TXCSUM) {
1288#if __FreeBSD_version >= 1000000
1289		ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
1290		if (adapter->hw.mac.type != e1000_82575)
1291			ifp->if_hwassist |= CSUM_IP_SCTP;
1292#else
1293		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1294#if __FreeBSD_version >= 800000
1295		if (adapter->hw.mac.type != e1000_82575)
1296			ifp->if_hwassist |= CSUM_SCTP;
1297#endif
1298#endif
1299	}
1300
1301#if __FreeBSD_version >= 1000000
1302	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
1303		ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
1304		if (adapter->hw.mac.type != e1000_82575)
1305			ifp->if_hwassist |= CSUM_IP6_SCTP;
1306	}
1307#endif
1308	if (ifp->if_capenable & IFCAP_TSO)
1309		ifp->if_hwassist |= CSUM_TSO;
1310
1311	/* Clear bad data from Rx FIFOs */
1312	e1000_rx_fifo_flush_82575(&adapter->hw);
1313
1314	/* Configure for OS presence */
1315	igb_init_manageability(adapter);
1316
1317	/* Prepare transmit descriptors and buffers */
1318	igb_setup_transmit_structures(adapter);
1319	igb_initialize_transmit_units(adapter);
1320
1321	/* Setup Multicast table */
1322	igb_set_multi(adapter);
1323
1324	/*
1325	** Figure out the desired mbuf pool
1326	** for doing jumbo/packetsplit
1327	*/
1328	if (adapter->max_frame_size <= 2048)
1329		adapter->rx_mbuf_sz = MCLBYTES;
1330	else if (adapter->max_frame_size <= 4096)
1331		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1332	else
1333		adapter->rx_mbuf_sz = MJUM9BYTES;
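	/*
	** For example (illustrative): a 9000-byte MTU gives a max_frame_size
	** of 9018 bytes, which selects 9k jumbo clusters (MJUM9BYTES).
	*/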
1334
1335	/* Prepare receive descriptors and buffers */
1336	if (igb_setup_receive_structures(adapter)) {
1337		device_printf(dev, "Could not setup receive structures\n");
1338		return;
1339	}
1340	igb_initialize_receive_units(adapter);
1341
1342        /* Enable VLAN support */
1343	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1344		igb_setup_vlan_hw_support(adapter);
1345
1346	/* Don't lose promiscuous settings */
1347	igb_set_promisc(adapter);
1348
1349	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1350	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1351
1352	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1353	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1354
1355	if (adapter->msix > 1) /* Set up queue routing */
1356		igb_configure_queues(adapter);
1357
1358	/* this clears any pending interrupts */
1359	E1000_READ_REG(&adapter->hw, E1000_ICR);
1360#ifdef DEVICE_POLLING
1361	/*
1362	 * Only enable interrupts if we are not polling; make sure
1363	 * they are off otherwise.
1364	 */
1365	if (ifp->if_capenable & IFCAP_POLLING)
1366		igb_disable_intr(adapter);
1367	else
1368#endif /* DEVICE_POLLING */
1369	{
1370		igb_enable_intr(adapter);
1371		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1372	}
1373
1374	/* Set Energy Efficient Ethernet */
1375	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1376		if (adapter->hw.mac.type == e1000_i354)
1377			e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
1378		else
1379			e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
1380	}
1381}
1382
1383static void
1384igb_init(void *arg)
1385{
1386	struct adapter *adapter = arg;
1387
1388	IGB_CORE_LOCK(adapter);
1389	igb_init_locked(adapter);
1390	IGB_CORE_UNLOCK(adapter);
1391}
1392
1393
1394static void
1395igb_handle_que(void *context, int pending)
1396{
1397	struct igb_queue *que = context;
1398	struct adapter *adapter = que->adapter;
1399	struct tx_ring *txr = que->txr;
1400	struct ifnet	*ifp = adapter->ifp;
1401
1402	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1403		bool	more;
1404
1405		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1406
1407		IGB_TX_LOCK(txr);
1408		igb_txeof(txr);
1409#ifndef IGB_LEGACY_TX
1410		/* Process the stack queue only if not depleted */
1411		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1412		    !drbr_empty(ifp, txr->br))
1413			igb_mq_start_locked(ifp, txr);
1414#else
1415		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1416			igb_start_locked(txr, ifp);
1417#endif
1418		IGB_TX_UNLOCK(txr);
1419		/* Do we need another? */
1420		if (more) {
1421			taskqueue_enqueue(que->tq, &que->que_task);
1422			return;
1423		}
1424	}
1425
1426#ifdef DEVICE_POLLING
1427	if (ifp->if_capenable & IFCAP_POLLING)
1428		return;
1429#endif
1430	/* Reenable this interrupt */
1431	if (que->eims)
1432		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1433	else
1434		igb_enable_intr(adapter);
1435}
1436
1437/* Deal with link in a sleepable context */
1438static void
1439igb_handle_link(void *context, int pending)
1440{
1441	struct adapter *adapter = context;
1442
1443	IGB_CORE_LOCK(adapter);
1444	igb_handle_link_locked(adapter);
1445	IGB_CORE_UNLOCK(adapter);
1446}
1447
1448static void
1449igb_handle_link_locked(struct adapter *adapter)
1450{
1451	struct tx_ring	*txr = adapter->tx_rings;
1452	struct ifnet *ifp = adapter->ifp;
1453
1454	IGB_CORE_LOCK_ASSERT(adapter);
1455	adapter->hw.mac.get_link_status = 1;
1456	igb_update_link_status(adapter);
1457	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1458		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1459			IGB_TX_LOCK(txr);
1460#ifndef IGB_LEGACY_TX
1461			/* Process the stack queue only if not depleted */
1462			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1463			    !drbr_empty(ifp, txr->br))
1464				igb_mq_start_locked(ifp, txr);
1465#else
1466			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1467				igb_start_locked(txr, ifp);
1468#endif
1469			IGB_TX_UNLOCK(txr);
1470		}
1471	}
1472}
1473
1474/*********************************************************************
1475 *
1476 *  MSI/Legacy Deferred
1477 *  Interrupt Service routine
1478 *
1479 *********************************************************************/
1480static int
1481igb_irq_fast(void *arg)
1482{
1483	struct adapter		*adapter = arg;
1484	struct igb_queue	*que = adapter->queues;
1485	u32			reg_icr;
1486
1487
1488	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1489
1490	/* Hot eject?  */
1491	if (reg_icr == 0xffffffff)
1492		return FILTER_STRAY;
1493
1494	/* Definitely not our interrupt.  */
1495	if (reg_icr == 0x0)
1496		return FILTER_STRAY;
1497
1498	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1499		return FILTER_STRAY;
1500
1501	/*
1502	 * Mask interrupts until the taskqueue is finished running.  This is
1503	 * cheap, just assume that it is needed.  This also works around the
1504	 * MSI message reordering errata on certain systems.
1505	 */
1506	igb_disable_intr(adapter);
1507	taskqueue_enqueue(que->tq, &que->que_task);
1508
1509	/* Link status change */
1510	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1511		taskqueue_enqueue(que->tq, &adapter->link_task);
1512
1513	if (reg_icr & E1000_ICR_RXO)
1514		adapter->rx_overruns++;
1515	return FILTER_HANDLED;
1516}
1517
1518#ifdef DEVICE_POLLING
1519#if __FreeBSD_version >= 800000
1520#define POLL_RETURN_COUNT(a) (a)
1521static int
1522#else
1523#define POLL_RETURN_COUNT(a)
1524static void
1525#endif
1526igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1527{
1528	struct adapter		*adapter = ifp->if_softc;
1529	struct igb_queue	*que;
1530	struct tx_ring		*txr;
1531	u32			reg_icr, rx_done = 0;
1532	u32			loop = IGB_MAX_LOOP;
1533	bool			more;
1534
1535	IGB_CORE_LOCK(adapter);
1536	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1537		IGB_CORE_UNLOCK(adapter);
1538		return POLL_RETURN_COUNT(rx_done);
1539	}
1540
1541	if (cmd == POLL_AND_CHECK_STATUS) {
1542		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1543		/* Link status change */
1544		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1545			igb_handle_link_locked(adapter);
1546
1547		if (reg_icr & E1000_ICR_RXO)
1548			adapter->rx_overruns++;
1549	}
1550	IGB_CORE_UNLOCK(adapter);
1551
1552	for (int i = 0; i < adapter->num_queues; i++) {
1553		que = &adapter->queues[i];
1554		txr = que->txr;
1555
1556		igb_rxeof(que, count, &rx_done);
1557
1558		IGB_TX_LOCK(txr);
1559		do {
1560			more = igb_txeof(txr);
1561		} while (loop-- && more);
1562#ifndef IGB_LEGACY_TX
1563		if (!drbr_empty(ifp, txr->br))
1564			igb_mq_start_locked(ifp, txr);
1565#else
1566		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1567			igb_start_locked(txr, ifp);
1568#endif
1569		IGB_TX_UNLOCK(txr);
1570	}
1571
1572	return POLL_RETURN_COUNT(rx_done);
1573}
1574#endif /* DEVICE_POLLING */
1575
1576/*********************************************************************
1577 *
1578 *  MSIX Que Interrupt Service routine
1579 *
1580 **********************************************************************/
1581static void
1582igb_msix_que(void *arg)
1583{
1584	struct igb_queue *que = arg;
1585	struct adapter *adapter = que->adapter;
1586	struct ifnet   *ifp = adapter->ifp;
1587	struct tx_ring *txr = que->txr;
1588	struct rx_ring *rxr = que->rxr;
1589	u32		newitr = 0;
1590	bool		more_rx;
1591
1592	/* Ignore spurious interrupts */
1593	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1594		return;
1595
1596	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1597	++que->irqs;
1598
1599	IGB_TX_LOCK(txr);
1600	igb_txeof(txr);
1601#ifndef IGB_LEGACY_TX
1602	/* Process the stack queue only if not depleted */
1603	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1604	    !drbr_empty(ifp, txr->br))
1605		igb_mq_start_locked(ifp, txr);
1606#else
1607	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1608		igb_start_locked(txr, ifp);
1609#endif
1610	IGB_TX_UNLOCK(txr);
1611
1612	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1613
1614	if (adapter->enable_aim == FALSE)
1615		goto no_calc;
1616	/*
1617	** Do Adaptive Interrupt Moderation:
1618	**  - Write out last calculated setting
1619	**  - Calculate based on average size over
1620	**    the last interval.
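	**
	**  Illustrative arithmetic: an average frame of ~1500 bytes gives
	**  newitr = 1500 + 24 = 1524; that is above the 300-1200 midrange,
	**  so it is halved to 762 and masked to 760 before being written to
	**  EITR on the next pass.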
1621	*/
1622        if (que->eitr_setting)
1623                E1000_WRITE_REG(&adapter->hw,
1624                    E1000_EITR(que->msix), que->eitr_setting);
1625
1626        que->eitr_setting = 0;
1627
1628        /* Idle, do nothing */
1629        if ((txr->bytes == 0) && (rxr->bytes == 0))
1630                goto no_calc;
1631
1632        /* Use half the default if sub-gigabit */
1633        if (adapter->link_speed != 1000)
1634                newitr = IGB_DEFAULT_ITR / 2;
1635        else {
1636		if ((txr->bytes) && (txr->packets))
1637                	newitr = txr->bytes/txr->packets;
1638		if ((rxr->bytes) && (rxr->packets))
1639			newitr = max(newitr,
1640			    (rxr->bytes / rxr->packets));
1641                newitr += 24; /* account for hardware frame, crc */
1642		/* set an upper boundary */
1643		newitr = min(newitr, 3000);
1644		/* Be nice to the mid range */
1645                if ((newitr > 300) && (newitr < 1200))
1646                        newitr = (newitr / 3);
1647                else
1648                        newitr = (newitr / 2);
1649        }
1650        newitr &= 0x7FFC;  /* Mask invalid bits */
1651        if (adapter->hw.mac.type == e1000_82575)
1652                newitr |= newitr << 16;
1653        else
1654                newitr |= E1000_EITR_CNT_IGNR;
1655
1656        /* save for next interrupt */
1657        que->eitr_setting = newitr;
1658
1659        /* Reset state */
1660        txr->bytes = 0;
1661        txr->packets = 0;
1662        rxr->bytes = 0;
1663        rxr->packets = 0;
1664
1665no_calc:
1666	/* Schedule a clean task if needed */
1667	if (more_rx)
1668		taskqueue_enqueue(que->tq, &que->que_task);
1669	else
1670		/* Reenable this interrupt */
1671		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1672	return;
1673}
1674
1675
1676/*********************************************************************
1677 *
1678 *  MSIX Link Interrupt Service routine
1679 *
1680 **********************************************************************/
1681
1682static void
1683igb_msix_link(void *arg)
1684{
1685	struct adapter	*adapter = arg;
1686	u32       	icr;
1687
1688	++adapter->link_irq;
1689	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1690	if (!(icr & E1000_ICR_LSC))
1691		goto spurious;
1692	igb_handle_link(adapter, 0);
1693
1694spurious:
1695	/* Rearm */
1696	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1697	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1698	return;
1699}
1700
1701
1702/*********************************************************************
1703 *
1704 *  Media Ioctl callback
1705 *
1706 *  This routine is called whenever the user queries the status of
1707 *  the interface using ifconfig.
1708 *
1709 **********************************************************************/
1710static void
1711igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1712{
1713	struct adapter *adapter = ifp->if_softc;
1714
1715	INIT_DEBUGOUT("igb_media_status: begin");
1716
1717	IGB_CORE_LOCK(adapter);
1718	igb_update_link_status(adapter);
1719
1720	ifmr->ifm_status = IFM_AVALID;
1721	ifmr->ifm_active = IFM_ETHER;
1722
1723	if (!adapter->link_active) {
1724		IGB_CORE_UNLOCK(adapter);
1725		return;
1726	}
1727
1728	ifmr->ifm_status |= IFM_ACTIVE;
1729
1730	switch (adapter->link_speed) {
1731	case 10:
1732		ifmr->ifm_active |= IFM_10_T;
1733		break;
1734	case 100:
1735		/*
1736		** Support for 100Mb SFP - these are Fiber
1737		** but the media type appears as serdes
1738		*/
1739		if (adapter->hw.phy.media_type ==
1740		    e1000_media_type_internal_serdes)
1741			ifmr->ifm_active |= IFM_100_FX;
1742		else
1743			ifmr->ifm_active |= IFM_100_TX;
1744		break;
1745	case 1000:
1746		ifmr->ifm_active |= IFM_1000_T;
1747		break;
1748	case 2500:
1749		ifmr->ifm_active |= IFM_2500_SX;
1750		break;
1751	}
1752
1753	if (adapter->link_duplex == FULL_DUPLEX)
1754		ifmr->ifm_active |= IFM_FDX;
1755	else
1756		ifmr->ifm_active |= IFM_HDX;
1757
1758	IGB_CORE_UNLOCK(adapter);
1759}
1760
1761/*********************************************************************
1762 *
1763 *  Media Ioctl callback
1764 *
1765 *  This routine is called when the user changes speed/duplex using
1766 *  the media/mediaopt options with ifconfig.
1767 *
1768 **********************************************************************/
1769static int
1770igb_media_change(struct ifnet *ifp)
1771{
1772	struct adapter *adapter = ifp->if_softc;
1773	struct ifmedia  *ifm = &adapter->media;
1774
1775	INIT_DEBUGOUT("igb_media_change: begin");
1776
1777	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1778		return (EINVAL);
1779
1780	IGB_CORE_LOCK(adapter);
1781	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1782	case IFM_AUTO:
1783		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1784		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1785		break;
1786	case IFM_1000_LX:
1787	case IFM_1000_SX:
1788	case IFM_1000_T:
1789		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1790		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1791		break;
1792	case IFM_100_TX:
1793		adapter->hw.mac.autoneg = FALSE;
1794		adapter->hw.phy.autoneg_advertised = 0;
1795		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1796			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1797		else
1798			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1799		break;
1800	case IFM_10_T:
1801		adapter->hw.mac.autoneg = FALSE;
1802		adapter->hw.phy.autoneg_advertised = 0;
1803		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1804			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1805		else
1806			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1807		break;
1808	default:
1809		device_printf(adapter->dev, "Unsupported media type\n");
1810	}
1811
1812	igb_init_locked(adapter);
1813	IGB_CORE_UNLOCK(adapter);
1814
1815	return (0);
1816}
1817
1818
1819/*********************************************************************
1820 *
1821 *  This routine maps the mbufs to Advanced TX descriptors.
1822 *
1823 **********************************************************************/
1824static int
1825igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1826{
1827	struct adapter  *adapter = txr->adapter;
1828	u32		olinfo_status = 0, cmd_type_len;
1829	int             i, j, error, nsegs;
1830	int		first;
1831	bool		remap = TRUE;
1832	struct mbuf	*m_head;
1833	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1834	bus_dmamap_t	map;
1835	struct igb_tx_buf *txbuf;
1836	union e1000_adv_tx_desc *txd = NULL;
1837
1838	m_head = *m_headp;
1839
1840	/* Basic descriptor defines */
1841        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1842	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1843
1844	if (m_head->m_flags & M_VLANTAG)
1845        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1846
1847        /*
1848         * Important to capture the first descriptor
1849         * used because it will contain the index of
1850         * the one we tell the hardware to report back
1851         */
1852        first = txr->next_avail_desc;
1853	txbuf = &txr->tx_buffers[first];
1854	map = txbuf->map;
1855
1856	/*
1857	 * Map the packet for DMA.
1858	 */
1859retry:
1860	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1861	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1862
1863	if (__predict_false(error)) {
1864		struct mbuf *m;
1865
1866		switch (error) {
1867		case EFBIG:
1868			/* Try it again? - one try */
1869			if (remap == TRUE) {
1870				remap = FALSE;
1871				m = m_collapse(*m_headp, M_NOWAIT,
1872				    IGB_MAX_SCATTER);
1873				if (m == NULL) {
1874					adapter->mbuf_defrag_failed++;
1875					m_freem(*m_headp);
1876					*m_headp = NULL;
1877					return (ENOBUFS);
1878				}
1879				*m_headp = m;
1880				goto retry;
1881			} else
1882				return (error);
1883		default:
1884			txr->no_tx_dma_setup++;
1885			m_freem(*m_headp);
1886			*m_headp = NULL;
1887			return (error);
1888		}
1889	}
1890
1891	/* Make certain there are enough descriptors */
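	/*
	** (nsegs + 2 is assumed to cover one possible offload context
	** descriptor plus one slot of slack so the ring is never
	** driven completely full.)
	*/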
1892	if (txr->tx_avail < (nsegs + 2)) {
1893		txr->no_desc_avail++;
1894		bus_dmamap_unload(txr->txtag, map);
1895		return (ENOBUFS);
1896	}
1897	m_head = *m_headp;
1898
1899	/*
1900	** Set up the appropriate offload context
1901	** this will consume the first descriptor
1902	*/
1903	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1904	if (__predict_false(error)) {
1905		m_freem(*m_headp);
1906		*m_headp = NULL;
1907		return (error);
1908	}
1909
1910	/* 82575 needs the queue index added */
1911	if (adapter->hw.mac.type == e1000_82575)
1912		olinfo_status |= txr->me << 4;
1913
1914	i = txr->next_avail_desc;
1915	for (j = 0; j < nsegs; j++) {
1916		bus_size_t seglen;
1917		bus_addr_t segaddr;
1918
1919		txbuf = &txr->tx_buffers[i];
1920		txd = &txr->tx_base[i];
1921		seglen = segs[j].ds_len;
1922		segaddr = htole64(segs[j].ds_addr);
1923
1924		txd->read.buffer_addr = segaddr;
1925		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1926		    cmd_type_len | seglen);
1927		txd->read.olinfo_status = htole32(olinfo_status);
1928
1929		if (++i == txr->num_desc)
1930			i = 0;
1931	}
1932
1933	txd->read.cmd_type_len |=
1934	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1935	txr->tx_avail -= nsegs;
1936	txr->next_avail_desc = i;
1937
1938	txbuf->m_head = m_head;
1939	/*
1940	** Here we swap the map so the last descriptor,
1941	** which gets the completion interrupt, has the
1942	** real map, and the first descriptor gets the
1943	** unused map from this descriptor.
1944	*/
1945	txr->tx_buffers[first].map = txbuf->map;
1946	txbuf->map = map;
1947	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1948
1949        /* Set the EOP descriptor that will be marked done */
1950        txbuf = &txr->tx_buffers[first];
1951	txbuf->eop = txd;
1952
1953        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1954            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1955	/*
1956	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1957	 * hardware that this frame is available to transmit.
1958	 */
1959	++txr->total_packets;
1960	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1961
1962	return (0);
1963}
1964static void
1965igb_set_promisc(struct adapter *adapter)
1966{
1967	struct ifnet	*ifp = adapter->ifp;
1968	struct e1000_hw *hw = &adapter->hw;
1969	u32		reg;
1970
1971	if (adapter->vf_ifp) {
1972		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1973		return;
1974	}
1975
1976	reg = E1000_READ_REG(hw, E1000_RCTL);
1977	if (ifp->if_flags & IFF_PROMISC) {
1978		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1979		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1980	} else if (ifp->if_flags & IFF_ALLMULTI) {
1981		reg |= E1000_RCTL_MPE;
1982		reg &= ~E1000_RCTL_UPE;
1983		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1984	}
1985}
1986
1987static void
1988igb_disable_promisc(struct adapter *adapter)
1989{
1990	struct e1000_hw *hw = &adapter->hw;
1991	struct ifnet	*ifp = adapter->ifp;
1992	u32		reg;
1993	int		mcnt = 0;
1994
1995	if (adapter->vf_ifp) {
1996		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1997		return;
1998	}
1999	reg = E1000_READ_REG(hw, E1000_RCTL);
2000	reg &=  (~E1000_RCTL_UPE);
2001	if (ifp->if_flags & IFF_ALLMULTI)
2002		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2003	else {
2004		struct  ifmultiaddr *ifma;
2005#if __FreeBSD_version < 800000
2006		IF_ADDR_LOCK(ifp);
2007#else
2008		if_maddr_rlock(ifp);
2009#endif
2010		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011			if (ifma->ifma_addr->sa_family != AF_LINK)
2012				continue;
2013			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2014				break;
2015			mcnt++;
2016		}
2017#if __FreeBSD_version < 800000
2018		IF_ADDR_UNLOCK(ifp);
2019#else
2020		if_maddr_runlock(ifp);
2021#endif
2022	}
2023	/* Don't disable if in MAX groups */
2024	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2025		reg &=  (~E1000_RCTL_MPE);
2026	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2027}
2028
2029
2030/*********************************************************************
2031 *  Multicast Update
2032 *
2033 *  This routine is called whenever multicast address list is updated.
2034 *
2035 **********************************************************************/
2036
2037static void
2038igb_set_multi(struct adapter *adapter)
2039{
2040	struct ifnet	*ifp = adapter->ifp;
2041	struct ifmultiaddr *ifma;
2042	u32 reg_rctl = 0;
2043	u8  *mta;
2044
2045	int mcnt = 0;
2046
2047	IOCTL_DEBUGOUT("igb_set_multi: begin");
2048
2049	mta = adapter->mta;
2050	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2051	    MAX_NUM_MULTICAST_ADDRESSES);
2052
2053#if __FreeBSD_version < 800000
2054	IF_ADDR_LOCK(ifp);
2055#else
2056	if_maddr_rlock(ifp);
2057#endif
2058	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2059		if (ifma->ifma_addr->sa_family != AF_LINK)
2060			continue;
2061
2062		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2063			break;
2064
2065		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2066		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2067		mcnt++;
2068	}
2069#if __FreeBSD_version < 800000
2070	IF_ADDR_UNLOCK(ifp);
2071#else
2072	if_maddr_runlock(ifp);
2073#endif
2074
2075	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2076		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2077		reg_rctl |= E1000_RCTL_MPE;
2078		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2079	} else
2080		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2081}
2082
2083
2084/*********************************************************************
2085 *  Timer routine:
2086 *  	This routine checks for link status,
2087 *	updates statistics, and does the watchdog.
2088 *
2089 **********************************************************************/
2090
2091static void
2092igb_local_timer(void *arg)
2093{
2094	struct adapter		*adapter = arg;
2095	device_t		dev = adapter->dev;
2096	struct ifnet		*ifp = adapter->ifp;
2097	struct tx_ring		*txr = adapter->tx_rings;
2098	struct igb_queue	*que = adapter->queues;
2099	int			hung = 0, busy = 0;
2100
2101
2102	IGB_CORE_LOCK_ASSERT(adapter);
2103
2104	igb_update_link_status(adapter);
2105	igb_update_stats_counters(adapter);
2106
2107        /*
2108        ** Check the TX queues status
2109	**	- central locked handling of OACTIVE
2110	**	- watchdog only if all queues show hung
2111        */
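	/*
	** A queue that is stalled while pause frames are arriving is
	** not treated as hung: pause_frames != 0 below means the link
	** partner throttled us, so the watchdog check is skipped for
	** that interval.
	*/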
2112	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2113		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2114		    (adapter->pause_frames == 0))
2115			++hung;
2116		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2117			++busy;
2118		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2119			taskqueue_enqueue(que->tq, &que->que_task);
2120	}
2121	if (hung == adapter->num_queues)
2122		goto timeout;
2123	if (busy == adapter->num_queues)
2124		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2125	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2126	    (busy < adapter->num_queues))
2127		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2128
2129	adapter->pause_frames = 0;
2130	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2131#ifndef DEVICE_POLLING
2132	/* Schedule all queue interrupts - deadlock protection */
2133	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2134#endif
2135	return;
2136
2137timeout:
2138	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2139	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2140            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2141            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2142	device_printf(dev, "TX(%d) desc avail = %d, "
2143            "Next TX to Clean = %d\n",
2144            txr->me, txr->tx_avail, txr->next_to_clean);
2145	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2146	adapter->watchdog_events++;
2147	igb_init_locked(adapter);
2148}
2149
2150static void
2151igb_update_link_status(struct adapter *adapter)
2152{
2153	struct e1000_hw		*hw = &adapter->hw;
2154	struct e1000_fc_info	*fc = &hw->fc;
2155	struct ifnet		*ifp = adapter->ifp;
2156	device_t		dev = adapter->dev;
2157	struct tx_ring		*txr = adapter->tx_rings;
2158	u32			link_check, thstat, ctrl;
2159	char			*flowctl = NULL;
2160
2161	link_check = thstat = ctrl = 0;
2162
2163	/* Get the cached link value or read for real */
2164        switch (hw->phy.media_type) {
2165        case e1000_media_type_copper:
2166                if (hw->mac.get_link_status) {
2167			/* Do the work to read phy */
2168                        e1000_check_for_link(hw);
2169                        link_check = !hw->mac.get_link_status;
2170                } else
2171                        link_check = TRUE;
2172                break;
2173        case e1000_media_type_fiber:
2174                e1000_check_for_link(hw);
2175                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2176                                 E1000_STATUS_LU);
2177                break;
2178        case e1000_media_type_internal_serdes:
2179                e1000_check_for_link(hw);
2180                link_check = adapter->hw.mac.serdes_has_link;
2181                break;
2182	/* VF device is type_unknown */
2183        case e1000_media_type_unknown:
2184                e1000_check_for_link(hw);
2185		link_check = !hw->mac.get_link_status;
2186		/* Fall thru */
2187        default:
2188                break;
2189        }
2190
2191	/* Check for thermal downshift or shutdown */
2192	if (hw->mac.type == e1000_i350) {
2193		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2194		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2195	}
2196
2197	/* Get the flow control for display */
2198	switch (fc->current_mode) {
2199	case e1000_fc_rx_pause:
2200		flowctl = "RX";
2201		break;
2202	case e1000_fc_tx_pause:
2203		flowctl = "TX";
2204		break;
2205	case e1000_fc_full:
2206		flowctl = "Full";
2207		break;
2208	case e1000_fc_none:
2209	default:
2210		flowctl = "None";
2211		break;
2212	}
2213
2214	/* Now we check if a transition has happened */
2215	if (link_check && (adapter->link_active == 0)) {
2216		e1000_get_speed_and_duplex(&adapter->hw,
2217		    &adapter->link_speed, &adapter->link_duplex);
2218		if (bootverbose)
2219			device_printf(dev, "Link is up %d Mbps %s,"
2220			    " Flow Control: %s\n",
2221			    adapter->link_speed,
2222			    ((adapter->link_duplex == FULL_DUPLEX) ?
2223			    "Full Duplex" : "Half Duplex"), flowctl);
2224		adapter->link_active = 1;
2225		ifp->if_baudrate = adapter->link_speed * 1000000;
2226		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2227		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2228			device_printf(dev, "Link: thermal downshift\n");
2229		/* Delay Link Up for Phy update */
2230		if (((hw->mac.type == e1000_i210) ||
2231		    (hw->mac.type == e1000_i211)) &&
2232		    (hw->phy.id == I210_I_PHY_ID))
2233			msec_delay(I210_LINK_DELAY);
2234		/* Reset if the media type changed. */
2235		if (hw->dev_spec._82575.media_changed) {
2236			hw->dev_spec._82575.media_changed = false;
2237			adapter->flags |= IGB_MEDIA_RESET;
2238			igb_reset(adapter);
2239		}
2240		/* This can sleep */
2241		if_link_state_change(ifp, LINK_STATE_UP);
2242	} else if (!link_check && (adapter->link_active == 1)) {
2243		ifp->if_baudrate = adapter->link_speed = 0;
2244		adapter->link_duplex = 0;
2245		if (bootverbose)
2246			device_printf(dev, "Link is Down\n");
2247		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2248		    (thstat & E1000_THSTAT_PWR_DOWN))
2249			device_printf(dev, "Link: thermal shutdown\n");
2250		adapter->link_active = 0;
2251		/* This can sleep */
2252		if_link_state_change(ifp, LINK_STATE_DOWN);
2253		/* Reset queue state */
2254		for (int i = 0; i < adapter->num_queues; i++, txr++)
2255			txr->queue_status = IGB_QUEUE_IDLE;
2256	}
2257}
2258
2259/*********************************************************************
2260 *
2261 *  This routine disables all traffic on the adapter by issuing a
2262 *  global reset on the MAC and deallocates TX/RX buffers.
2263 *
2264 **********************************************************************/
2265
2266static void
2267igb_stop(void *arg)
2268{
2269	struct adapter	*adapter = arg;
2270	struct ifnet	*ifp = adapter->ifp;
2271	struct tx_ring *txr = adapter->tx_rings;
2272
2273	IGB_CORE_LOCK_ASSERT(adapter);
2274
2275	INIT_DEBUGOUT("igb_stop: begin");
2276
2277	igb_disable_intr(adapter);
2278
2279	callout_stop(&adapter->timer);
2280
2281	/* Tell the stack that the interface is no longer active */
2282	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2283	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2284
2285	/* Disarm watchdog timer. */
2286	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2287		IGB_TX_LOCK(txr);
2288		txr->queue_status = IGB_QUEUE_IDLE;
2289		IGB_TX_UNLOCK(txr);
2290	}
2291
2292	e1000_reset_hw(&adapter->hw);
2293	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2294
2295	e1000_led_off(&adapter->hw);
2296	e1000_cleanup_led(&adapter->hw);
2297}
2298
2299
2300/*********************************************************************
2301 *
2302 *  Determine hardware revision.
2303 *
2304 **********************************************************************/
2305static void
2306igb_identify_hardware(struct adapter *adapter)
2307{
2308	device_t dev = adapter->dev;
2309
2310	/* Make sure our PCI config space has the necessary stuff set */
2311	pci_enable_busmaster(dev);
2312	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2313
2314	/* Save off the information about this board */
2315	adapter->hw.vendor_id = pci_get_vendor(dev);
2316	adapter->hw.device_id = pci_get_device(dev);
2317	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2318	adapter->hw.subsystem_vendor_id =
2319	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2320	adapter->hw.subsystem_device_id =
2321	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2322
2323	/* Set MAC type early for PCI setup */
2324	e1000_set_mac_type(&adapter->hw);
2325
2326	/* Are we a VF device? */
2327	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2328	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2329		adapter->vf_ifp = 1;
2330	else
2331		adapter->vf_ifp = 0;
2332}
2333
2334static int
2335igb_allocate_pci_resources(struct adapter *adapter)
2336{
2337	device_t	dev = adapter->dev;
2338	int		rid;
2339
2340	rid = PCIR_BAR(0);
2341	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2342	    &rid, RF_ACTIVE);
2343	if (adapter->pci_mem == NULL) {
2344		device_printf(dev, "Unable to allocate bus resource: memory\n");
2345		return (ENXIO);
2346	}
2347	adapter->osdep.mem_bus_space_tag =
2348	    rman_get_bustag(adapter->pci_mem);
2349	adapter->osdep.mem_bus_space_handle =
2350	    rman_get_bushandle(adapter->pci_mem);
2351	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2352
2353	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2354
2355	/* This will setup either MSI/X or MSI */
2356	adapter->msix = igb_setup_msix(adapter);
2357	adapter->hw.back = &adapter->osdep;
2358
2359	return (0);
2360}
2361
2362/*********************************************************************
2363 *
2364 *  Setup the Legacy or MSI Interrupt handler
2365 *
2366 **********************************************************************/
2367static int
2368igb_allocate_legacy(struct adapter *adapter)
2369{
2370	device_t		dev = adapter->dev;
2371	struct igb_queue	*que = adapter->queues;
2372#ifndef IGB_LEGACY_TX
2373	struct tx_ring		*txr = adapter->tx_rings;
2374#endif
2375	int			error, rid = 0;
2376
2377	/* Turn off all interrupts */
2378	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2379
2380	/* MSI RID is 1 */
2381	if (adapter->msix == 1)
2382		rid = 1;
2383
2384	/* We allocate a single interrupt resource */
2385	adapter->res = bus_alloc_resource_any(dev,
2386	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2387	if (adapter->res == NULL) {
2388		device_printf(dev, "Unable to allocate bus resource: "
2389		    "interrupt\n");
2390		return (ENXIO);
2391	}
2392
2393#ifndef IGB_LEGACY_TX
2394	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2395#endif
2396
2397	/*
2398	 * Try allocating a fast interrupt and the associated deferred
2399	 * processing contexts.
2400	 */
2401	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2402	/* Make tasklet for deferred link handling */
2403	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2404	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2405	    taskqueue_thread_enqueue, &que->tq);
2406	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2407	    device_get_nameunit(adapter->dev));
2408	if ((error = bus_setup_intr(dev, adapter->res,
2409	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2410	    adapter, &adapter->tag)) != 0) {
2411		device_printf(dev, "Failed to register fast interrupt "
2412			    "handler: %d\n", error);
2413		taskqueue_free(que->tq);
2414		que->tq = NULL;
2415		return (error);
2416	}
2417
2418	return (0);
2419}
2420
2421
2422/*********************************************************************
2423 *
2424 *  Setup the MSIX Queue Interrupt handlers:
2425 *
2426 **********************************************************************/
2427static int
2428igb_allocate_msix(struct adapter *adapter)
2429{
2430	device_t		dev = adapter->dev;
2431	struct igb_queue	*que = adapter->queues;
2432	int			error, rid, vector = 0;
2433	int			cpu_id = 0;
2434#ifdef	RSS
2435	cpuset_t cpu_mask;
2436#endif
2437
2438	/* Be sure to start with all interrupts disabled */
2439	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2440	E1000_WRITE_FLUSH(&adapter->hw);
2441
2442#ifdef	RSS
2443	/*
2444	 * If we're doing RSS, the number of queues needs to
2445	 * match the number of RSS buckets that are configured.
2446	 *
2447	 * + If there are more queues than RSS buckets, we'll end
2448	 *   up with queues that get no traffic.
2449	 *
2450	 * + If there are more RSS buckets than queues, we'll end
2451	 *   up having multiple RSS buckets map to the same queue,
2452	 *   so there'll be some contention.
2453	 */
2454	if (adapter->num_queues != rss_getnumbuckets()) {
2455		device_printf(dev,
2456		    "%s: number of queues (%d) != number of RSS buckets (%d)"
2457		    "; performance will be impacted.\n",
2458		    __func__,
2459		    adapter->num_queues,
2460		    rss_getnumbuckets());
2461	}
2462#endif
2463
2464	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2465		rid = vector + 1;
2466		que->res = bus_alloc_resource_any(dev,
2467		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2468		if (que->res == NULL) {
2469			device_printf(dev,
2470			    "Unable to allocate bus resource: "
2471			    "MSIX Queue Interrupt\n");
2472			return (ENXIO);
2473		}
2474		error = bus_setup_intr(dev, que->res,
2475	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2476		    igb_msix_que, que, &que->tag);
2477		if (error) {
2478			que->res = NULL;
2479			device_printf(dev, "Failed to register Queue handler");
2480			return (error);
2481		}
2482#if __FreeBSD_version >= 800504
2483		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2484#endif
2485		que->msix = vector;
2486		if (adapter->hw.mac.type == e1000_82575)
2487			que->eims = E1000_EICR_TX_QUEUE0 << i;
2488		else
2489			que->eims = 1 << vector;
2490
2491#ifdef	RSS
2492		/*
2493		 * The queue ID is used as the RSS layer bucket ID.
2494		 * We look up the queue ID -> RSS CPU ID and select
2495		 * that.
2496		 */
2497		cpu_id = rss_getcpu(i % rss_getnumbuckets());
2498#else
2499		/*
2500		 * Bind the msix vector, and thus the
2501		 * rings to the corresponding cpu.
2502		 *
2503		 * This just happens to match the default RSS round-robin
2504		 * bucket -> queue -> CPU allocation.
2505		 */
2506		if (adapter->num_queues > 1) {
2507			if (igb_last_bind_cpu < 0)
2508				igb_last_bind_cpu = CPU_FIRST();
2509			cpu_id = igb_last_bind_cpu;
2510		}
2511#endif
2512
2513		if (adapter->num_queues > 1) {
2514			bus_bind_intr(dev, que->res, cpu_id);
2515#ifdef	RSS
2516			device_printf(dev,
2517				"Bound queue %d to RSS bucket %d\n",
2518				i, cpu_id);
2519#else
2520			device_printf(dev,
2521				"Bound queue %d to cpu %d\n",
2522				i, cpu_id);
2523#endif
2524		}
2525
2526#ifndef IGB_LEGACY_TX
2527		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2528		    que->txr);
2529#endif
2530		/* Make tasklet for deferred handling */
2531		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2532		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2533		    taskqueue_thread_enqueue, &que->tq);
2534		if (adapter->num_queues > 1) {
2535			/*
2536			 * Only pin the taskqueue thread to a CPU if
2537			 * RSS is in use.
2538			 *
2539			 * This again just happens to match the default RSS
2540			 * round-robin bucket -> queue -> CPU allocation.
2541			 */
2542#ifdef	RSS
2543			CPU_SETOF(cpu_id, &cpu_mask);
2544			taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2545			    &cpu_mask,
2546			    "%s que (bucket %d)",
2547			    device_get_nameunit(adapter->dev),
2548			    cpu_id);
2549#else
2550			taskqueue_start_threads(&que->tq, 1, PI_NET,
2551			    "%s que (qid %d)",
2552			    device_get_nameunit(adapter->dev),
2553			    cpu_id);
2554#endif
2555		} else {
2556			taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2557			    device_get_nameunit(adapter->dev));
2558		}
2559
2560		/* Finally update the last bound CPU id */
2561		if (adapter->num_queues > 1)
2562			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2563	}
2564
2565	/* And Link */
2566	rid = vector + 1;
2567	adapter->res = bus_alloc_resource_any(dev,
2568	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2569	if (adapter->res == NULL) {
2570		device_printf(dev,
2571		    "Unable to allocate bus resource: "
2572		    "MSIX Link Interrupt\n");
2573		return (ENXIO);
2574	}
2575	if ((error = bus_setup_intr(dev, adapter->res,
2576	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2577	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2578		device_printf(dev, "Failed to register Link handler");
2579		return (error);
2580	}
2581#if __FreeBSD_version >= 800504
2582	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2583#endif
2584	adapter->linkvec = vector;
2585
2586	return (0);
2587}
2588
2589
2590static void
2591igb_configure_queues(struct adapter *adapter)
2592{
2593	struct	e1000_hw	*hw = &adapter->hw;
2594	struct	igb_queue	*que;
2595	u32			tmp, ivar = 0, newitr = 0;
2596
2597	/* First turn on RSS capability */
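	/*
	** The GPIE bits written below select MSI-X multiple-vector
	** mode, extended interrupt auto-mask (EIAME), PBA support and
	** non-selective interrupt clear-on-read (NSICR); this summary
	** follows the bit names rather than quoting the datasheet.
	*/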
2598	if (adapter->hw.mac.type != e1000_82575)
2599		E1000_WRITE_REG(hw, E1000_GPIE,
2600		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2601		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2602
2603	/* Turn on MSIX */
2604	switch (adapter->hw.mac.type) {
2605	case e1000_82580:
2606	case e1000_i350:
2607	case e1000_i354:
2608	case e1000_i210:
2609	case e1000_i211:
2610	case e1000_vfadapt:
2611	case e1000_vfadapt_i350:
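		/*
		** For these MACs each 32-bit IVAR register covers two
		** queues: byte 0 holds the RX vector for the even queue,
		** byte 1 its TX vector, and bytes 2 and 3 the RX/TX
		** vectors of the odd queue (assumed from the shifts and
		** masks used below).
		*/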
2612		/* RX entries */
2613		for (int i = 0; i < adapter->num_queues; i++) {
2614			u32 index = i >> 1;
2615			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2616			que = &adapter->queues[i];
2617			if (i & 1) {
2618				ivar &= 0xFF00FFFF;
2619				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2620			} else {
2621				ivar &= 0xFFFFFF00;
2622				ivar |= que->msix | E1000_IVAR_VALID;
2623			}
2624			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2625		}
2626		/* TX entries */
2627		for (int i = 0; i < adapter->num_queues; i++) {
2628			u32 index = i >> 1;
2629			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2630			que = &adapter->queues[i];
2631			if (i & 1) {
2632				ivar &= 0x00FFFFFF;
2633				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2634			} else {
2635				ivar &= 0xFFFF00FF;
2636				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2637			}
2638			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2639			adapter->que_mask |= que->eims;
2640		}
2641
2642		/* And for the link interrupt */
2643		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2644		adapter->link_mask = 1 << adapter->linkvec;
2645		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2646		break;
2647	case e1000_82576:
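		/*
		** On 82576 the IVAR layout differs: queues 0-7 use the
		** low half of IVAR0..IVAR7 (RX in byte 0, TX in byte 1)
		** while queues 8-15 reuse the same registers in bytes 2
		** and 3 (again inferred from the masks below).
		*/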
2648		/* RX entries */
2649		for (int i = 0; i < adapter->num_queues; i++) {
2650			u32 index = i & 0x7; /* Each IVAR has two entries */
2651			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2652			que = &adapter->queues[i];
2653			if (i < 8) {
2654				ivar &= 0xFFFFFF00;
2655				ivar |= que->msix | E1000_IVAR_VALID;
2656			} else {
2657				ivar &= 0xFF00FFFF;
2658				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2659			}
2660			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2661			adapter->que_mask |= que->eims;
2662		}
2663		/* TX entries */
2664		for (int i = 0; i < adapter->num_queues; i++) {
2665			u32 index = i & 0x7; /* Each IVAR has two entries */
2666			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2667			que = &adapter->queues[i];
2668			if (i < 8) {
2669				ivar &= 0xFFFF00FF;
2670				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2671			} else {
2672				ivar &= 0x00FFFFFF;
2673				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2674			}
2675			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2676			adapter->que_mask |= que->eims;
2677		}
2678
2679		/* And for the link interrupt */
2680		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2681		adapter->link_mask = 1 << adapter->linkvec;
2682		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2683		break;
2684
2685	case e1000_82575:
2686                /* enable MSI-X support*/
2687		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2688                tmp |= E1000_CTRL_EXT_PBA_CLR;
2689                /* Auto-Mask interrupts upon ICR read. */
2690                tmp |= E1000_CTRL_EXT_EIAME;
2691                tmp |= E1000_CTRL_EXT_IRCA;
2692                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2693
2694		/* Queues */
2695		for (int i = 0; i < adapter->num_queues; i++) {
2696			que = &adapter->queues[i];
2697			tmp = E1000_EICR_RX_QUEUE0 << i;
2698			tmp |= E1000_EICR_TX_QUEUE0 << i;
2699			que->eims = tmp;
2700			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2701			    i, que->eims);
2702			adapter->que_mask |= que->eims;
2703		}
2704
2705		/* Link */
2706		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2707		    E1000_EIMS_OTHER);
2708		adapter->link_mask |= E1000_EIMS_OTHER;
2709	default:
2710		break;
2711	}
2712
2713	/* Set the starting interrupt rate */
2714	if (igb_max_interrupt_rate > 0)
2715		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
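	/*
	** Worked example: with the default igb_max_interrupt_rate of
	** 8000, 4000000 / 8000 = 500 (0x1F4), which survives the
	** 0x7FFC mask unchanged. The default of 8000 is an assumption
	** about the tunable, not visible in this function.
	*/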
2716
2717        if (hw->mac.type == e1000_82575)
2718                newitr |= newitr << 16;
2719        else
2720                newitr |= E1000_EITR_CNT_IGNR;
2721
2722	for (int i = 0; i < adapter->num_queues; i++) {
2723		que = &adapter->queues[i];
2724		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2725	}
2726
2727	return;
2728}
2729
2730
2731static void
2732igb_free_pci_resources(struct adapter *adapter)
2733{
2734	struct		igb_queue *que = adapter->queues;
2735	device_t	dev = adapter->dev;
2736	int		rid;
2737
2738	/*
2739	** There is a slight possibility of a failure mode
2740	** in attach that results in entering this function
2741	** before the interrupt resources have been initialized;
2742	** in that case we do not want to execute the loops below.
2743	** We can detect this reliably by the state of the adapter's
2744	** res pointer.
2745	*/
2746	if (adapter->res == NULL)
2747		goto mem;
2748
2749	/*
2750	 * First release all the interrupt resources:
2751	 */
2752	for (int i = 0; i < adapter->num_queues; i++, que++) {
2753		rid = que->msix + 1;
2754		if (que->tag != NULL) {
2755			bus_teardown_intr(dev, que->res, que->tag);
2756			que->tag = NULL;
2757		}
2758		if (que->res != NULL)
2759			bus_release_resource(dev,
2760			    SYS_RES_IRQ, rid, que->res);
2761	}
2762
2763	/* Clean the Legacy or Link interrupt last */
2764	if (adapter->linkvec) /* we are doing MSIX */
2765		rid = adapter->linkvec + 1;
2766	else
2767		rid = (adapter->msix != 0) ? 1 : 0;
2768
2769	que = adapter->queues;
2770	if (adapter->tag != NULL) {
2771		taskqueue_drain(que->tq, &adapter->link_task);
2772		bus_teardown_intr(dev, adapter->res, adapter->tag);
2773		adapter->tag = NULL;
2774	}
2775	if (adapter->res != NULL)
2776		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2777
2778	for (int i = 0; i < adapter->num_queues; i++, que++) {
2779		if (que->tq != NULL) {
2780#ifndef IGB_LEGACY_TX
2781			taskqueue_drain(que->tq, &que->txr->txq_task);
2782#endif
2783			taskqueue_drain(que->tq, &que->que_task);
2784			taskqueue_free(que->tq);
2785		}
2786	}
2787mem:
2788	if (adapter->msix)
2789		pci_release_msi(dev);
2790
2791	if (adapter->msix_mem != NULL)
2792		bus_release_resource(dev, SYS_RES_MEMORY,
2793		    adapter->memrid, adapter->msix_mem);
2794
2795	if (adapter->pci_mem != NULL)
2796		bus_release_resource(dev, SYS_RES_MEMORY,
2797		    PCIR_BAR(0), adapter->pci_mem);
2798
2799}
2800
2801/*
2802 * Setup Either MSI/X or MSI
2803 */
2804static int
2805igb_setup_msix(struct adapter *adapter)
2806{
2807	device_t	dev = adapter->dev;
2808	int		bar, want, queues, msgs, maxqueues;
2809
2810	/* tuneable override */
2811	if (igb_enable_msix == 0)
2812		goto msi;
2813
2814	/* First try MSI/X */
2815	msgs = pci_msix_count(dev);
2816	if (msgs == 0)
2817		goto msi;
2818	/*
2819	** Some new devices, as with ixgbe, now may
2820	** use a different BAR, so we need to keep
2821	** track of which is used.
2822	*/
2823	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2824	bar = pci_read_config(dev, adapter->memrid, 4);
2825	if (bar == 0) /* use next bar */
2826		adapter->memrid += 4;
2827	adapter->msix_mem = bus_alloc_resource_any(dev,
2828	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2829       	if (adapter->msix_mem == NULL) {
2830		/* May not be enabled */
2831		device_printf(adapter->dev,
2832		    "Unable to map MSIX table \n");
2833		goto msi;
2834	}
2835
2836	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
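	/*
	** Use at most one queue per CPU, reserving one of the MSI-X
	** messages for the link interrupt (hence the msgs - 1 above).
	*/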
2837
2838	/* Override via tuneable */
2839	if (igb_num_queues != 0)
2840		queues = igb_num_queues;
2841
2842#ifdef	RSS
2843	/* If we're doing RSS, clamp at the number of RSS buckets */
2844	if (queues > rss_getnumbuckets())
2845		queues = rss_getnumbuckets();
2846#endif
2847
2848
2849	/* Sanity check based on HW */
2850	switch (adapter->hw.mac.type) {
2851		case e1000_82575:
2852			maxqueues = 4;
2853			break;
2854		case e1000_82576:
2855		case e1000_82580:
2856		case e1000_i350:
2857		case e1000_i354:
2858			maxqueues = 8;
2859			break;
2860		case e1000_i210:
2861			maxqueues = 4;
2862			break;
2863		case e1000_i211:
2864			maxqueues = 2;
2865			break;
2866		default:  /* VF interfaces */
2867			maxqueues = 1;
2868			break;
2869	}
2870
2871	/* Final clamp on the actual hardware capability */
2872	if (queues > maxqueues)
2873		queues = maxqueues;
2874
2875	/*
2876	** One vector (RX/TX pair) per queue
2877	** plus an additional one for the Link interrupt
2878	*/
2879	want = queues + 1;
2880	if (msgs >= want)
2881		msgs = want;
2882	else {
2883               	device_printf(adapter->dev,
2884		    "MSIX Configuration Problem, "
2885		    "%d vectors configured, but %d vectors wanted!\n",
2886		    msgs, want);
2887		goto msi;
2888	}
2889	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2890               	device_printf(adapter->dev,
2891		    "Using MSIX interrupts with %d vectors\n", msgs);
2892		adapter->num_queues = queues;
2893		return (msgs);
2894	}
2895	/*
2896	** If MSIX alloc failed or provided us with
2897	** less than needed, free and fall through to MSI
2898	*/
2899	pci_release_msi(dev);
2900
2901msi:
2902       	if (adapter->msix_mem != NULL) {
2903		bus_release_resource(dev, SYS_RES_MEMORY,
2904		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2905		adapter->msix_mem = NULL;
2906	}
2907       	msgs = 1;
2908	if (pci_alloc_msi(dev, &msgs) == 0) {
2909		device_printf(adapter->dev, "Using an MSI interrupt\n");
2910		return (msgs);
2911	}
2912	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2913	return (0);
2914}
2915
2916/*********************************************************************
2917 *
2918 *  Initialize the DMA Coalescing feature
2919 *
2920 **********************************************************************/
2921static void
2922igb_init_dmac(struct adapter *adapter, u32 pba)
2923{
2924	device_t	dev = adapter->dev;
2925	struct e1000_hw *hw = &adapter->hw;
2926	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2927	u16		hwm;
2928
2929	if (hw->mac.type == e1000_i211)
2930		return;
2931
2932	if (hw->mac.type > e1000_82580) {
2933
2934		if (adapter->dmac == 0) { /* Disabling it */
2935			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2936			return;
2937		} else
2938			device_printf(dev, "DMA Coalescing enabled\n");
2939
2940		/* Set starting threshold */
2941		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2942
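		/*
		** The threshold math below appears to work in 16-byte
		** units: pba is a buffer size in KB, so 64 * pba is the
		** same size in 16-byte blocks, and max_frame_size / 16
		** converts the frame to the same unit. Treat the unit
		** interpretation as an assumption.
		*/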
2943		hwm = 64 * pba - adapter->max_frame_size / 16;
2944		if (hwm < 64 * (pba - 6))
2945			hwm = 64 * (pba - 6);
2946		reg = E1000_READ_REG(hw, E1000_FCRTC);
2947		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2948		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2949		    & E1000_FCRTC_RTH_COAL_MASK);
2950		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2951
2952
2953		dmac = pba - adapter->max_frame_size / 512;
2954		if (dmac < pba - 10)
2955			dmac = pba - 10;
2956		reg = E1000_READ_REG(hw, E1000_DMACR);
2957		reg &= ~E1000_DMACR_DMACTHR_MASK;
2958		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2959		    & E1000_DMACR_DMACTHR_MASK);
2960
2961		/* transition to L0x or L1 if available..*/
2962		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2963		/* Check for a 2.5Gb backplane connection before
2964		* configuring the watchdog timer: the timer field is
2965		* programmed in 12.8 usec intervals on a 2.5Gb link
2966		* and in 32 usec intervals on other connections.
2967		* The shifts below perform that conversion.
2968		*/
2969		*/
2970		if (hw->mac.type == e1000_i354) {
2971			int status = E1000_READ_REG(hw, E1000_STATUS);
2972			if ((status & E1000_STATUS_2P5_SKU) &&
2973			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2974				reg |= ((adapter->dmac * 5) >> 6);
2975			else
2976				reg |= (adapter->dmac >> 5);
2977		} else {
2978			reg |= (adapter->dmac >> 5);
2979		}
2980
2981		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2982
2983		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2984
2985		/* Set the interval before transition */
2986		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2987		if (hw->mac.type == e1000_i350)
2988			reg |= IGB_DMCTLX_DCFLUSH_DIS;
2989		/*
2990		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so a
2991		** 4 usec delay is 10 units (0xA); otherwise 0x4 gives the same 4 usec delay.
2992		*/
2993		if (hw->mac.type == e1000_i354) {
2994			int status = E1000_READ_REG(hw, E1000_STATUS);
2995			if ((status & E1000_STATUS_2P5_SKU) &&
2996			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2997				reg |= 0xA;
2998			else
2999				reg |= 0x4;
3000		} else {
3001			reg |= 0x4;
3002		}
3003
3004		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3005
3006		/* free space in tx packet buffer to wake from DMA coal */
3007		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3008		    (2 * adapter->max_frame_size)) >> 6);
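		/*
		** The >> 6 suggests the wake threshold is programmed in
		** 64-byte units of free TX packet-buffer space; this is
		** an inference from the shift, not a datasheet citation.
		*/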
3009
3010		/* make low power state decision controlled by DMA coal */
3011		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3012		reg &= ~E1000_PCIEMISC_LX_DECISION;
3013		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3014
3015	} else if (hw->mac.type == e1000_82580) {
3016		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3017		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3018		    reg & ~E1000_PCIEMISC_LX_DECISION);
3019		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3020	}
3021}
3022
3023
3024/*********************************************************************
3025 *
3026 *  Set up a fresh starting state
3027 *
3028 **********************************************************************/
3029static void
3030igb_reset(struct adapter *adapter)
3031{
3032	device_t	dev = adapter->dev;
3033	struct e1000_hw *hw = &adapter->hw;
3034	struct e1000_fc_info *fc = &hw->fc;
3035	struct ifnet	*ifp = adapter->ifp;
3036	u32		pba = 0;
3037	u16		hwm;
3038
3039	INIT_DEBUGOUT("igb_reset: begin");
3040
3041	/* Let the firmware know the OS is in control */
3042	igb_get_hw_control(adapter);
3043
3044	/*
3045	 * Packet Buffer Allocation (PBA)
3046	 * Writing PBA sets the receive portion of the buffer;
3047	 * the remainder is used for the transmit buffer.
3048	 */
3049	switch (hw->mac.type) {
3050	case e1000_82575:
3051		pba = E1000_PBA_32K;
3052		break;
3053	case e1000_82576:
3054	case e1000_vfadapt:
3055		pba = E1000_READ_REG(hw, E1000_RXPBS);
3056		pba &= E1000_RXPBS_SIZE_MASK_82576;
3057		break;
3058	case e1000_82580:
3059	case e1000_i350:
3060	case e1000_i354:
3061	case e1000_vfadapt_i350:
3062		pba = E1000_READ_REG(hw, E1000_RXPBS);
3063		pba = e1000_rxpbs_adjust_82580(pba);
3064		break;
3065	case e1000_i210:
3066	case e1000_i211:
3067		pba = E1000_PBA_34K;
3068	default:
3069		break;
3070	}
3071
3072	/* Special needs in case of Jumbo frames */
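	/*
	** (For this adjustment the PBA register is treated as holding
	** the RX allocation in KB in its low 16 bits and the TX space
	** in the high 16 bits; min_tx and min_rx are rounded up and
	** shifted down to the same KB units before the comparison.)
	*/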
3073	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3074		u32 tx_space, min_tx, min_rx;
3075		pba = E1000_READ_REG(hw, E1000_PBA);
3076		tx_space = pba >> 16;
3077		pba &= 0xffff;
3078		min_tx = (adapter->max_frame_size +
3079		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3080		min_tx = roundup2(min_tx, 1024);
3081		min_tx >>= 10;
3082                min_rx = adapter->max_frame_size;
3083                min_rx = roundup2(min_rx, 1024);
3084                min_rx >>= 10;
3085		if (tx_space < min_tx &&
3086		    ((min_tx - tx_space) < pba)) {
3087			pba = pba - (min_tx - tx_space);
3088			/*
3089                         * if short on rx space, rx wins
3090                         * and must trump tx adjustment
3091			 */
3092                        if (pba < min_rx)
3093                                pba = min_rx;
3094		}
3095		E1000_WRITE_REG(hw, E1000_PBA, pba);
3096	}
3097
3098	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3099
3100	/*
3101	 * These parameters control the automatic generation (Tx) and
3102	 * response (Rx) to Ethernet PAUSE frames.
3103	 * - High water mark should allow for at least two frames to be
3104	 *   received after sending an XOFF.
3105	 * - Low water mark works best when it is very near the high water mark.
3106	 *   This allows the receiver to restart by sending XON when it has
3107	 *   drained a bit.
3108	 */
3109	hwm = min(((pba << 10) * 9 / 10),
3110	    ((pba << 10) - 2 * adapter->max_frame_size));
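	/*
	 * Worked example (assuming pba = 32, i.e. a 32KB RX buffer, and a
	 * 1522-byte max frame): (32 << 10) * 9 / 10 = 29491 and
	 * (32 << 10) - 2 * 1522 = 29724, so hwm = 29491 before the
	 * granularity masking below.
	 */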
3111
3112	if (hw->mac.type < e1000_82576) {
3113		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3114		fc->low_water = fc->high_water - 8;
3115	} else {
3116		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3117		fc->low_water = fc->high_water - 16;
3118	}
3119
3120	fc->pause_time = IGB_FC_PAUSE_TIME;
3121	fc->send_xon = TRUE;
3122	if (adapter->fc)
3123		fc->requested_mode = adapter->fc;
3124	else
3125		fc->requested_mode = e1000_fc_default;
3126
3127	/* Issue a global reset */
3128	e1000_reset_hw(hw);
3129	E1000_WRITE_REG(hw, E1000_WUC, 0);
3130
3131	/* Reset for AutoMediaDetect */
3132	if (adapter->flags & IGB_MEDIA_RESET) {
3133		e1000_setup_init_funcs(hw, TRUE);
3134		e1000_get_bus_info(hw);
3135		adapter->flags &= ~IGB_MEDIA_RESET;
3136	}
3137
3138	if (e1000_init_hw(hw) < 0)
3139		device_printf(dev, "Hardware Initialization Failed\n");
3140
3141	/* Setup DMA Coalescing */
3142	igb_init_dmac(adapter, pba);
3143
3144	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3145	e1000_get_phy_info(hw);
3146	e1000_check_for_link(hw);
3147	return;
3148}
3149
3150/*********************************************************************
3151 *
3152 *  Setup networking device structure and register an interface.
3153 *
3154 **********************************************************************/
3155static int
3156igb_setup_interface(device_t dev, struct adapter *adapter)
3157{
3158	struct ifnet   *ifp;
3159
3160	INIT_DEBUGOUT("igb_setup_interface: begin");
3161
3162	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3163	if (ifp == NULL) {
3164		device_printf(dev, "can not allocate ifnet structure\n");
3165		return (-1);
3166	}
3167	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3168	ifp->if_init =  igb_init;
3169	ifp->if_softc = adapter;
3170	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3171	ifp->if_ioctl = igb_ioctl;
3172	ifp->if_get_counter = igb_get_counter;
3173
3174	/* TSO parameters */
3175	ifp->if_hw_tsomax = IP_MAXPACKET;
3176	ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3177	ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3178
3179#ifndef IGB_LEGACY_TX
3180	ifp->if_transmit = igb_mq_start;
3181	ifp->if_qflush = igb_qflush;
3182#else
3183	ifp->if_start = igb_start;
3184	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3185	ifp->if_snd.ifq_drv_maxlen = 0;
3186	IFQ_SET_READY(&ifp->if_snd);
3187#endif
3188
3189	ether_ifattach(ifp, adapter->hw.mac.addr);
3190
3191	ifp->if_capabilities = ifp->if_capenable = 0;
3192
3193	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3194#if __FreeBSD_version >= 1000000
3195	ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3196#endif
3197	ifp->if_capabilities |= IFCAP_TSO;
3198	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3199	ifp->if_capenable = ifp->if_capabilities;
3200
3201	/* Advertise LRO capability, but leave it disabled by default */
3202	ifp->if_capabilities |= IFCAP_LRO;
3203
3204#ifdef DEVICE_POLLING
3205	ifp->if_capabilities |= IFCAP_POLLING;
3206#endif
3207
3208	/*
3209	 * Tell the upper layer(s) we
3210	 * support full VLAN capability.
3211	 */
3212	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3213	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3214			     |  IFCAP_VLAN_HWTSO
3215			     |  IFCAP_VLAN_MTU;
3216	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3217			  |  IFCAP_VLAN_HWTSO
3218			  |  IFCAP_VLAN_MTU;
3219
3220	/*
3221	** Don't turn this on by default: if vlans are
3222	** created on another pseudo device (e.g. lagg),
3223	** vlan events are not passed through and operation
3224	** breaks, but with HW FILTER off it works. If you
3225	** use vlans directly on the igb driver you can
3226	** enable this and get full hardware tag filtering.
3227	*/
3228	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3229
3230	/*
3231	 * Specify the media types supported by this adapter and register
3232	 * callbacks to update media and link information
3233	 */
3234	ifmedia_init(&adapter->media, IFM_IMASK,
3235	    igb_media_change, igb_media_status);
3236	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3237	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3238		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3239			    0, NULL);
3240		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3241	} else {
3242		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3243		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3244			    0, NULL);
3245		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3246			    0, NULL);
3247		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3248			    0, NULL);
3249		if (adapter->hw.phy.type != e1000_phy_ife) {
3250			ifmedia_add(&adapter->media,
3251				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3252			ifmedia_add(&adapter->media,
3253				IFM_ETHER | IFM_1000_T, 0, NULL);
3254		}
3255	}
3256	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3257	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3258	return (0);
3259}
3260
3261
3262/*
3263 * Manage DMA'able memory.
3264 */
3265static void
3266igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3267{
3268	if (error)
3269		return;
3270	*(bus_addr_t *) arg = segs[0].ds_addr;
3271}
3272
3273static int
3274igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3275        struct igb_dma_alloc *dma, int mapflags)
3276{
3277	int error;
3278
3279	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3280				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3281				BUS_SPACE_MAXADDR,	/* lowaddr */
3282				BUS_SPACE_MAXADDR,	/* highaddr */
3283				NULL, NULL,		/* filter, filterarg */
3284				size,			/* maxsize */
3285				1,			/* nsegments */
3286				size,			/* maxsegsize */
3287				0,			/* flags */
3288				NULL,			/* lockfunc */
3289				NULL,			/* lockarg */
3290				&dma->dma_tag);
3291	if (error) {
3292		device_printf(adapter->dev,
3293		    "%s: bus_dma_tag_create failed: %d\n",
3294		    __func__, error);
3295		goto fail_0;
3296	}
3297
3298	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3299	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3300	if (error) {
3301		device_printf(adapter->dev,
3302		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3303		    __func__, (uintmax_t)size, error);
3304		goto fail_2;
3305	}
3306
3307	dma->dma_paddr = 0;
3308	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3309	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3310	if (error || dma->dma_paddr == 0) {
3311		device_printf(adapter->dev,
3312		    "%s: bus_dmamap_load failed: %d\n",
3313		    __func__, error);
3314		goto fail_3;
3315	}
3316
3317	return (0);
3318
3319fail_3:
3320	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3321fail_2:
3322	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3323	bus_dma_tag_destroy(dma->dma_tag);
3324fail_0:
3325	dma->dma_tag = NULL;
3326
3327	return (error);
3328}
3329
3330static void
3331igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3332{
3333	if (dma->dma_tag == NULL)
3334		return;
3335	if (dma->dma_paddr != 0) {
3336		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3337		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3338		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3339		dma->dma_paddr = 0;
3340	}
3341	if (dma->dma_vaddr != NULL) {
3342		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3343		dma->dma_vaddr = NULL;
3344	}
3345	bus_dma_tag_destroy(dma->dma_tag);
3346	dma->dma_tag = NULL;
3347}
3348
3349
3350/*********************************************************************
3351 *
3352 *  Allocate memory for the transmit and receive rings, and then
3353 *  the descriptors associated with each, called only once at attach.
3354 *
3355 **********************************************************************/
3356static int
3357igb_allocate_queues(struct adapter *adapter)
3358{
3359	device_t dev = adapter->dev;
3360	struct igb_queue	*que = NULL;
3361	struct tx_ring		*txr = NULL;
3362	struct rx_ring		*rxr = NULL;
3363	int rsize, tsize, error = E1000_SUCCESS;
3364	int txconf = 0, rxconf = 0;
3365
3366	/* First allocate the top level queue structs */
3367	if (!(adapter->queues =
3368	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3369	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3370		device_printf(dev, "Unable to allocate queue memory\n");
3371		error = ENOMEM;
3372		goto fail;
3373	}
3374
3375	/* Next allocate the TX ring struct memory */
3376	if (!(adapter->tx_rings =
3377	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3378	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3379		device_printf(dev, "Unable to allocate TX ring memory\n");
3380		error = ENOMEM;
3381		goto tx_fail;
3382	}
3383
3384	/* Now allocate the RX */
3385	if (!(adapter->rx_rings =
3386	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3387	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3388		device_printf(dev, "Unable to allocate RX ring memory\n");
3389		error = ENOMEM;
3390		goto rx_fail;
3391	}
3392
3393	tsize = roundup2(adapter->num_tx_desc *
3394	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3395	/*
3396	 * Now set up the TX queues, txconf is needed to handle the
3397	 * possibility that things fail midcourse and we need to
3398	 * undo memory gracefully
3399	 */
3400	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3401		/* Set up some basics */
3402		txr = &adapter->tx_rings[i];
3403		txr->adapter = adapter;
3404		txr->me = i;
3405		txr->num_desc = adapter->num_tx_desc;
3406
3407		/* Initialize the TX lock */
3408		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3409		    device_get_nameunit(dev), txr->me);
3410		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3411
3412		if (igb_dma_malloc(adapter, tsize,
3413			&txr->txdma, BUS_DMA_NOWAIT)) {
3414			device_printf(dev,
3415			    "Unable to allocate TX Descriptor memory\n");
3416			error = ENOMEM;
3417			goto err_tx_desc;
3418		}
3419		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3420		bzero((void *)txr->tx_base, tsize);
3421
3422        	/* Now allocate transmit buffers for the ring */
3423        	if (igb_allocate_transmit_buffers(txr)) {
3424			device_printf(dev,
3425			    "Critical Failure setting up transmit buffers\n");
3426			error = ENOMEM;
3427			goto err_tx_desc;
3428        	}
3429#ifndef IGB_LEGACY_TX
3430		/* Allocate a buf ring */
3431		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3432		    M_WAITOK, &txr->tx_mtx);
3433#endif
3434	}
3435
3436	/*
3437	 * Next the RX queues...
3438	 */
3439	rsize = roundup2(adapter->num_rx_desc *
3440	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3441	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3442		rxr = &adapter->rx_rings[i];
3443		rxr->adapter = adapter;
3444		rxr->me = i;
3445
3446		/* Initialize the RX lock */
3447		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3448		    device_get_nameunit(dev), rxr->me);
3449		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3450
3451		if (igb_dma_malloc(adapter, rsize,
3452			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3453			device_printf(dev,
3454			    "Unable to allocate RxDescriptor memory\n");
3455			error = ENOMEM;
3456			goto err_rx_desc;
3457		}
3458		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3459		bzero((void *)rxr->rx_base, rsize);
3460
3461        	/* Allocate receive buffers for the ring*/
3462		if (igb_allocate_receive_buffers(rxr)) {
3463			device_printf(dev,
3464			    "Critical Failure setting up receive buffers\n");
3465			error = ENOMEM;
3466			goto err_rx_desc;
3467		}
3468	}
3469
3470	/*
3471	** Finally set up the queue holding structs
3472	*/
3473	for (int i = 0; i < adapter->num_queues; i++) {
3474		que = &adapter->queues[i];
3475		que->adapter = adapter;
3476		que->txr = &adapter->tx_rings[i];
3477		que->rxr = &adapter->rx_rings[i];
3478	}
3479
3480	return (0);
3481
3482err_rx_desc:
3483	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3484		igb_dma_free(adapter, &rxr->rxdma);
3485err_tx_desc:
3486	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3487		igb_dma_free(adapter, &txr->txdma);
3488	free(adapter->rx_rings, M_DEVBUF);
3489rx_fail:
3490#ifndef IGB_LEGACY_TX
3491	buf_ring_free(txr->br, M_DEVBUF);
3492#endif
3493	free(adapter->tx_rings, M_DEVBUF);
3494tx_fail:
3495	free(adapter->queues, M_DEVBUF);
3496fail:
3497	return (error);
3498}
3499
3500/*********************************************************************
3501 *
3502 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3503 *  the information needed to transmit a packet on the wire. This is
3504 *  called only once at attach, setup is done every reset.
3505 *
3506 **********************************************************************/
3507static int
3508igb_allocate_transmit_buffers(struct tx_ring *txr)
3509{
3510	struct adapter *adapter = txr->adapter;
3511	device_t dev = adapter->dev;
3512	struct igb_tx_buf *txbuf;
3513	int error, i;
3514
3515	/*
3516	 * Setup DMA descriptor areas.
3517	 */
3518	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3519			       1, 0,			/* alignment, bounds */
3520			       BUS_SPACE_MAXADDR,	/* lowaddr */
3521			       BUS_SPACE_MAXADDR,	/* highaddr */
3522			       NULL, NULL,		/* filter, filterarg */
3523			       IGB_TSO_SIZE,		/* maxsize */
3524			       IGB_MAX_SCATTER,		/* nsegments */
3525			       PAGE_SIZE,		/* maxsegsize */
3526			       0,			/* flags */
3527			       NULL,			/* lockfunc */
3528			       NULL,			/* lockfuncarg */
3529			       &txr->txtag))) {
3530		device_printf(dev,"Unable to allocate TX DMA tag\n");
3531		goto fail;
3532	}
3533
3534	if (!(txr->tx_buffers =
3535	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3536	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3537		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3538		error = ENOMEM;
3539		goto fail;
3540	}
3541
3542        /* Create the descriptor buffer dma maps */
3543	txbuf = txr->tx_buffers;
3544	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3545		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3546		if (error != 0) {
3547			device_printf(dev, "Unable to create TX DMA map\n");
3548			goto fail;
3549		}
3550	}
3551
3552	return 0;
3553fail:
3554	/* We free all, it handles case where we are in the middle */
3555	igb_free_transmit_structures(adapter);
3556	return (error);
3557}
3558
3559/*********************************************************************
3560 *
3561 *  Initialize a transmit ring.
3562 *
3563 **********************************************************************/
3564static void
3565igb_setup_transmit_ring(struct tx_ring *txr)
3566{
3567	struct adapter *adapter = txr->adapter;
3568	struct igb_tx_buf *txbuf;
3569	int i;
3570#ifdef DEV_NETMAP
3571	struct netmap_adapter *na = NA(adapter->ifp);
3572	struct netmap_slot *slot;
3573#endif /* DEV_NETMAP */
3574
3575	/* Clear the old descriptor contents */
3576	IGB_TX_LOCK(txr);
3577#ifdef DEV_NETMAP
3578	slot = netmap_reset(na, NR_TX, txr->me, 0);
3579#endif /* DEV_NETMAP */
3580	bzero((void *)txr->tx_base,
3581	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3582	/* Reset indices */
3583	txr->next_avail_desc = 0;
3584	txr->next_to_clean = 0;
3585
3586	/* Free any existing tx buffers. */
3587        txbuf = txr->tx_buffers;
3588	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3589		if (txbuf->m_head != NULL) {
3590			bus_dmamap_sync(txr->txtag, txbuf->map,
3591			    BUS_DMASYNC_POSTWRITE);
3592			bus_dmamap_unload(txr->txtag, txbuf->map);
3593			m_freem(txbuf->m_head);
3594			txbuf->m_head = NULL;
3595		}
3596#ifdef DEV_NETMAP
3597		if (slot) {
3598			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3599			/* no need to set the address */
3600			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3601		}
3602#endif /* DEV_NETMAP */
3603		/* clear the watch index */
3604		txbuf->eop = NULL;
3605        }
3606
3607	/* Set number of descriptors available */
3608	txr->tx_avail = adapter->num_tx_desc;
3609
3610	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3611	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3612	IGB_TX_UNLOCK(txr);
3613}
3614
3615/*********************************************************************
3616 *
3617 *  Initialize all transmit rings.
3618 *
3619 **********************************************************************/
3620static void
3621igb_setup_transmit_structures(struct adapter *adapter)
3622{
3623	struct tx_ring *txr = adapter->tx_rings;
3624
3625	for (int i = 0; i < adapter->num_queues; i++, txr++)
3626		igb_setup_transmit_ring(txr);
3627
3628	return;
3629}
3630
3631/*********************************************************************
3632 *
3633 *  Enable transmit unit.
3634 *
3635 **********************************************************************/
3636static void
3637igb_initialize_transmit_units(struct adapter *adapter)
3638{
3639	struct tx_ring	*txr = adapter->tx_rings;
3640	struct e1000_hw *hw = &adapter->hw;
3641	u32		tctl, txdctl;
3642
3643	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3644	tctl = txdctl = 0;
3645
3646	/* Setup the Tx Descriptor Rings */
3647	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3648		u64 bus_addr = txr->txdma.dma_paddr;
3649
3650		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3651		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3652		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3653		    (uint32_t)(bus_addr >> 32));
3654		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3655		    (uint32_t)bus_addr);
3656
3657		/* Setup the HW Tx Head and Tail descriptor pointers */
3658		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3659		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3660
3661		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3662		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3663		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3664
3665		txr->queue_status = IGB_QUEUE_IDLE;
3666
3667		txdctl |= IGB_TX_PTHRESH;
3668		txdctl |= IGB_TX_HTHRESH << 8;
3669		txdctl |= IGB_TX_WTHRESH << 16;
3670		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3671		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3672	}
3673
3674	if (adapter->vf_ifp)
3675		return;
3676
3677	e1000_config_collision_dist(hw);
3678
3679	/* Program the Transmit Control Register */
3680	tctl = E1000_READ_REG(hw, E1000_TCTL);
3681	tctl &= ~E1000_TCTL_CT;
3682	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3683		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3684
3685	/* This write will effectively turn on the transmit unit. */
3686	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3687}
3688
3689/*********************************************************************
3690 *
3691 *  Free all transmit rings.
3692 *
3693 **********************************************************************/
3694static void
3695igb_free_transmit_structures(struct adapter *adapter)
3696{
3697	struct tx_ring *txr = adapter->tx_rings;
3698
3699	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3700		IGB_TX_LOCK(txr);
3701		igb_free_transmit_buffers(txr);
3702		igb_dma_free(adapter, &txr->txdma);
3703		IGB_TX_UNLOCK(txr);
3704		IGB_TX_LOCK_DESTROY(txr);
3705	}
3706	free(adapter->tx_rings, M_DEVBUF);
3707}
3708
3709/*********************************************************************
3710 *
3711 *  Free transmit ring related data structures.
3712 *
3713 **********************************************************************/
3714static void
3715igb_free_transmit_buffers(struct tx_ring *txr)
3716{
3717	struct adapter *adapter = txr->adapter;
3718	struct igb_tx_buf *tx_buffer;
3719	int             i;
3720
3721	INIT_DEBUGOUT("free_transmit_ring: begin");
3722
3723	if (txr->tx_buffers == NULL)
3724		return;
3725
3726	tx_buffer = txr->tx_buffers;
3727	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3728		if (tx_buffer->m_head != NULL) {
3729			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3730			    BUS_DMASYNC_POSTWRITE);
3731			bus_dmamap_unload(txr->txtag,
3732			    tx_buffer->map);
3733			m_freem(tx_buffer->m_head);
3734			tx_buffer->m_head = NULL;
3735			if (tx_buffer->map != NULL) {
3736				bus_dmamap_destroy(txr->txtag,
3737				    tx_buffer->map);
3738				tx_buffer->map = NULL;
3739			}
3740		} else if (tx_buffer->map != NULL) {
3741			bus_dmamap_unload(txr->txtag,
3742			    tx_buffer->map);
3743			bus_dmamap_destroy(txr->txtag,
3744			    tx_buffer->map);
3745			tx_buffer->map = NULL;
3746		}
3747	}
3748#ifndef IGB_LEGACY_TX
3749	if (txr->br != NULL)
3750		buf_ring_free(txr->br, M_DEVBUF);
3751#endif
3752	if (txr->tx_buffers != NULL) {
3753		free(txr->tx_buffers, M_DEVBUF);
3754		txr->tx_buffers = NULL;
3755	}
3756	if (txr->txtag != NULL) {
3757		bus_dma_tag_destroy(txr->txtag);
3758		txr->txtag = NULL;
3759	}
3760	return;
3761}
3762
3763/**********************************************************************
3764 *
3765 *  Setup work for hardware segmentation offload (TSO) on
3766 *  adapters using advanced tx descriptors
3767 *
3768 **********************************************************************/
3769static int
3770igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3771    u32 *cmd_type_len, u32 *olinfo_status)
3772{
3773	struct adapter *adapter = txr->adapter;
3774	struct e1000_adv_tx_context_desc *TXD;
3775	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3776	u32 mss_l4len_idx = 0, paylen;
3777	u16 vtag = 0, eh_type;
3778	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3779	struct ether_vlan_header *eh;
3780#ifdef INET6
3781	struct ip6_hdr *ip6;
3782#endif
3783#ifdef INET
3784	struct ip *ip;
3785#endif
3786	struct tcphdr *th;
3787
3788
3789	/*
3790	 * Determine where frame payload starts.
3791	 * Jump over vlan headers if already present
3792	 */
3793	eh = mtod(mp, struct ether_vlan_header *);
3794	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3795		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3796		eh_type = eh->evl_proto;
3797	} else {
3798		ehdrlen = ETHER_HDR_LEN;
3799		eh_type = eh->evl_encap_proto;
3800	}
3801
3802	switch (ntohs(eh_type)) {
3803#ifdef INET6
3804	case ETHERTYPE_IPV6:
3805		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3806		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3807		if (ip6->ip6_nxt != IPPROTO_TCP)
3808			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
3811		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3812		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3813		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3814		break;
3815#endif
3816#ifdef INET
3817	case ETHERTYPE_IP:
3818		ip = (struct ip *)(mp->m_data + ehdrlen);
3819		if (ip->ip_p != IPPROTO_TCP)
3820			return (ENXIO);
3821		ip->ip_sum = 0;
3822		ip_hlen = ip->ip_hl << 2;
3823		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3824		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3825		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3826		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3827		/* Tell transmit desc to also do IPv4 checksum. */
3828		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3829		break;
3830#endif
3831	default:
3832		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3833		    __func__, ntohs(eh_type));
3834		break;
3835	}
3836
3837	ctxd = txr->next_avail_desc;
3838	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3839
3840	tcp_hlen = th->th_off << 2;
3841
3842	/* This is used in the transmit desc in encap */
3843	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3844
3845	/* VLAN MACLEN IPLEN */
3846	if (mp->m_flags & M_VLANTAG) {
3847		vtag = htole16(mp->m_pkthdr.ether_vtag);
3848                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3849	}
3850
3851	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3852	vlan_macip_lens |= ip_hlen;
3853	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3854
3855	/* ADV DTYPE TUCMD */
3856	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3857	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3858	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3859
3860	/* MSS L4LEN IDX */
3861	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3862	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3863	/* 82575 needs the queue index added */
3864	if (adapter->hw.mac.type == e1000_82575)
3865		mss_l4len_idx |= txr->me << 4;
3866	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3867
3868	TXD->seqnum_seed = htole32(0);
3869
3870	if (++ctxd == txr->num_desc)
3871		ctxd = 0;
3872
3873	txr->tx_avail--;
3874	txr->next_avail_desc = ctxd;
3875	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3876	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3877	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3878	++txr->tso_tx;
3879	return (0);
3880}
3881
3882/*********************************************************************
3883 *
3884 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3885 *
3886 **********************************************************************/
3887
3888static int
3889igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3890    u32 *cmd_type_len, u32 *olinfo_status)
3891{
3892	struct e1000_adv_tx_context_desc *TXD;
3893	struct adapter *adapter = txr->adapter;
3894	struct ether_vlan_header *eh;
3895	struct ip *ip;
3896	struct ip6_hdr *ip6;
3897	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3898	int	ehdrlen, ip_hlen = 0;
3899	u16	etype;
3900	u8	ipproto = 0;
3901	int	offload = TRUE;
3902	int	ctxd = txr->next_avail_desc;
3903	u16	vtag = 0;
3904
3905	/* First check if TSO is to be used */
3906	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3907		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3908
3909	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3910		offload = FALSE;
3911
3912	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3914
3915	/* Now ready a context descriptor */
3916	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3917
3918	/*
3919	** In advanced descriptors the vlan tag must
3920	** be placed into the context descriptor. Hence
3921	** we need to make one even if not doing offloads.
3922	*/
3923	if (mp->m_flags & M_VLANTAG) {
3924		vtag = htole16(mp->m_pkthdr.ether_vtag);
3925		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3926	} else if (offload == FALSE) /* ... no offload to do */
3927		return (0);
3928
3929	/*
3930	 * Determine where frame payload starts.
3931	 * Jump over vlan headers if already present,
3932	 * helpful for QinQ too.
3933	 */
3934	eh = mtod(mp, struct ether_vlan_header *);
3935	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3936		etype = ntohs(eh->evl_proto);
3937		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3938	} else {
3939		etype = ntohs(eh->evl_encap_proto);
3940		ehdrlen = ETHER_HDR_LEN;
3941	}
3942
3943	/* Set the ether header length */
3944	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3945
3946	switch (etype) {
3947		case ETHERTYPE_IP:
3948			ip = (struct ip *)(mp->m_data + ehdrlen);
3949			ip_hlen = ip->ip_hl << 2;
3950			ipproto = ip->ip_p;
3951			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3952			break;
3953		case ETHERTYPE_IPV6:
3954			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3955			ip_hlen = sizeof(struct ip6_hdr);
3956			/* XXX-BZ this will go badly in case of ext hdrs. */
3957			ipproto = ip6->ip6_nxt;
3958			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3959			break;
3960		default:
3961			offload = FALSE;
3962			break;
3963	}
3964
3965	vlan_macip_lens |= ip_hlen;
3966	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3967
3968	switch (ipproto) {
3969		case IPPROTO_TCP:
3970#if __FreeBSD_version >= 1000000
3971			if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
3972#else
3973			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3974#endif
3975				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3976			break;
3977		case IPPROTO_UDP:
3978#if __FreeBSD_version >= 1000000
3979			if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
3980#else
3981			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3982#endif
3983				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3984			break;
3985
3986#if __FreeBSD_version >= 800000
3987		case IPPROTO_SCTP:
3988#if __FreeBSD_version >= 1000000
3989			if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
3990#else
3991			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3992#endif
3993				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3994			break;
3995#endif
3996		default:
3997			offload = FALSE;
3998			break;
3999	}
4000
4001	if (offload) /* For the TX descriptor setup */
4002		*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4003
4004	/* 82575 needs the queue index added */
4005	if (adapter->hw.mac.type == e1000_82575)
4006		mss_l4len_idx = txr->me << 4;
4007
4008	/* Now copy bits into descriptor */
4009	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4010	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4011	TXD->seqnum_seed = htole32(0);
4012	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4013
4014	/* We've consumed the first desc, adjust counters */
4015	if (++ctxd == txr->num_desc)
4016		ctxd = 0;
4017	txr->next_avail_desc = ctxd;
4018	--txr->tx_avail;
4019
4020        return (0);
4021}
4022
4023/**********************************************************************
4024 *
4025 *  Examine each tx_buffer in the used queue. If the hardware is done
4026 *  processing the packet then free associated resources. The
4027 *  tx_buffer is put back on the free queue.
4028 *
 *  A TRUE return means there's still work in the ring to clean; FALSE means it's empty.
4030 **********************************************************************/
4031static bool
4032igb_txeof(struct tx_ring *txr)
4033{
4034	struct adapter		*adapter = txr->adapter;
4035#ifdef DEV_NETMAP
4036	struct ifnet		*ifp = adapter->ifp;
4037#endif /* DEV_NETMAP */
4038	u32			work, processed = 0;
4039	int			limit = adapter->tx_process_limit;
4040	struct igb_tx_buf	*buf;
4041	union e1000_adv_tx_desc *txd;
4042
4043	mtx_assert(&txr->tx_mtx, MA_OWNED);
4044
4045#ifdef DEV_NETMAP
4046	if (netmap_tx_irq(ifp, txr->me))
4047		return (FALSE);
4048#endif /* DEV_NETMAP */
4049
4050	if (txr->tx_avail == txr->num_desc) {
4051		txr->queue_status = IGB_QUEUE_IDLE;
4052		return FALSE;
4053	}
4054
4055	/* Get work starting point */
4056	work = txr->next_to_clean;
4057	buf = &txr->tx_buffers[work];
4058	txd = &txr->tx_base[work];
4059	work -= txr->num_desc; /* The distance to ring end */
4060        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4061            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
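	/*
	 * Note: 'work' is kept as a negative offset from the end of the
	 * ring; it is incremented as descriptors are consumed and reaches
	 * zero exactly at the wrap point, which is what the !work tests
	 * below check for.
	 */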
4062	do {
4063		union e1000_adv_tx_desc *eop = buf->eop;
4064		if (eop == NULL) /* No work */
4065			break;
4066
4067		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4068			break;	/* I/O not complete */
4069
4070		if (buf->m_head) {
4071			txr->bytes +=
4072			    buf->m_head->m_pkthdr.len;
4073			bus_dmamap_sync(txr->txtag,
4074			    buf->map,
4075			    BUS_DMASYNC_POSTWRITE);
4076			bus_dmamap_unload(txr->txtag,
4077			    buf->map);
4078			m_freem(buf->m_head);
4079			buf->m_head = NULL;
4080		}
4081		buf->eop = NULL;
4082		++txr->tx_avail;
4083
4084		/* We clean the range if multi segment */
4085		while (txd != eop) {
4086			++txd;
4087			++buf;
4088			++work;
4089			/* wrap the ring? */
4090			if (__predict_false(!work)) {
4091				work -= txr->num_desc;
4092				buf = txr->tx_buffers;
4093				txd = txr->tx_base;
4094			}
4095			if (buf->m_head) {
4096				txr->bytes +=
4097				    buf->m_head->m_pkthdr.len;
4098				bus_dmamap_sync(txr->txtag,
4099				    buf->map,
4100				    BUS_DMASYNC_POSTWRITE);
4101				bus_dmamap_unload(txr->txtag,
4102				    buf->map);
4103				m_freem(buf->m_head);
4104				buf->m_head = NULL;
4105			}
4106			++txr->tx_avail;
4107			buf->eop = NULL;
4108
4109		}
4110		++txr->packets;
4111		++processed;
4112		txr->watchdog_time = ticks;
4113
4114		/* Try the next packet */
4115		++txd;
4116		++buf;
4117		++work;
4118		/* reset with a wrap */
4119		if (__predict_false(!work)) {
4120			work -= txr->num_desc;
4121			buf = txr->tx_buffers;
4122			txd = txr->tx_base;
4123		}
4124		prefetch(txd);
4125	} while (__predict_true(--limit));
4126
4127	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4128	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4129
4130	work += txr->num_desc;
4131	txr->next_to_clean = work;
4132
4133	/*
4134	** Watchdog calculation, we know there's
4135	** work outstanding or the first return
4136	** would have been taken, so none processed
4137	** for too long indicates a hang.
4138	*/
4139	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4140		txr->queue_status |= IGB_QUEUE_HUNG;
4141
4142	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4143		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4144
4145	if (txr->tx_avail == txr->num_desc) {
4146		txr->queue_status = IGB_QUEUE_IDLE;
4147		return (FALSE);
4148	}
4149
4150	return (TRUE);
4151}
4152
4153/*********************************************************************
4154 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - the ring keeps its own state, so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     the routine simply returns, keeping its placeholder, and can
 *     be called again later to retry.
4160 *
4161 **********************************************************************/
4162static void
4163igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4164{
4165	struct adapter		*adapter = rxr->adapter;
4166	bus_dma_segment_t	hseg[1];
4167	bus_dma_segment_t	pseg[1];
4168	struct igb_rx_buf	*rxbuf;
4169	struct mbuf		*mh, *mp;
4170	int			i, j, nsegs, error;
4171	bool			refreshed = FALSE;
4172
4173	i = j = rxr->next_to_refresh;
4174	/*
4175	** Get one descriptor beyond
4176	** our work mark to control
4177	** the loop.
4178        */
4179	if (++j == adapter->num_rx_desc)
4180		j = 0;
4181
4182	while (j != limit) {
4183		rxbuf = &rxr->rx_buffers[i];
4184		/* No hdr mbuf used with header split off */
4185		if (rxr->hdr_split == FALSE)
4186			goto no_split;
4187		if (rxbuf->m_head == NULL) {
4188			mh = m_gethdr(M_NOWAIT, MT_DATA);
4189			if (mh == NULL)
4190				goto update;
4191		} else
4192			mh = rxbuf->m_head;
4193
		mh->m_pkthdr.len = mh->m_len = MHLEN;
4196		mh->m_flags |= M_PKTHDR;
4197		/* Get the memory mapping */
4198		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4199		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4200		if (error != 0) {
4201			printf("Refresh mbufs: hdr dmamap load"
4202			    " failure - %d\n", error);
4203			m_free(mh);
4204			rxbuf->m_head = NULL;
4205			goto update;
4206		}
4207		rxbuf->m_head = mh;
4208		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4209		    BUS_DMASYNC_PREREAD);
4210		rxr->rx_base[i].read.hdr_addr =
4211		    htole64(hseg[0].ds_addr);
4212no_split:
4213		if (rxbuf->m_pack == NULL) {
4214			mp = m_getjcl(M_NOWAIT, MT_DATA,
4215			    M_PKTHDR, adapter->rx_mbuf_sz);
4216			if (mp == NULL)
4217				goto update;
4218		} else
4219			mp = rxbuf->m_pack;
4220
4221		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4222		/* Get the memory mapping */
4223		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4224		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4225		if (error != 0) {
4226			printf("Refresh mbufs: payload dmamap load"
4227			    " failure - %d\n", error);
4228			m_free(mp);
4229			rxbuf->m_pack = NULL;
4230			goto update;
4231		}
4232		rxbuf->m_pack = mp;
4233		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4234		    BUS_DMASYNC_PREREAD);
4235		rxr->rx_base[i].read.pkt_addr =
4236		    htole64(pseg[0].ds_addr);
4237		refreshed = TRUE; /* I feel wefreshed :) */
4238
4239		i = j; /* our next is precalculated */
4240		rxr->next_to_refresh = i;
4241		if (++j == adapter->num_rx_desc)
4242			j = 0;
4243	}
4244update:
4245	if (refreshed) /* update tail */
4246		E1000_WRITE_REG(&adapter->hw,
4247		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4248	return;
4249}
4250
4251
4252/*********************************************************************
4253 *
4254 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
4256 *  that we'll need is equal to the number of receive descriptors
4257 *  that we've allocated.
4258 *
4259 **********************************************************************/
4260static int
4261igb_allocate_receive_buffers(struct rx_ring *rxr)
4262{
4263	struct	adapter 	*adapter = rxr->adapter;
4264	device_t 		dev = adapter->dev;
4265	struct igb_rx_buf	*rxbuf;
4266	int             	i, bsize, error;
4267
4268	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4269	if (!(rxr->rx_buffers =
4270	    (struct igb_rx_buf *) malloc(bsize,
4271	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4272		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4273		error = ENOMEM;
4274		goto fail;
4275	}
4276
4277	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4278				   1, 0,		/* alignment, bounds */
4279				   BUS_SPACE_MAXADDR,	/* lowaddr */
4280				   BUS_SPACE_MAXADDR,	/* highaddr */
4281				   NULL, NULL,		/* filter, filterarg */
4282				   MSIZE,		/* maxsize */
4283				   1,			/* nsegments */
4284				   MSIZE,		/* maxsegsize */
4285				   0,			/* flags */
4286				   NULL,		/* lockfunc */
4287				   NULL,		/* lockfuncarg */
4288				   &rxr->htag))) {
4289		device_printf(dev, "Unable to create RX DMA tag\n");
4290		goto fail;
4291	}
4292
4293	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4294				   1, 0,		/* alignment, bounds */
4295				   BUS_SPACE_MAXADDR,	/* lowaddr */
4296				   BUS_SPACE_MAXADDR,	/* highaddr */
4297				   NULL, NULL,		/* filter, filterarg */
4298				   MJUM9BYTES,		/* maxsize */
4299				   1,			/* nsegments */
4300				   MJUM9BYTES,		/* maxsegsize */
4301				   0,			/* flags */
4302				   NULL,		/* lockfunc */
4303				   NULL,		/* lockfuncarg */
4304				   &rxr->ptag))) {
4305		device_printf(dev, "Unable to create RX payload DMA tag\n");
4306		goto fail;
4307	}
4308
4309	for (i = 0; i < adapter->num_rx_desc; i++) {
4310		rxbuf = &rxr->rx_buffers[i];
4311		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4312		if (error) {
4313			device_printf(dev,
4314			    "Unable to create RX head DMA maps\n");
4315			goto fail;
4316		}
4317		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4318		if (error) {
4319			device_printf(dev,
4320			    "Unable to create RX packet DMA maps\n");
4321			goto fail;
4322		}
4323	}
4324
4325	return (0);
4326
4327fail:
4328	/* Frees all, but can handle partial completion */
4329	igb_free_receive_structures(adapter);
4330	return (error);
4331}
4332
4333
4334static void
4335igb_free_receive_ring(struct rx_ring *rxr)
4336{
4337	struct	adapter		*adapter = rxr->adapter;
4338	struct igb_rx_buf	*rxbuf;
4339
4340
4341	for (int i = 0; i < adapter->num_rx_desc; i++) {
4342		rxbuf = &rxr->rx_buffers[i];
4343		if (rxbuf->m_head != NULL) {
4344			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4345			    BUS_DMASYNC_POSTREAD);
4346			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4347			rxbuf->m_head->m_flags |= M_PKTHDR;
4348			m_freem(rxbuf->m_head);
4349		}
4350		if (rxbuf->m_pack != NULL) {
4351			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4352			    BUS_DMASYNC_POSTREAD);
4353			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4354			rxbuf->m_pack->m_flags |= M_PKTHDR;
4355			m_freem(rxbuf->m_pack);
4356		}
4357		rxbuf->m_head = NULL;
4358		rxbuf->m_pack = NULL;
4359	}
4360}
4361
4362
4363/*********************************************************************
4364 *
4365 *  Initialize a receive ring and its buffers.
4366 *
4367 **********************************************************************/
4368static int
4369igb_setup_receive_ring(struct rx_ring *rxr)
4370{
4371	struct	adapter		*adapter;
4372	struct  ifnet		*ifp;
4373	device_t		dev;
4374	struct igb_rx_buf	*rxbuf;
4375	bus_dma_segment_t	pseg[1], hseg[1];
4376	struct lro_ctrl		*lro = &rxr->lro;
4377	int			rsize, nsegs, error = 0;
4378#ifdef DEV_NETMAP
4379	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4380	struct netmap_slot *slot;
4381#endif /* DEV_NETMAP */
4382
4383	adapter = rxr->adapter;
4384	dev = adapter->dev;
4385	ifp = adapter->ifp;
4386
4387	/* Clear the ring contents */
4388	IGB_RX_LOCK(rxr);
4389#ifdef DEV_NETMAP
4390	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4391#endif /* DEV_NETMAP */
4392	rsize = roundup2(adapter->num_rx_desc *
4393	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4394	bzero((void *)rxr->rx_base, rsize);
4395
4396	/*
4397	** Free current RX buffer structures and their mbufs
4398	*/
4399	igb_free_receive_ring(rxr);
4400
4401	/* Configure for header split? */
4402	if (igb_header_split)
4403		rxr->hdr_split = TRUE;
4404
4405        /* Now replenish the ring mbufs */
4406	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4407		struct mbuf	*mh, *mp;
4408
4409		rxbuf = &rxr->rx_buffers[j];
4410#ifdef DEV_NETMAP
4411		if (slot) {
4412			/* slot sj is mapped to the j-th NIC-ring entry */
4413			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4414			uint64_t paddr;
4415			void *addr;
4416
4417			addr = PNMB(na, slot + sj, &paddr);
4418			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4419			/* Update descriptor */
4420			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4421			continue;
4422		}
4423#endif /* DEV_NETMAP */
4424		if (rxr->hdr_split == FALSE)
4425			goto skip_head;
4426
4427		/* First the header */
4428		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4429		if (rxbuf->m_head == NULL) {
4430			error = ENOBUFS;
4431                        goto fail;
4432		}
4433		m_adj(rxbuf->m_head, ETHER_ALIGN);
4434		mh = rxbuf->m_head;
4435		mh->m_len = mh->m_pkthdr.len = MHLEN;
4436		mh->m_flags |= M_PKTHDR;
4437		/* Get the memory mapping */
4438		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4439		    rxbuf->hmap, rxbuf->m_head, hseg,
4440		    &nsegs, BUS_DMA_NOWAIT);
4441		if (error != 0) /* Nothing elegant to do here */
4442                        goto fail;
4443		bus_dmamap_sync(rxr->htag,
4444		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4445		/* Update descriptor */
4446		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4447
4448skip_head:
4449		/* Now the payload cluster */
4450		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4451		    M_PKTHDR, adapter->rx_mbuf_sz);
4452		if (rxbuf->m_pack == NULL) {
4453			error = ENOBUFS;
4454                        goto fail;
4455		}
4456		mp = rxbuf->m_pack;
4457		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4458		/* Get the memory mapping */
4459		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4460		    rxbuf->pmap, mp, pseg,
4461		    &nsegs, BUS_DMA_NOWAIT);
4462		if (error != 0)
4463                        goto fail;
4464		bus_dmamap_sync(rxr->ptag,
4465		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4466		/* Update descriptor */
4467		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4468        }
4469
4470	/* Setup our descriptor indices */
4471	rxr->next_to_check = 0;
4472	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4473	rxr->lro_enabled = FALSE;
4474	rxr->rx_split_packets = 0;
4475	rxr->rx_bytes = 0;
4476
4477	rxr->fmp = NULL;
4478	rxr->lmp = NULL;
4479
4480	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4481	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4482
4483	/*
4484	** Now set up the LRO interface, we
4485	** also only do head split when LRO
4486	** is enabled, since so often they
4487	** are undesirable in similar setups.
4488	*/
4489	if (ifp->if_capenable & IFCAP_LRO) {
4490		error = tcp_lro_init(lro);
4491		if (error) {
4492			device_printf(dev, "LRO Initialization failed!\n");
4493			goto fail;
4494		}
4495		INIT_DEBUGOUT("RX LRO Initialized\n");
4496		rxr->lro_enabled = TRUE;
4497		lro->ifp = adapter->ifp;
4498	}
4499
4500	IGB_RX_UNLOCK(rxr);
4501	return (0);
4502
4503fail:
4504	igb_free_receive_ring(rxr);
4505	IGB_RX_UNLOCK(rxr);
4506	return (error);
4507}
4508
4509
4510/*********************************************************************
4511 *
4512 *  Initialize all receive rings.
4513 *
4514 **********************************************************************/
4515static int
4516igb_setup_receive_structures(struct adapter *adapter)
4517{
4518	struct rx_ring *rxr = adapter->rx_rings;
4519	int i;
4520
4521	for (i = 0; i < adapter->num_queues; i++, rxr++)
4522		if (igb_setup_receive_ring(rxr))
4523			goto fail;
4524
4525	return (0);
4526fail:
4527	/*
4528	 * Free RX buffers allocated so far, we will only handle
4529	 * the rings that completed, the failing case will have
4530	 * cleaned up for itself. 'i' is the endpoint.
4531	 */
4532	for (int j = 0; j < i; ++j) {
4533		rxr = &adapter->rx_rings[j];
4534		IGB_RX_LOCK(rxr);
4535		igb_free_receive_ring(rxr);
4536		IGB_RX_UNLOCK(rxr);
4537	}
4538
4539	return (ENOBUFS);
4540}
4541
4542/*
4543 * Initialise the RSS mapping for NICs that support multiple transmit/
4544 * receive rings.
4545 */
4546static void
4547igb_initialise_rss_mapping(struct adapter *adapter)
4548{
4549	struct e1000_hw *hw = &adapter->hw;
4550	int i;
4551	int queue_id;
4552	u32 reta;
4553	u32 rss_key[10], mrqc, shift = 0;
4554
4555	/* XXX? */
4556	if (adapter->hw.mac.type == e1000_82575)
4557		shift = 6;
4558
4559	/*
4560	 * The redirection table controls which destination
4561	 * queue each bucket redirects traffic to.
4562	 * Each DWORD represents four queues, with the LSB
4563	 * being the first queue in the DWORD.
4564	 *
4565	 * This just allocates buckets to queues using round-robin
4566	 * allocation.
4567	 *
4568	 * NOTE: It Just Happens to line up with the default
4569	 * RSS allocation method.
4570	 */
4571
4572	/* Warning FM follows */
4573	reta = 0;
4574	for (i = 0; i < 128; i++) {
4575#ifdef	RSS
4576		queue_id = rss_get_indirection_to_bucket(i);
4577		/*
4578		 * If we have more queues than buckets, we'll
4579		 * end up mapping buckets to a subset of the
4580		 * queues.
4581		 *
4582		 * If we have more buckets than queues, we'll
4583		 * end up instead assigning multiple buckets
4584		 * to queues.
4585		 *
4586		 * Both are suboptimal, but we need to handle
4587		 * the case so we don't go out of bounds
4588		 * indexing arrays and such.
4589		 */
4590		queue_id = queue_id % adapter->num_queues;
4591#else
4592		queue_id = (i % adapter->num_queues);
4593#endif
4594		/* Adjust if required */
4595		queue_id = queue_id << shift;
4596
4597		/*
4598		 * The low 8 bits are for hash value (n+0);
4599		 * The next 8 bits are for hash value (n+1), etc.
4600		 */
4601		reta = reta >> 8;
4602		reta = reta | ( ((uint32_t) queue_id) << 24);
4603		if ((i & 3) == 3) {
4604			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4605			reta = 0;
4606		}
4607	}
4608
4609	/* Now fill in hash table */
4610
4611	/*
4612	 * MRQC: Multiple Receive Queues Command
4613	 * Set queuing to RSS control, number depends on the device.
4614	 */
4615	mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4616
4617#ifdef	RSS
4618	/* XXX ew typecasting */
4619	rss_getkey((uint8_t *) &rss_key);
4620#else
4621	arc4rand(&rss_key, sizeof(rss_key), 0);
4622#endif
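	/* The 40-byte RSS key is programmed 32 bits at a time into RSSRK[0..9]. */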
4623	for (i = 0; i < 10; i++)
4624		E1000_WRITE_REG_ARRAY(hw,
4625		    E1000_RSSRK(0), i, rss_key[i]);
4626
4627	/*
4628	 * Configure the RSS fields to hash upon.
4629	 */
4630	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4631	    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4632	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4633	    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4634	mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
4635	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4636	mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4637	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4638
4639	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4640}
4641
4642/*********************************************************************
4643 *
4644 *  Enable receive unit.
4645 *
4646 **********************************************************************/
4647static void
4648igb_initialize_receive_units(struct adapter *adapter)
4649{
4650	struct rx_ring	*rxr = adapter->rx_rings;
4651	struct ifnet	*ifp = adapter->ifp;
4652	struct e1000_hw *hw = &adapter->hw;
4653	u32		rctl, rxcsum, psize, srrctl = 0;
4654
	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4656
4657	/*
4658	 * Make sure receives are disabled while setting
4659	 * up the descriptor ring
4660	 */
4661	rctl = E1000_READ_REG(hw, E1000_RCTL);
4662	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4663
4664	/*
4665	** Set up for header split
4666	*/
4667	if (igb_header_split) {
4668		/* Use a standard mbuf for the header */
4669		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4670		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4671	} else
4672		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4673
4674	/*
4675	** Set up for jumbo frames
4676	*/
4677	if (ifp->if_mtu > ETHERMTU) {
4678		rctl |= E1000_RCTL_LPE;
4679		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4680			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4681			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4682		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4683			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4684			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4685		}
4686		/* Set maximum packet len */
4687		psize = adapter->max_frame_size;
4688		/* are we on a vlan? */
4689		if (adapter->ifp->if_vlantrunk != NULL)
4690			psize += VLAN_TAG_SIZE;
4691		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4692	} else {
4693		rctl &= ~E1000_RCTL_LPE;
4694		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4695		rctl |= E1000_RCTL_SZ_2048;
4696	}
4697
4698	/*
4699	 * If TX flow control is disabled and there's >1 queue defined,
4700	 * enable DROP.
4701	 *
4702	 * This drops frames rather than hanging the RX MAC for all queues.
4703	 */
4704	if ((adapter->num_queues > 1) &&
4705	    (adapter->fc == e1000_fc_none ||
4706	     adapter->fc == e1000_fc_rx_pause)) {
4707		srrctl |= E1000_SRRCTL_DROP_EN;
4708	}
4709
4710	/* Setup the Base and Length of the Rx Descriptor Rings */
4711	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4712		u64 bus_addr = rxr->rxdma.dma_paddr;
4713		u32 rxdctl;
4714
4715		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4716		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4717		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4718		    (uint32_t)(bus_addr >> 32));
4719		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4720		    (uint32_t)bus_addr);
4721		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4722		/* Enable this Queue */
4723		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4724		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4725		rxdctl &= 0xFFF00000;
4726		rxdctl |= IGB_RX_PTHRESH;
4727		rxdctl |= IGB_RX_HTHRESH << 8;
4728		rxdctl |= IGB_RX_WTHRESH << 16;
4729		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4730	}
4731
4732	/*
4733	** Setup for RX MultiQueue
4734	*/
4735	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
	if (adapter->num_queues > 1) {
4738		/* rss setup */
4739		igb_initialise_rss_mapping(adapter);
4740
4741		/*
4742		** NOTE: Receive Full-Packet Checksum Offload
4743		** is mutually exclusive with Multiqueue. However
4744		** this is not the same as TCP/IP checksums which
4745		** still work.
4746		*/
4747		rxcsum |= E1000_RXCSUM_PCSD;
4748#if __FreeBSD_version >= 800000
4749		/* For SCTP Offload */
4750		if ((hw->mac.type != e1000_82575) &&
4751		    (ifp->if_capenable & IFCAP_RXCSUM))
4752			rxcsum |= E1000_RXCSUM_CRCOFL;
4753#endif
4754	} else {
4755		/* Non RSS setup */
4756		if (ifp->if_capenable & IFCAP_RXCSUM) {
4757			rxcsum |= E1000_RXCSUM_IPPCSE;
4758#if __FreeBSD_version >= 800000
4759			if (adapter->hw.mac.type != e1000_82575)
4760				rxcsum |= E1000_RXCSUM_CRCOFL;
4761#endif
4762		} else
4763			rxcsum &= ~E1000_RXCSUM_TUOFL;
4764	}
4765	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4766
4767	/* Setup the Receive Control Register */
4768	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4769	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4770		   E1000_RCTL_RDMTS_HALF |
4771		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4772	/* Strip CRC bytes. */
4773	rctl |= E1000_RCTL_SECRC;
4774	/* Make sure VLAN Filters are off */
4775	rctl &= ~E1000_RCTL_VFE;
4776	/* Don't store bad packets */
4777	rctl &= ~E1000_RCTL_SBP;
4778
4779	/* Enable Receives */
4780	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4781
4782	/*
4783	 * Setup the HW Rx Head and Tail Descriptor Pointers
4784	 *   - needs to be after enable
4785	 */
4786	for (int i = 0; i < adapter->num_queues; i++) {
4787		rxr = &adapter->rx_rings[i];
4788		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4789#ifdef DEV_NETMAP
4790		/*
4791		 * an init() while a netmap client is active must
4792		 * preserve the rx buffers passed to userspace.
4793		 * In this driver it means we adjust RDT to
4794		 * something different from next_to_refresh
4795		 * (which is not used in netmap mode).
4796		 */
4797		if (ifp->if_capenable & IFCAP_NETMAP) {
4798			struct netmap_adapter *na = NA(adapter->ifp);
4799			struct netmap_kring *kring = &na->rx_rings[i];
4800			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4801
4802			if (t >= adapter->num_rx_desc)
4803				t -= adapter->num_rx_desc;
4804			else if (t < 0)
4805				t += adapter->num_rx_desc;
4806			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4807		} else
4808#endif /* DEV_NETMAP */
4809		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4810	}
4811	return;
4812}
4813
4814/*********************************************************************
4815 *
4816 *  Free receive rings.
4817 *
4818 **********************************************************************/
4819static void
4820igb_free_receive_structures(struct adapter *adapter)
4821{
4822	struct rx_ring *rxr = adapter->rx_rings;
4823
4824	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4825		struct lro_ctrl	*lro = &rxr->lro;
4826		igb_free_receive_buffers(rxr);
4827		tcp_lro_free(lro);
4828		igb_dma_free(adapter, &rxr->rxdma);
4829	}
4830
4831	free(adapter->rx_rings, M_DEVBUF);
4832}
4833
4834/*********************************************************************
4835 *
4836 *  Free receive ring data structures.
4837 *
4838 **********************************************************************/
4839static void
4840igb_free_receive_buffers(struct rx_ring *rxr)
4841{
4842	struct adapter		*adapter = rxr->adapter;
4843	struct igb_rx_buf	*rxbuf;
4844	int i;
4845
4846	INIT_DEBUGOUT("free_receive_structures: begin");
4847
4848	/* Cleanup any existing buffers */
4849	if (rxr->rx_buffers != NULL) {
4850		for (i = 0; i < adapter->num_rx_desc; i++) {
4851			rxbuf = &rxr->rx_buffers[i];
4852			if (rxbuf->m_head != NULL) {
4853				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4854				    BUS_DMASYNC_POSTREAD);
4855				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4856				rxbuf->m_head->m_flags |= M_PKTHDR;
4857				m_freem(rxbuf->m_head);
4858			}
4859			if (rxbuf->m_pack != NULL) {
4860				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4861				    BUS_DMASYNC_POSTREAD);
4862				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4863				rxbuf->m_pack->m_flags |= M_PKTHDR;
4864				m_freem(rxbuf->m_pack);
4865			}
4866			rxbuf->m_head = NULL;
4867			rxbuf->m_pack = NULL;
4868			if (rxbuf->hmap != NULL) {
4869				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4870				rxbuf->hmap = NULL;
4871			}
4872			if (rxbuf->pmap != NULL) {
4873				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4874				rxbuf->pmap = NULL;
4875			}
4876		}
4877		if (rxr->rx_buffers != NULL) {
4878			free(rxr->rx_buffers, M_DEVBUF);
4879			rxr->rx_buffers = NULL;
4880		}
4881	}
4882
4883	if (rxr->htag != NULL) {
4884		bus_dma_tag_destroy(rxr->htag);
4885		rxr->htag = NULL;
4886	}
4887	if (rxr->ptag != NULL) {
4888		bus_dma_tag_destroy(rxr->ptag);
4889		rxr->ptag = NULL;
4890	}
4891}
4892
4893static __inline void
4894igb_rx_discard(struct rx_ring *rxr, int i)
4895{
4896	struct igb_rx_buf	*rbuf;
4897
4898	rbuf = &rxr->rx_buffers[i];
4899
4900	/* Partially received? Free the chain */
4901	if (rxr->fmp != NULL) {
4902		rxr->fmp->m_flags |= M_PKTHDR;
4903		m_freem(rxr->fmp);
4904		rxr->fmp = NULL;
4905		rxr->lmp = NULL;
4906	}
4907
4908	/*
4909	** With advanced descriptors the writeback
4910	** clobbers the buffer addrs, so its easier
4911	** to just free the existing mbufs and take
4912	** the normal refresh path to get new buffers
4913	** and mapping.
4914	*/
4915	if (rbuf->m_head) {
4916		m_free(rbuf->m_head);
4917		rbuf->m_head = NULL;
4918		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4919	}
4920
4921	if (rbuf->m_pack) {
4922		m_free(rbuf->m_pack);
4923		rbuf->m_pack = NULL;
4924		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4925	}
4926
4927	return;
4928}
4929
4930static __inline void
4931igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4932{
4933
4934	/*
4935	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4936	 * should be computed by hardware. Also it should not have VLAN tag in
4937	 * ethernet header.
4938	 */
4939	if (rxr->lro_enabled &&
4940	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4941	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4942	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4943	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4944	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4945	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4946		/*
4947		 * Send to the stack if:
4948		 **  - LRO not enabled, or
4949		 **  - no LRO resources, or
4950		 **  - lro enqueue fails
4951		 */
4952		if (rxr->lro.lro_cnt != 0)
4953			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4954				return;
4955	}
4956	IGB_RX_UNLOCK(rxr);
4957	(*ifp->if_input)(ifp, m);
4958	IGB_RX_LOCK(rxr);
4959}
4960
4961/*********************************************************************
4962 *
4963 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor ring and sends data which has been
 *  DMA'd into host memory up to the upper layer.
4966 *
4967 *  We loop at most count times if count is > 0, or until done if
4968 *  count < 0.
4969 *
4970 *  Return TRUE if more to clean, FALSE otherwise
4971 *********************************************************************/
4972static bool
4973igb_rxeof(struct igb_queue *que, int count, int *done)
4974{
4975	struct adapter		*adapter = que->adapter;
4976	struct rx_ring		*rxr = que->rxr;
4977	struct ifnet		*ifp = adapter->ifp;
4978	struct lro_ctrl		*lro = &rxr->lro;
4979	int			i, processed = 0, rxdone = 0;
4980	u32			ptype, staterr = 0;
4981	union e1000_adv_rx_desc	*cur;
4982
4983	IGB_RX_LOCK(rxr);
4984	/* Sync the ring. */
4985	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4986	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4987
4988#ifdef DEV_NETMAP
4989	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4990		IGB_RX_UNLOCK(rxr);
4991		return (FALSE);
4992	}
4993#endif /* DEV_NETMAP */
4994
4995	/* Main clean loop */
4996	for (i = rxr->next_to_check; count != 0;) {
4997		struct mbuf		*sendmp, *mh, *mp;
4998		struct igb_rx_buf	*rxbuf;
4999		u16			hlen, plen, hdr, vtag, pkt_info;
5000		bool			eop = FALSE;
5001
5002		cur = &rxr->rx_base[i];
5003		staterr = le32toh(cur->wb.upper.status_error);
5004		if ((staterr & E1000_RXD_STAT_DD) == 0)
5005			break;
5006		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5007			break;
5008		count--;
5009		sendmp = mh = mp = NULL;
5010		cur->wb.upper.status_error = 0;
5011		rxbuf = &rxr->rx_buffers[i];
5012		plen = le16toh(cur->wb.upper.length);
5013		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5014		if (((adapter->hw.mac.type == e1000_i350) ||
5015		    (adapter->hw.mac.type == e1000_i354)) &&
5016		    (staterr & E1000_RXDEXT_STATERR_LB))
5017			vtag = be16toh(cur->wb.upper.vlan);
5018		else
5019			vtag = le16toh(cur->wb.upper.vlan);
5020		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5021		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5022		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5023
5024		/*
5025		 * Free the frame (all segments) if we're at EOP and
5026		 * it's an error.
5027		 *
5028		 * The datasheet states that EOP + status is only valid for
5029		 * the final segment in a multi-segment frame.
5030		 */
5031		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5032			adapter->dropped_pkts++;
5033			++rxr->rx_discarded;
5034			igb_rx_discard(rxr, i);
5035			goto next_desc;
5036		}
5037
5038		/*
5039		** The way the hardware is configured to
5040		** split, it will ONLY use the header buffer
5041		** when header split is enabled, otherwise we
5042		** get normal behavior, ie, both header and
5043		** payload are DMA'd into the payload buffer.
5044		**
5045		** The fmp test is to catch the case where a
5046		** packet spans multiple descriptors, in that
5047		** case only the first header is valid.
5048		*/
5049		if (rxr->hdr_split && rxr->fmp == NULL) {
5050			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5051			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5052			    E1000_RXDADV_HDRBUFLEN_SHIFT;
5053			if (hlen > IGB_HDR_BUF)
5054				hlen = IGB_HDR_BUF;
5055			mh = rxr->rx_buffers[i].m_head;
5056			mh->m_len = hlen;
5057			/* clear buf pointer for refresh */
5058			rxbuf->m_head = NULL;
5059			/*
5060			** Get the payload length, this
5061			** could be zero if its a small
5062			** packet.
5063			*/
5064			if (plen > 0) {
5065				mp = rxr->rx_buffers[i].m_pack;
5066				mp->m_len = plen;
5067				mh->m_next = mp;
5068				/* clear buf pointer */
5069				rxbuf->m_pack = NULL;
5070				rxr->rx_split_packets++;
5071			}
5072		} else {
5073			/*
5074			** Either no header split, or a
5075			** secondary piece of a fragmented
5076			** split packet.
5077			*/
5078			mh = rxr->rx_buffers[i].m_pack;
5079			mh->m_len = plen;
5080			/* clear buf info for refresh */
5081			rxbuf->m_pack = NULL;
5082		}
5083		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5084
5085		++processed; /* So we know when to refresh */
5086
		/* Initial segment of the frame: fmp is the chain head, lmp the tail */
5088		if (rxr->fmp == NULL) {
5089			mh->m_pkthdr.len = mh->m_len;
5090			/* Save the head of the chain */
5091			rxr->fmp = mh;
5092			rxr->lmp = mh;
5093			if (mp != NULL) {
5094				/* Add payload if split */
5095				mh->m_pkthdr.len += mp->m_len;
5096				rxr->lmp = mh->m_next;
5097			}
5098		} else {
5099			/* Chain mbuf's together */
5100			rxr->lmp->m_next = mh;
5101			rxr->lmp = rxr->lmp->m_next;
5102			rxr->fmp->m_pkthdr.len += mh->m_len;
5103		}
5104
5105		if (eop) {
5106			rxr->fmp->m_pkthdr.rcvif = ifp;
5107			rxr->rx_packets++;
5108			/* capture data for AIM */
5109			rxr->packets++;
5110			rxr->bytes += rxr->fmp->m_pkthdr.len;
5111			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5112
5113			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5114				igb_rx_checksum(staterr, rxr->fmp, ptype);
5115
5116			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5117			    (staterr & E1000_RXD_STAT_VP) != 0) {
5118				rxr->fmp->m_pkthdr.ether_vtag = vtag;
5119				rxr->fmp->m_flags |= M_VLANTAG;
5120			}
5121
5122			/*
5123			 * In case of multiqueue, we have RXCSUM.PCSD bit set
5124			 * and never cleared. This means we have RSS hash
5125			 * available to be used.
5126			 */
5127			if (adapter->num_queues > 1) {
5128				rxr->fmp->m_pkthdr.flowid =
5129				    le32toh(cur->wb.lower.hi_dword.rss);
5130				switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5131					case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5132						M_HASHTYPE_SET(rxr->fmp,
5133						    M_HASHTYPE_RSS_TCP_IPV4);
5134					break;
5135					case E1000_RXDADV_RSSTYPE_IPV4:
5136						M_HASHTYPE_SET(rxr->fmp,
5137						    M_HASHTYPE_RSS_IPV4);
5138					break;
5139					case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5140						M_HASHTYPE_SET(rxr->fmp,
5141						    M_HASHTYPE_RSS_TCP_IPV6);
5142					break;
5143					case E1000_RXDADV_RSSTYPE_IPV6_EX:
5144						M_HASHTYPE_SET(rxr->fmp,
5145						    M_HASHTYPE_RSS_IPV6_EX);
5146					break;
5147					case E1000_RXDADV_RSSTYPE_IPV6:
5148						M_HASHTYPE_SET(rxr->fmp,
5149						    M_HASHTYPE_RSS_IPV6);
5150					break;
5151					case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5152						M_HASHTYPE_SET(rxr->fmp,
5153						    M_HASHTYPE_RSS_TCP_IPV6_EX);
5154					break;
5155					default:
5156						/* XXX fallthrough */
5157						M_HASHTYPE_SET(rxr->fmp,
5158						    M_HASHTYPE_OPAQUE_HASH);
5159				}
5160			} else {
5161#ifndef IGB_LEGACY_TX
5162				rxr->fmp->m_pkthdr.flowid = que->msix;
5163				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5164#endif
5165			}
5166			sendmp = rxr->fmp;
5167			/* Make sure to set M_PKTHDR. */
5168			sendmp->m_flags |= M_PKTHDR;
5169			rxr->fmp = NULL;
5170			rxr->lmp = NULL;
5171		}
5172
5173next_desc:
5174		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5175		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5176
5177		/* Advance our pointers to the next descriptor. */
5178		if (++i == adapter->num_rx_desc)
5179			i = 0;
5180		/*
5181		** Send to the stack or LRO
5182		*/
5183		if (sendmp != NULL) {
5184			rxr->next_to_check = i;
5185			igb_rx_input(rxr, ifp, sendmp, ptype);
5186			i = rxr->next_to_check;
5187			rxdone++;
5188		}
5189
5190		/* Every 8 descriptors we go to refresh mbufs */
5191		if (processed == 8) {
5192                        igb_refresh_mbufs(rxr, i);
5193                        processed = 0;
5194		}
5195	}
5196
5197	/* Catch any remainders */
5198	if (igb_rx_unrefreshed(rxr))
5199		igb_refresh_mbufs(rxr, i);
5200
5201	rxr->next_to_check = i;
5202
5203	/*
5204	 * Flush any outstanding LRO work
5205	 */
5206	tcp_lro_flush_all(lro);
5207
5208	if (done != NULL)
5209		*done += rxdone;
5210
5211	IGB_RX_UNLOCK(rxr);
5212	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5213}
5214
5215/*********************************************************************
5216 *
5217 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the checksum status so that the stack
 *  doesn't spend time verifying it again.
5220 *
5221 *********************************************************************/
5222static void
5223igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5224{
5225	u16 status = (u16)staterr;
5226	u8  errors = (u8) (staterr >> 24);
5227	int sctp;
5228
5229	/* Ignore Checksum bit is set */
5230	if (status & E1000_RXD_STAT_IXSM) {
5231		mp->m_pkthdr.csum_flags = 0;
5232		return;
5233	}
5234
5235	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5236	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5237		sctp = 1;
5238	else
5239		sctp = 0;
5240	if (status & E1000_RXD_STAT_IPCS) {
5241		/* Did it pass? */
5242		if (!(errors & E1000_RXD_ERR_IPE)) {
5243			/* IP Checksum Good */
5244			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5245			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5246		} else
5247			mp->m_pkthdr.csum_flags = 0;
5248	}
5249
5250	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5251		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
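		/*
		 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with the
		 * csum_data of 0xffff set below tell the stack that the
		 * L4 checksum, pseudo-header included, has been verified.
		 */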
5252#if __FreeBSD_version >= 800000
5253		if (sctp) /* reassign */
5254			type = CSUM_SCTP_VALID;
5255#endif
5256		/* Did it pass? */
5257		if (!(errors & E1000_RXD_ERR_TCPE)) {
5258			mp->m_pkthdr.csum_flags |= type;
5259			if (sctp == 0)
5260				mp->m_pkthdr.csum_data = htons(0xffff);
5261		}
5262	}
5263	return;
5264}
5265
5266/*
 * This routine is run via a vlan
5268 * config EVENT
5269 */
5270static void
5271igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5272{
5273	struct adapter	*adapter = ifp->if_softc;
5274	u32		index, bit;
5275
5276	if (ifp->if_softc !=  arg)   /* Not our event */
5277		return;
5278
5279	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5280                return;
5281
5282	IGB_CORE_LOCK(adapter);
5283	index = (vtag >> 5) & 0x7F;
5284	bit = vtag & 0x1F;
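	/* e.g. vtag 100: word 3 (100 >> 5), bit 4 (100 & 0x1F) of the VFTA */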
5285	adapter->shadow_vfta[index] |= (1 << bit);
5286	++adapter->num_vlans;
5287	/* Change hw filter setting */
5288	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5289		igb_setup_vlan_hw_support(adapter);
5290	IGB_CORE_UNLOCK(adapter);
5291}
5292
5293/*
 * This routine is run via a vlan
5295 * unconfig EVENT
5296 */
5297static void
5298igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5299{
5300	struct adapter	*adapter = ifp->if_softc;
5301	u32		index, bit;
5302
5303	if (ifp->if_softc !=  arg)
5304		return;
5305
5306	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5307                return;
5308
5309	IGB_CORE_LOCK(adapter);
5310	index = (vtag >> 5) & 0x7F;
5311	bit = vtag & 0x1F;
5312	adapter->shadow_vfta[index] &= ~(1 << bit);
5313	--adapter->num_vlans;
5314	/* Change hw filter setting */
5315	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5316		igb_setup_vlan_hw_support(adapter);
5317	IGB_CORE_UNLOCK(adapter);
5318}
5319
5320static void
5321igb_setup_vlan_hw_support(struct adapter *adapter)
5322{
5323	struct e1000_hw *hw = &adapter->hw;
5324	struct ifnet	*ifp = adapter->ifp;
5325	u32             reg;
5326
5327	if (adapter->vf_ifp) {
5328		e1000_rlpml_set_vf(hw,
5329		    adapter->max_frame_size + VLAN_TAG_SIZE);
5330		return;
5331	}
5332
5333	reg = E1000_READ_REG(hw, E1000_CTRL);
5334	reg |= E1000_CTRL_VME;
5335	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5336
5337	/* Enable the Filter Table */
5338	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5339		reg = E1000_READ_REG(hw, E1000_RCTL);
5340		reg &= ~E1000_RCTL_CFIEN;
5341		reg |= E1000_RCTL_VFE;
5342		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5343	}
5344
5345	/* Update the frame size */
5346	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5347	    adapter->max_frame_size + VLAN_TAG_SIZE);
5348
5349	/* Don't bother with table if no vlans */
5350	if ((adapter->num_vlans == 0) ||
5351	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5352                return;
5353	/*
	** A soft reset zeroes out the VFTA, so
5355	** we need to repopulate it now.
5356	*/
5357	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5358                if (adapter->shadow_vfta[i] != 0) {
5359			if (adapter->vf_ifp)
5360				e1000_vfta_set_vf(hw,
5361				    adapter->shadow_vfta[i], TRUE);
5362			else
5363				e1000_write_vfta(hw,
5364				    i, adapter->shadow_vfta[i]);
5365		}
5366}
5367
5368static void
5369igb_enable_intr(struct adapter *adapter)
5370{
5371	/* With RSS set up what to auto clear */
5372	if (adapter->msix_mem) {
5373		u32 mask = (adapter->que_mask | adapter->link_mask);
5374		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5375		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5376		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5377		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5378		    E1000_IMS_LSC);
5379	} else {
5380		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5381		    IMS_ENABLE_MASK);
5382	}
5383	E1000_WRITE_FLUSH(&adapter->hw);
5384
5385	return;
5386}
5387
5388static void
5389igb_disable_intr(struct adapter *adapter)
5390{
5391	if (adapter->msix_mem) {
5392		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5393		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5394	}
5395	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5396	E1000_WRITE_FLUSH(&adapter->hw);
5397	return;
5398}
5399
5400/*
 * Bit of a misnomer: what this really means is
 * to enable OS management of the system, i.e.,
 * to disable special hardware management features.
5404 */
5405static void
5406igb_init_manageability(struct adapter *adapter)
5407{
5408	if (adapter->has_manage) {
5409		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5410		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5411
5412		/* disable hardware interception of ARP */
5413		manc &= ~(E1000_MANC_ARP_EN);
5414
5415                /* enable receiving management packets to the host */
5416		manc |= E1000_MANC_EN_MNG2HOST;
5417		manc2h |= 1 << 5;  /* Mng Port 623 */
5418		manc2h |= 1 << 6;  /* Mng Port 664 */
5419		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5420		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5421	}
5422}
5423
5424/*
5425 * Give control back to hardware management
5426 * controller if there is one.
5427 */
5428static void
5429igb_release_manageability(struct adapter *adapter)
5430{
5431	if (adapter->has_manage) {
5432		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5433
5434		/* re-enable hardware interception of ARP */
5435		manc |= E1000_MANC_ARP_EN;
5436		manc &= ~E1000_MANC_EN_MNG2HOST;
5437
5438		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5439	}
5440}
5441
5442/*
5443 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5444 * For ASF and Pass Through versions of f/w this means that
5445 * the driver is loaded.
5446 *
5447 */
5448static void
5449igb_get_hw_control(struct adapter *adapter)
5450{
5451	u32 ctrl_ext;
5452
5453	if (adapter->vf_ifp)
5454		return;
5455
5456	/* Let firmware know the driver has taken over */
5457	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5458	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5459	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5460}
5461
5462/*
5463 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5464 * For ASF and Pass Through versions of f/w this means that the
5465 * driver is no longer loaded.
5466 *
5467 */
5468static void
5469igb_release_hw_control(struct adapter *adapter)
5470{
5471	u32 ctrl_ext;
5472
5473	if (adapter->vf_ifp)
5474		return;
5475
5476	/* Let firmware take over control of h/w */
5477	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5478	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5479	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5480}
5481
5482static int
5483igb_is_valid_ether_addr(uint8_t *addr)
5484{
5485	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5486
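	/* Reject multicast/broadcast addresses (low bit of the first octet
	   set) as well as the all-zeros address. */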
5487	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5488		return (FALSE);
5489	}
5490
5491	return (TRUE);
5492}
5493
5494
5495/*
5496 * Enable PCI Wake On Lan capability
5497 */
5498static void
5499igb_enable_wakeup(device_t dev)
5500{
5501	u16     cap, status;
5502	u8      id;
5503
5504	/* First find the capabilities pointer */
5505	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5506	/* Read the PM Capabilities */
5507	id = pci_read_config(dev, cap, 1);
5508	if (id != PCIY_PMG)     /* Something wrong */
5509		return;
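	/*
	 * Note: only the first entry of the capability list is checked
	 * here; pci_find_cap(9) could walk the whole chain instead.
	 */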
5510	/* OK, we have the power capabilities, so
5511	   now get the status register */
5512	cap += PCIR_POWER_STATUS;
5513	status = pci_read_config(dev, cap, 2);
5514	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5515	pci_write_config(dev, cap, status, 2);
5516	return;
5517}
5518
5519static void
5520igb_led_func(void *arg, int onoff)
5521{
5522	struct adapter	*adapter = arg;
5523
5524	IGB_CORE_LOCK(adapter);
5525	if (onoff) {
5526		e1000_setup_led(&adapter->hw);
5527		e1000_led_on(&adapter->hw);
5528	} else {
5529		e1000_led_off(&adapter->hw);
5530		e1000_cleanup_led(&adapter->hw);
5531	}
5532	IGB_CORE_UNLOCK(adapter);
5533}
5534
5535static uint64_t
5536igb_get_vf_counter(if_t ifp, ift_counter cnt)
5537{
5538	struct adapter *adapter;
5539	struct e1000_vf_stats *stats;
5540#ifndef IGB_LEGACY_TX
5541	struct tx_ring *txr;
5542	uint64_t rv;
5543#endif
5544
5545	adapter = if_getsoftc(ifp);
5546	stats = (struct e1000_vf_stats *)adapter->stats;
5547
5548	switch (cnt) {
5549	case IFCOUNTER_IPACKETS:
5550		return (stats->gprc);
5551	case IFCOUNTER_OPACKETS:
5552		return (stats->gptc);
5553	case IFCOUNTER_IBYTES:
5554		return (stats->gorc);
5555	case IFCOUNTER_OBYTES:
5556		return (stats->gotc);
5557	case IFCOUNTER_IMCASTS:
5558		return (stats->mprc);
5559	case IFCOUNTER_IERRORS:
5560		return (adapter->dropped_pkts);
5561	case IFCOUNTER_OERRORS:
5562		return (adapter->watchdog_events);
5563#ifndef IGB_LEGACY_TX
5564	case IFCOUNTER_OQDROPS:
5565		rv = 0;
5566		txr = adapter->tx_rings;
5567		for (int i = 0; i < adapter->num_queues; i++, txr++)
5568			rv += txr->br->br_drops;
5569		return (rv);
5570#endif
5571	default:
5572		return (if_get_counter_default(ifp, cnt));
5573	}
5574}
5575
5576static uint64_t
5577igb_get_counter(if_t ifp, ift_counter cnt)
5578{
5579	struct adapter *adapter;
5580	struct e1000_hw_stats *stats;
5581#ifndef IGB_LEGACY_TX
5582	struct tx_ring *txr;
5583	uint64_t rv;
5584#endif
5585
5586	adapter = if_getsoftc(ifp);
5587	if (adapter->vf_ifp)
5588		return (igb_get_vf_counter(ifp, cnt));
5589
5590	stats = (struct e1000_hw_stats *)adapter->stats;
5591
5592	switch (cnt) {
5593	case IFCOUNTER_IPACKETS:
5594		return (stats->gprc);
5595	case IFCOUNTER_OPACKETS:
5596		return (stats->gptc);
5597	case IFCOUNTER_IBYTES:
5598		return (stats->gorc);
5599	case IFCOUNTER_OBYTES:
5600		return (stats->gotc);
5601	case IFCOUNTER_IMCASTS:
5602		return (stats->mprc);
5603	case IFCOUNTER_OMCASTS:
5604		return (stats->mptc);
5605	case IFCOUNTER_IERRORS:
5606		return (adapter->dropped_pkts + stats->rxerrc +
5607		    stats->crcerrs + stats->algnerrc +
5608		    stats->ruc + stats->roc + stats->cexterr);
5609	case IFCOUNTER_OERRORS:
5610		return (stats->ecol + stats->latecol +
5611		    adapter->watchdog_events);
5612	case IFCOUNTER_COLLISIONS:
5613		return (stats->colc);
5614	case IFCOUNTER_IQDROPS:
5615		return (stats->mpc);
5616#ifndef IGB_LEGACY_TX
5617	case IFCOUNTER_OQDROPS:
5618		rv = 0;
5619		txr = adapter->tx_rings;
5620		for (int i = 0; i < adapter->num_queues; i++, txr++)
5621			rv += txr->br->br_drops;
5622		return (rv);
5623#endif
5624	default:
5625		return (if_get_counter_default(ifp, cnt));
5626	}
5627}
5628
5629/**********************************************************************
5630 *
5631 *  Update the board statistics counters.
5632 *
5633 **********************************************************************/
5634static void
5635igb_update_stats_counters(struct adapter *adapter)
5636{
5637	struct e1000_hw		*hw = &adapter->hw;
5638	struct e1000_hw_stats	*stats;
5639
5640	/*
5641	** The virtual function adapter has only a
5642	** small, controlled set of stats, so do only
5643	** those and return.
5644	*/
5645	if (adapter->vf_ifp) {
5646		igb_update_vf_stats_counters(adapter);
5647		return;
5648	}
5649
5650	stats = (struct e1000_hw_stats	*)adapter->stats;
5651
5652	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5653	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5654		stats->symerrs +=
5655		    E1000_READ_REG(hw,E1000_SYMERRS);
5656		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5657	}
5658
5659	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5660	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5661	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5662	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5663
5664	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5665	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5666	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5667	stats->dc += E1000_READ_REG(hw, E1000_DC);
5668	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5669	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5670	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5671	/*
5672	** For watchdog management we need to know if we have been
5673	** paused during the last interval, so capture that here.
5674	*/
5675	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5676	stats->xoffrxc += adapter->pause_frames;
5677	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5678	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5679	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5680	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5681	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5682	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5683	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5684	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5685	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5686	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5687	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5688	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5689
5690	/* For the 64-bit byte counters the low dword must be read first. */
5691	/* Both registers clear on the read of the high dword */
5692
5693	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5694	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5695	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5696	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5697
5698	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5699	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5700	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5701	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5702	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5703
5704	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5705	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5706	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5707
5708	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5709	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5710	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5711	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5712
5713	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5714	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5715	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5716	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5717	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5718	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5719	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5720	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5721	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5722	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5723
5724	/* Interrupt Counts */
5725
5726	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5727	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5728	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5729	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5730	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5731	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5732	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5733	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5734	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5735
5736	/* Host to Card Statistics */
5737
5738	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5739	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5740	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5741	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5742	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5743	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5744	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5745	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5746	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5747	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5748	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5749	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5750	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5751	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5752
5753	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5754	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5755	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5756	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5757	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5758	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5759
5760	/* Driver specific counters */
5761	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5762	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5763	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5764	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5765	adapter->packet_buf_alloc_tx =
5766	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5767	adapter->packet_buf_alloc_rx =
5768	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5769}
5770
5771
5772/**********************************************************************
5773 *
5774 *  Initialize the VF board statistics counters.
5775 *
5776 **********************************************************************/
5777static void
5778igb_vf_init_stats(struct adapter *adapter)
5779{
5780	struct e1000_hw *hw = &adapter->hw;
5781	struct e1000_vf_stats	*stats;
5782
5783	stats = (struct e1000_vf_stats	*)adapter->stats;
5784	if (stats == NULL)
5785		return;
5786	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5787	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5788	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5789	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5790	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5791}
5792
5793/**********************************************************************
5794 *
5795 *  Update the VF board statistics counters.
5796 *
5797 **********************************************************************/
5798static void
5799igb_update_vf_stats_counters(struct adapter *adapter)
5800{
5801	struct e1000_hw *hw = &adapter->hw;
5802	struct e1000_vf_stats	*stats;
5803
5804	if (adapter->link_speed == 0)
5805		return;
5806
5807	stats = (struct e1000_vf_stats	*)adapter->stats;
5808
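	/*
	** UPDATE_VF_REG() (see if_igb.h) reads the current 32-bit VF
	** counter, accounts for wrap against the previous snapshot and
	** folds the result into the running 64-bit statistic.
	*/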
5809	UPDATE_VF_REG(E1000_VFGPRC,
5810	    stats->last_gprc, stats->gprc);
5811	UPDATE_VF_REG(E1000_VFGORC,
5812	    stats->last_gorc, stats->gorc);
5813	UPDATE_VF_REG(E1000_VFGPTC,
5814	    stats->last_gptc, stats->gptc);
5815	UPDATE_VF_REG(E1000_VFGOTC,
5816	    stats->last_gotc, stats->gotc);
5817	UPDATE_VF_REG(E1000_VFMPRC,
5818	    stats->last_mprc, stats->mprc);
5819}
5820
5821/* Export a single 32-bit register via a read-only sysctl. */
5822static int
5823igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5824{
5825	struct adapter *adapter;
5826	u_int val;
5827
5828	adapter = oidp->oid_arg1;
5829	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5830	return (sysctl_handle_int(oidp, &val, 0, req));
5831}
5832
5833/*
5834**  Tuneable interrupt rate handler
5835*/
5836static int
5837igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5838{
5839	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5840	int			error;
5841	u32			reg, usec, rate;
5842
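	/*
	** EITR bits [14:2] hold the throttle interval; convert it back to
	** an approximate interrupts-per-second figure.  Writes are accepted
	** below but the handler does not reprogram the register.
	*/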
5843	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5844	usec = ((reg & 0x7FFC) >> 2);
5845	if (usec > 0)
5846		rate = 1000000 / usec;
5847	else
5848		rate = 0;
5849	error = sysctl_handle_int(oidp, &rate, 0, req);
5850	if (error || !req->newptr)
5851		return error;
5852	return 0;
5853}
5854
5855/*
5856 * Add sysctl variables, one per statistic, to the system.
5857 */
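/*
 * These nodes hang off the device's sysctl tree, so (assuming unit 0) they
 * appear to userland as e.g. dev.igb.0.queue0.tx_packets and
 * dev.igb.0.mac_stats.good_pkts_recvd.
 */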
5858static void
5859igb_add_hw_stats(struct adapter *adapter)
5860{
5861	device_t dev = adapter->dev;
5862
5863	struct tx_ring *txr = adapter->tx_rings;
5864	struct rx_ring *rxr = adapter->rx_rings;
5865
5866	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5867	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5868	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5869	struct e1000_hw_stats *stats = adapter->stats;
5870
5871	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5872	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5873
5874#define QUEUE_NAME_LEN 32
5875	char namebuf[QUEUE_NAME_LEN];
5876
5877	/* Driver Statistics */
5878	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5879			CTLFLAG_RD, &adapter->dropped_pkts,
5880			"Driver dropped packets");
5881	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5882			CTLFLAG_RD, &adapter->link_irq,
5883			"Link MSIX IRQ Handled");
5884	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5885			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5886			"Defragmenting mbuf chain failed");
5887	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5888			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5889			"Driver tx dma failure in xmit");
5890	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5891			CTLFLAG_RD, &adapter->rx_overruns,
5892			"RX overruns");
5893	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5894			CTLFLAG_RD, &adapter->watchdog_events,
5895			"Watchdog timeouts");
5896
5897	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5898			CTLFLAG_RD, &adapter->device_control,
5899			"Device Control Register");
5900	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5901			CTLFLAG_RD, &adapter->rx_control,
5902			"Receiver Control Register");
5903	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5904			CTLFLAG_RD, &adapter->int_mask,
5905			"Interrupt Mask");
5906	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5907			CTLFLAG_RD, &adapter->eint_mask,
5908			"Extended Interrupt Mask");
5909	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5910			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5911			"Transmit Buffer Packet Allocation");
5912	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5913			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5914			"Receive Buffer Packet Allocation");
5915	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5916			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5917			"Flow Control High Watermark");
5918	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5919			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5920			"Flow Control Low Watermark");
5921
5922	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5923		struct lro_ctrl *lro = &rxr->lro;
5924
5925		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5926		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5927					    CTLFLAG_RD, NULL, "Queue Name");
5928		queue_list = SYSCTL_CHILDREN(queue_node);
5929
5930		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5931				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5932				sizeof(&adapter->queues[i]),
5933				igb_sysctl_interrupt_rate_handler,
5934				"IU", "Interrupt Rate");
5935
5936		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5937				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5938				igb_sysctl_reg_handler, "IU",
5939 				"Transmit Descriptor Head");
5940		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5941				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5942				igb_sysctl_reg_handler, "IU",
5943 				"Transmit Descriptor Tail");
5944		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5945				CTLFLAG_RD, &txr->no_desc_avail,
5946				"Queue Descriptors Unavailable");
5947		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5948				CTLFLAG_RD, &txr->total_packets,
5949				"Queue Packets Transmitted");
5950
5951		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5952				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5953				igb_sysctl_reg_handler, "IU",
5954				"Receive Descriptor Head");
5955		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5956				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5957				igb_sysctl_reg_handler, "IU",
5958				"Receive Descriptor Tail");
5959		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5960				CTLFLAG_RD, &rxr->rx_packets,
5961				"Queue Packets Received");
5962		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5963				CTLFLAG_RD, &rxr->rx_bytes,
5964				"Queue Bytes Received");
5965		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5966				CTLFLAG_RD, &lro->lro_queued, 0,
5967				"LRO Queued");
5968		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5969				CTLFLAG_RD, &lro->lro_flushed, 0,
5970				"LRO Flushed");
5971	}
5972
5973	/* MAC stats get their own sub node */
5974
5975	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5976				    CTLFLAG_RD, NULL, "MAC Statistics");
5977	stat_list = SYSCTL_CHILDREN(stat_node);
5978
5979	/*
5980	** The VF adapter has a very limited set of stats since it's not
5981	** managing the metal; note adapter->stats is an e1000_vf_stats here.
5982	*/
5983	if (adapter->vf_ifp) {
5984		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5985			CTLFLAG_RD, &((struct e1000_vf_stats *)stats)->gprc,
5986			"Good Packets Received");
5987		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5988			CTLFLAG_RD, &((struct e1000_vf_stats *)stats)->gptc,
5989			"Good Packets Transmitted");
5990		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5991			CTLFLAG_RD, &((struct e1000_vf_stats *)stats)->gorc,
5992			"Good Octets Received");
5993		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5994			CTLFLAG_RD, &((struct e1000_vf_stats *)stats)->gotc,
5995			"Good Octets Transmitted");
5996		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5997			CTLFLAG_RD, &((struct e1000_vf_stats *)stats)->mprc,
5998			"Multicast Packets Received");
5999		return;
6000	}
6001
6002	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
6003			CTLFLAG_RD, &stats->ecol,
6004			"Excessive collisions");
6005	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
6006			CTLFLAG_RD, &stats->scc,
6007			"Single collisions");
6008	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
6009			CTLFLAG_RD, &stats->mcc,
6010			"Multiple collisions");
6011	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
6012			CTLFLAG_RD, &stats->latecol,
6013			"Late collisions");
6014	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
6015			CTLFLAG_RD, &stats->colc,
6016			"Collision Count");
6017	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6018			CTLFLAG_RD, &stats->symerrs,
6019			"Symbol Errors");
6020	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6021			CTLFLAG_RD, &stats->sec,
6022			"Sequence Errors");
6023	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6024			CTLFLAG_RD, &stats->dc,
6025			"Defer Count");
6026	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6027			CTLFLAG_RD, &stats->mpc,
6028			"Missed Packets");
6029	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6030			CTLFLAG_RD, &stats->rlec,
6031			"Receive Length Errors");
6032	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6033			CTLFLAG_RD, &stats->rnbc,
6034			"Receive No Buffers");
6035	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6036			CTLFLAG_RD, &stats->ruc,
6037			"Receive Undersize");
6038	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6039			CTLFLAG_RD, &stats->rfc,
6040			"Fragmented Packets Received");
6041	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6042			CTLFLAG_RD, &stats->roc,
6043			"Oversized Packets Received");
6044	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6045			CTLFLAG_RD, &stats->rjc,
6046			"Received Jabber");
6047	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6048			CTLFLAG_RD, &stats->rxerrc,
6049			"Receive Errors");
6050	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6051			CTLFLAG_RD, &stats->crcerrs,
6052			"CRC errors");
6053	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6054			CTLFLAG_RD, &stats->algnerrc,
6055			"Alignment Errors");
6056	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6057			CTLFLAG_RD, &stats->tncrs,
6058			"Transmit with No CRS");
6059	/* On 82575 these are collision counts */
6060	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6061			CTLFLAG_RD, &stats->cexterr,
6062			"Collision/Carrier extension errors");
6063	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6064			CTLFLAG_RD, &stats->xonrxc,
6065			"XON Received");
6066	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6067			CTLFLAG_RD, &stats->xontxc,
6068			"XON Transmitted");
6069	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6070			CTLFLAG_RD, &stats->xoffrxc,
6071			"XOFF Received");
6072	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6073			CTLFLAG_RD, &stats->xofftxc,
6074			"XOFF Transmitted");
6075	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6076			CTLFLAG_RD, &stats->fcruc,
6077			"Unsupported Flow Control Received");
6078	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6079			CTLFLAG_RD, &stats->mgprc,
6080			"Management Packets Received");
6081	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6082			CTLFLAG_RD, &stats->mgpdc,
6083			"Management Packets Dropped");
6084	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6085			CTLFLAG_RD, &stats->mgptc,
6086			"Management Packets Transmitted");
6087	/* Packet Reception Stats */
6088	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6089			CTLFLAG_RD, &stats->tpr,
6090			"Total Packets Received");
6091	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6092			CTLFLAG_RD, &stats->gprc,
6093			"Good Packets Received");
6094	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6095			CTLFLAG_RD, &stats->bprc,
6096			"Broadcast Packets Received");
6097	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6098			CTLFLAG_RD, &stats->mprc,
6099			"Multicast Packets Received");
6100	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6101			CTLFLAG_RD, &stats->prc64,
6102			"64 byte frames received");
6103	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6104			CTLFLAG_RD, &stats->prc127,
6105			"65-127 byte frames received");
6106	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6107			CTLFLAG_RD, &stats->prc255,
6108			"128-255 byte frames received");
6109	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6110			CTLFLAG_RD, &stats->prc511,
6111			"256-511 byte frames received");
6112	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6113			CTLFLAG_RD, &stats->prc1023,
6114			"512-1023 byte frames received");
6115	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6116			CTLFLAG_RD, &stats->prc1522,
6117			"1024-1522 byte frames received");
6118 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6119 			CTLFLAG_RD, &stats->gorc,
6120			"Good Octets Received");
6121	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6122			CTLFLAG_RD, &stats->tor,
6123			"Total Octets Received");
6124
6125	/* Packet Transmission Stats */
6126 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6127 			CTLFLAG_RD, &stats->gotc,
6128 			"Good Octets Transmitted");
6129	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6130			CTLFLAG_RD, &stats->tot,
6131			"Total Octets Transmitted");
6132	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6133			CTLFLAG_RD, &stats->tpt,
6134			"Total Packets Transmitted");
6135	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6136			CTLFLAG_RD, &stats->gptc,
6137			"Good Packets Transmitted");
6138	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6139			CTLFLAG_RD, &stats->bptc,
6140			"Broadcast Packets Transmitted");
6141	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6142			CTLFLAG_RD, &stats->mptc,
6143			"Multicast Packets Transmitted");
6144	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6145			CTLFLAG_RD, &stats->ptc64,
6146			"64 byte frames transmitted");
6147	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6148			CTLFLAG_RD, &stats->ptc127,
6149			"65-127 byte frames transmitted");
6150	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6151			CTLFLAG_RD, &stats->ptc255,
6152			"128-255 byte frames transmitted");
6153	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6154			CTLFLAG_RD, &stats->ptc511,
6155			"256-511 byte frames transmitted");
6156	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6157			CTLFLAG_RD, &stats->ptc1023,
6158			"512-1023 byte frames transmitted");
6159	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6160			CTLFLAG_RD, &stats->ptc1522,
6161			"1024-1522 byte frames transmitted");
6162	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6163			CTLFLAG_RD, &stats->tsctc,
6164			"TSO Contexts Transmitted");
6165	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6166			CTLFLAG_RD, &stats->tsctfc,
6167			"TSO Contexts Failed");
6168
6169
6170	/* Interrupt Stats */
6171
6172	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6173				    CTLFLAG_RD, NULL, "Interrupt Statistics");
6174	int_list = SYSCTL_CHILDREN(int_node);
6175
6176	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6177			CTLFLAG_RD, &stats->iac,
6178			"Interrupt Assertion Count");
6179
6180	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6181			CTLFLAG_RD, &stats->icrxptc,
6182			"Interrupt Cause Rx Pkt Timer Expire Count");
6183
6184	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6185			CTLFLAG_RD, &stats->icrxatc,
6186			"Interrupt Cause Rx Abs Timer Expire Count");
6187
6188	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6189			CTLFLAG_RD, &stats->ictxptc,
6190			"Interrupt Cause Tx Pkt Timer Expire Count");
6191
6192	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6193			CTLFLAG_RD, &stats->ictxatc,
6194			"Interrupt Cause Tx Abs Timer Expire Count");
6195
6196	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6197			CTLFLAG_RD, &stats->ictxqec,
6198			"Interrupt Cause Tx Queue Empty Count");
6199
6200	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6201			CTLFLAG_RD, &stats->ictxqmtc,
6202			"Interrupt Cause Tx Queue Min Thresh Count");
6203
6204	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6205			CTLFLAG_RD, &stats->icrxdmtc,
6206			"Interrupt Cause Rx Desc Min Thresh Count");
6207
6208	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6209			CTLFLAG_RD, &stats->icrxoc,
6210			"Interrupt Cause Receiver Overrun Count");
6211
6212	/* Host to Card Stats */
6213
6214	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6215				    CTLFLAG_RD, NULL,
6216				    "Host to Card Statistics");
6217
6218	host_list = SYSCTL_CHILDREN(host_node);
6219
6220	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6221			CTLFLAG_RD, &stats->cbtmpc,
6222			"Circuit Breaker Tx Packet Count");
6223
6224	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6225			CTLFLAG_RD, &stats->htdpmc,
6226			"Host Transmit Discarded Packets");
6227
6228	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6229			CTLFLAG_RD, &stats->rpthc,
6230			"Rx Packets To Host");
6231
6232	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6233			CTLFLAG_RD, &stats->cbrmpc,
6234			"Circuit Breaker Rx Packet Count");
6235
6236	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6237			CTLFLAG_RD, &stats->cbrdpc,
6238			"Circuit Breaker Rx Dropped Count");
6239
6240	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6241			CTLFLAG_RD, &stats->hgptc,
6242			"Host Good Packets Tx Count");
6243
6244	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6245			CTLFLAG_RD, &stats->htcbdpc,
6246			"Host Tx Circuit Breaker Dropped Count");
6247
6248	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6249			CTLFLAG_RD, &stats->hgorc,
6250			"Host Good Octets Received Count");
6251
6252	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6253			CTLFLAG_RD, &stats->hgotc,
6254			"Host Good Octets Transmit Count");
6255
6256	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6257			CTLFLAG_RD, &stats->lenerrs,
6258			"Length Errors");
6259
6260	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6261			CTLFLAG_RD, &stats->scvpc,
6262			"SerDes/SGMII Code Violation Pkt Count");
6263
6264	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6265			CTLFLAG_RD, &stats->hrmpc,
6266			"Header Redirection Missed Packet Count");
6267}
6268
6269
6270/**********************************************************************
6271 *
6272 *  This routine provides a way to dump out the adapter eeprom,
6273 *  often a useful debug/service tool. Only the first 32 words are
6274 *  dumped; the content that matters lives in that range.
6275 *
6276 **********************************************************************/
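/*
 * Writing 1 to the sysctl node this handler is attached to (registered
 * elsewhere in the driver, typically as dev.igb.<unit>.nvm) dumps the
 * EEPROM contents via igb_print_nvm_info() below.
 */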
6277static int
6278igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6279{
6280	struct adapter *adapter;
6281	int error;
6282	int result;
6283
6284	result = -1;
6285	error = sysctl_handle_int(oidp, &result, 0, req);
6286
6287	if (error || !req->newptr)
6288		return (error);
6289
6290	/*
6291	 * This value will cause a hex dump of the
6292	 * first 32 16-bit words of the EEPROM to
6293	 * the screen.
6294	 */
6295	if (result == 1) {
6296		adapter = (struct adapter *)arg1;
6297		igb_print_nvm_info(adapter);
6298	}
6299
6300	return (error);
6301}
6302
6303static void
6304igb_print_nvm_info(struct adapter *adapter)
6305{
6306	u16	eeprom_data;
6307	int	i, j, row = 0;
6308
6309	/* It's a bit crude, but it gets the job done */
6310	printf("\nInterface EEPROM Dump:\n");
6311	printf("Offset\n0x0000  ");
6312	for (i = 0, j = 0; i < 32; i++, j++) {
6313		if (j == 8) { /* Make the offset block */
6314			j = 0; ++row;
6315			printf("\n0x00%x0  ",row);
6316		}
6317		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6318		printf("%04x ", eeprom_data);
6319	}
6320	printf("\n");
6321}
6322
6323static void
6324igb_set_sysctl_value(struct adapter *adapter, const char *name,
6325	const char *description, int *limit, int value)
6326{
6327	*limit = value;
6328	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6329	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6330	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6331}
6332
6333/*
6334** Set flow control using sysctl:
6335** Flow control values:
6336** 	0 - off
6337**	1 - rx pause
6338**	2 - tx pause
6339**	3 - full
6340*/
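/*
** The values above match the shared-code flow control enum
** (e1000_fc_none = 0, e1000_fc_rx_pause = 1, e1000_fc_tx_pause = 2,
** e1000_fc_full = 3), which is why the raw sysctl input can be compared
** directly against the enum members in the switch below.
*/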
6341static int
6342igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6343{
6344	int		error;
6345	static int	input = 3; /* default is full */
6346	struct adapter	*adapter = (struct adapter *) arg1;
6347
6348	error = sysctl_handle_int(oidp, &input, 0, req);
6349
6350	if ((error) || (req->newptr == NULL))
6351		return (error);
6352
6353	switch (input) {
6354		case e1000_fc_rx_pause:
6355		case e1000_fc_tx_pause:
6356		case e1000_fc_full:
6357		case e1000_fc_none:
6358			adapter->hw.fc.requested_mode = input;
6359			adapter->fc = input;
6360			break;
6361		default:
6362			/* Do nothing */
6363			return (error);
6364	}
6365
6366	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6367	e1000_force_mac_fc(&adapter->hw);
6368	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
6369	return (error);
6370}
6371
6372/*
6373** Manage DMA Coalesce:
6374** Control values:
6375** 	0/1 - off/on
6376**	Legal timer values are:
6377**	250, 500, or 1000-10000 in increments of 1000
6378*/
6379static int
6380igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6381{
6382	struct adapter *adapter = (struct adapter *) arg1;
6383	int		error;
6384
6385	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6386
6387	if ((error) || (req->newptr == NULL))
6388		return (error);
6389
6390	switch (adapter->dmac) {
6391		case 0:
6392			/* Disabling */
6393			break;
6394		case 1: /* Just enable and use default */
6395			adapter->dmac = 1000;
6396			break;
6397		case 250:
6398		case 500:
6399		case 1000:
6400		case 2000:
6401		case 3000:
6402		case 4000:
6403		case 5000:
6404		case 6000:
6405		case 7000:
6406		case 8000:
6407		case 9000:
6408		case 10000:
6409			/* Legal values - allow */
6410			break;
6411		default:
6412			/* Illegal value, disable and reject */
6413			adapter->dmac = 0;
6414			return (EINVAL);
6415	}
6416	/* Reinit the interface */
6417	igb_init(adapter);
6418	return (error);
6419}
6420
6421/*
6422** Manage Energy Efficient Ethernet:
6423** Control values:
6424**     0/1 - enabled/disabled
6425*/
6426static int
6427igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6428{
6429	struct adapter	*adapter = (struct adapter *) arg1;
6430	int		error, value;
6431
6432	value = adapter->hw.dev_spec._82575.eee_disable;
6433	error = sysctl_handle_int(oidp, &value, 0, req);
6434	if (error || req->newptr == NULL)
6435		return (error);
6436	IGB_CORE_LOCK(adapter);
6437	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6438	igb_init_locked(adapter);
6439	IGB_CORE_UNLOCK(adapter);
6440	return (0);
6441}
6442