1/******************************************************************************
2
3  Copyright (c) 2001-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/11/sys/dev/e1000/if_igb.c 342789 2019-01-05 19:32:48Z marius $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include "if_igb.h"
46
47/*********************************************************************
48 *  Driver version:
49 *********************************************************************/
50char igb_driver_version[] = "2.5.3-k";
51
52
53/*********************************************************************
54 *  PCI Device ID Table
55 *
 *  Used by igb_probe to select which devices to attach to
 *  Last field stores an index into igb_strings
 *  Last entry must be all 0s
59 *
60 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61 *********************************************************************/
62
63static igb_vendor_info_t igb_vendor_info_array[] =
64{
65	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER,	0, 0, 0},
72	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER,	0, 0, 0},
79	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII,	0, 0, 0},
81	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER,	0, 0, 0},
88	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER,	0, 0, 0},
89	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES,	0, 0, 0},
90	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII,	0, 0, 0},
91	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER,	0, 0, 0},
93	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER,	0, 0, 0},
98	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES,	0, 0, 0},
99	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII,	0, 0, 0},
100	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER,	0, 0, 0},
101	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII,	0, 0, 0},
104	/* required last entry */
105	{0, 0, 0, 0, 0}
106};
107
108/*********************************************************************
109 *  Table of branding strings for all supported NICs.
110 *********************************************************************/
111
112static char *igb_strings[] = {
113	"Intel(R) PRO/1000 Network Connection"
114};
115
116/*********************************************************************
117 *  Function prototypes
118 *********************************************************************/
119static int	igb_probe(device_t);
120static int	igb_attach(device_t);
121static int	igb_detach(device_t);
122static int	igb_shutdown(device_t);
123static int	igb_suspend(device_t);
124static int	igb_resume(device_t);
125#ifndef IGB_LEGACY_TX
126static int	igb_mq_start(struct ifnet *, struct mbuf *);
127static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128static void	igb_qflush(struct ifnet *);
129static void	igb_deferred_mq_start(void *, int);
130#else
131static void	igb_start(struct ifnet *);
132static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133#endif
134static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
135static uint64_t	igb_get_counter(if_t, ift_counter);
136static void	igb_init(void *);
137static void	igb_init_locked(struct adapter *);
138static void	igb_stop(void *);
139static void	igb_media_status(struct ifnet *, struct ifmediareq *);
140static int	igb_media_change(struct ifnet *);
141static void	igb_identify_hardware(struct adapter *);
142static int	igb_allocate_pci_resources(struct adapter *);
143static int	igb_allocate_msix(struct adapter *);
144static int	igb_allocate_legacy(struct adapter *);
145static int	igb_setup_msix(struct adapter *);
146static void	igb_free_pci_resources(struct adapter *);
147static void	igb_local_timer(void *);
148static void	igb_reset(struct adapter *);
149static int	igb_setup_interface(device_t, struct adapter *);
150static int	igb_allocate_queues(struct adapter *);
151static void	igb_configure_queues(struct adapter *);
152
153static int	igb_allocate_transmit_buffers(struct tx_ring *);
154static void	igb_setup_transmit_structures(struct adapter *);
155static void	igb_setup_transmit_ring(struct tx_ring *);
156static void	igb_initialize_transmit_units(struct adapter *);
157static void	igb_free_transmit_structures(struct adapter *);
158static void	igb_free_transmit_buffers(struct tx_ring *);
159
160static int	igb_allocate_receive_buffers(struct rx_ring *);
161static int	igb_setup_receive_structures(struct adapter *);
162static int	igb_setup_receive_ring(struct rx_ring *);
163static void	igb_initialize_receive_units(struct adapter *);
164static void	igb_free_receive_structures(struct adapter *);
165static void	igb_free_receive_buffers(struct rx_ring *);
166static void	igb_free_receive_ring(struct rx_ring *);
167
168static void	igb_enable_intr(struct adapter *);
169static void	igb_disable_intr(struct adapter *);
170static void	igb_update_stats_counters(struct adapter *);
171static bool	igb_txeof(struct tx_ring *);
172
173static __inline	void igb_rx_discard(struct rx_ring *, int);
174static __inline void igb_rx_input(struct rx_ring *,
175		    struct ifnet *, struct mbuf *, u32);
176
177static bool	igb_rxeof(struct igb_queue *, int, int *);
178static void	igb_rx_checksum(u32, struct mbuf *, u32);
179static int	igb_tx_ctx_setup(struct tx_ring *,
180		    struct mbuf *, u32 *, u32 *);
181static int	igb_tso_setup(struct tx_ring *,
182		    struct mbuf *, u32 *, u32 *);
183static void	igb_set_promisc(struct adapter *);
184static void	igb_disable_promisc(struct adapter *);
185static void	igb_set_multi(struct adapter *);
186static void	igb_update_link_status(struct adapter *);
187static void	igb_refresh_mbufs(struct rx_ring *, int);
188
189static void	igb_register_vlan(void *, struct ifnet *, u16);
190static void	igb_unregister_vlan(void *, struct ifnet *, u16);
191static void	igb_setup_vlan_hw_support(struct adapter *);
192
193static int	igb_xmit(struct tx_ring *, struct mbuf **);
194static int	igb_dma_malloc(struct adapter *, bus_size_t,
195		    struct igb_dma_alloc *, int);
196static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198static void	igb_print_nvm_info(struct adapter *);
199static int 	igb_is_valid_ether_addr(u8 *);
200static void     igb_add_hw_stats(struct adapter *);
201
202static void	igb_vf_init_stats(struct adapter *);
203static void	igb_update_vf_stats_counters(struct adapter *);
204
205/* Management and WOL Support */
206static void	igb_init_manageability(struct adapter *);
207static void	igb_release_manageability(struct adapter *);
208static void     igb_get_hw_control(struct adapter *);
209static void     igb_release_hw_control(struct adapter *);
210static void     igb_enable_wakeup(device_t);
211static void     igb_led_func(void *, int);
212
213static int	igb_irq_fast(void *);
214static void	igb_msix_que(void *);
215static void	igb_msix_link(void *);
216static void	igb_handle_que(void *context, int pending);
217static void	igb_handle_link(void *context, int pending);
218static void	igb_handle_link_locked(struct adapter *);
219
220static void	igb_set_sysctl_value(struct adapter *, const char *,
221		    const char *, int *, int);
222static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225
226#ifdef DEVICE_POLLING
227static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */
229
230/*********************************************************************
231 *  FreeBSD Device Interface Entry Points
232 *********************************************************************/
233
234static device_method_t igb_methods[] = {
235	/* Device interface */
236	DEVMETHOD(device_probe, igb_probe),
237	DEVMETHOD(device_attach, igb_attach),
238	DEVMETHOD(device_detach, igb_detach),
239	DEVMETHOD(device_shutdown, igb_shutdown),
240	DEVMETHOD(device_suspend, igb_suspend),
241	DEVMETHOD(device_resume, igb_resume),
242	DEVMETHOD_END
243};
244
245static driver_t igb_driver = {
246	"igb", igb_methods, sizeof(struct adapter),
247};
248
249static devclass_t igb_devclass;
250DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251MODULE_DEPEND(igb, pci, 1, 1, 1);
252MODULE_DEPEND(igb, ether, 1, 1, 1);
253#ifdef DEV_NETMAP
254MODULE_DEPEND(igb, netmap, 1, 1, 1);
255#endif /* DEV_NETMAP */
256
257/*********************************************************************
258 *  Tunable default values.
259 *********************************************************************/
260
261static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262
263/* Descriptor defaults */
264static int igb_rxd = IGB_DEFAULT_RXD;
265static int igb_txd = IGB_DEFAULT_TXD;
266SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267    "Number of receive descriptors per queue");
268SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269    "Number of transmit descriptors per queue");
270
/*
** AIM: Adaptive Interrupt Moderation
** The interrupt rate is varied over time based on
** the traffic seen on each interrupt vector.
*/
277static int igb_enable_aim = TRUE;
278SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279    "Enable adaptive interrupt moderation");
280
281/*
282 * MSIX should be the default for best performance,
283 * but this allows it to be forced off for testing.
284 */
285static int igb_enable_msix = 1;
286SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287    "Enable MSI-X interrupts");
288
289/*
290** Tuneable Interrupt rate
291*/
292static int igb_max_interrupt_rate = 8000;
293SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295
296#ifndef IGB_LEGACY_TX
297/*
298** Tuneable number of buffers in the buf-ring (drbr_xxx)
299*/
300static int igb_buf_ring_size = IGB_BR_SIZE;
301SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302    &igb_buf_ring_size, 0, "Size of the bufring");
303#endif
304
/*
** Header split causes the packet header to
** be DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf and so need no cluster.  How much it helps
** is very workload dependent.
*/
313static int igb_header_split = FALSE;
314SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315    "Enable receive mbuf header split");
316
/*
** If left at 0, the number of queues is autoconfigured
** based on the number of CPUs and the maximum number of
** supported MSI-X messages.
*/
322static int igb_num_queues = 0;
323SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324    "Number of queues to configure, 0 indicates autoconfigure");
325
326/*
327** Global variable to store last used CPU when binding queues
328** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
329** queue is bound to a cpu.
330*/
331static int igb_last_bind_cpu = -1;
332
333/* How many packets rxeof tries to clean at a time */
334static int igb_rx_process_limit = 100;
335SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336    &igb_rx_process_limit, 0,
337    "Maximum number of received packets to process at a time, -1 means unlimited");
338
339/* How many packets txeof tries to clean at a time */
340static int igb_tx_process_limit = -1;
341SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342    &igb_tx_process_limit, 0,
343    "Maximum number of sent packets to process at a time, -1 means unlimited");
344
345#ifdef DEV_NETMAP	/* see ixgbe.c for details */
346#include <dev/netmap/if_igb_netmap.h>
347#endif /* DEV_NETMAP */
348/*********************************************************************
349 *  Device identification routine
350 *
 *  igb_probe determines whether the driver should be loaded on
 *  an adapter, based on the PCI vendor/device ID of the adapter.
353 *
354 *  return BUS_PROBE_DEFAULT on success, positive on failure
355 *********************************************************************/
356
357static int
358igb_probe(device_t dev)
359{
360	char		adapter_name[256];
361	uint16_t	pci_vendor_id = 0;
362	uint16_t	pci_device_id = 0;
363	uint16_t	pci_subvendor_id = 0;
364	uint16_t	pci_subdevice_id = 0;
365	igb_vendor_info_t *ent;
366
367	INIT_DEBUGOUT("igb_probe: begin");
368
369	pci_vendor_id = pci_get_vendor(dev);
370	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371		return (ENXIO);
372
373	pci_device_id = pci_get_device(dev);
374	pci_subvendor_id = pci_get_subvendor(dev);
375	pci_subdevice_id = pci_get_subdevice(dev);
376
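	/* Walk the device table; a subvendor/subdevice ID of 0 matches any */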
377	ent = igb_vendor_info_array;
378	while (ent->vendor_id != 0) {
379		if ((pci_vendor_id == ent->vendor_id) &&
380		    (pci_device_id == ent->device_id) &&
381
382		    ((pci_subvendor_id == ent->subvendor_id) ||
383		    (ent->subvendor_id == 0)) &&
384
385		    ((pci_subdevice_id == ent->subdevice_id) ||
386		    (ent->subdevice_id == 0))) {
387			sprintf(adapter_name, "%s, Version - %s",
388				igb_strings[ent->index],
389				igb_driver_version);
390			device_set_desc_copy(dev, adapter_name);
391			return (BUS_PROBE_DEFAULT);
392		}
393		ent++;
394	}
395	return (ENXIO);
396}
397
398/*********************************************************************
399 *  Device initialization routine
400 *
401 *  The attach entry point is called when the driver is being loaded.
402 *  This routine identifies the type of hardware, allocates all resources
403 *  and initializes the hardware.
404 *
405 *  return 0 on success, positive on failure
406 *********************************************************************/
407
408static int
409igb_attach(device_t dev)
410{
411	struct adapter	*adapter;
412	int		error = 0;
413	u16		eeprom_data;
414
415	INIT_DEBUGOUT("igb_attach: begin");
416
417	if (resource_disabled("igb", device_get_unit(dev))) {
418		device_printf(dev, "Disabled by device hint\n");
419		return (ENXIO);
420	}
421
422	adapter = device_get_softc(dev);
423	adapter->dev = adapter->osdep.dev = dev;
424	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425
426	/* SYSCTLs */
427	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430	    igb_sysctl_nvm_info, "I", "NVM Information");
431
432	igb_set_sysctl_value(adapter, "enable_aim",
433	    "Interrupt Moderation", &adapter->enable_aim,
434	    igb_enable_aim);
435
436	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440
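	/* Periodic local timer; runs under the core mutex */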
441	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442
443	/* Determine hardware and mac info */
444	igb_identify_hardware(adapter);
445
446	/* Setup PCI resources */
447	if (igb_allocate_pci_resources(adapter)) {
448		device_printf(dev, "Allocation of PCI resources failed\n");
449		error = ENXIO;
450		goto err_pci;
451	}
452
453	/* Do Shared Code initialization */
454	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455		device_printf(dev, "Setup of Shared code failed\n");
456		error = ENXIO;
457		goto err_pci;
458	}
459
460	e1000_get_bus_info(&adapter->hw);
461
462	/* Sysctls for limiting the amount of work done in the taskqueues */
463	igb_set_sysctl_value(adapter, "rx_processing_limit",
464	    "max number of rx packets to process",
465	    &adapter->rx_process_limit, igb_rx_process_limit);
466
467	igb_set_sysctl_value(adapter, "tx_processing_limit",
468	    "max number of tx packets to process",
469	    &adapter->tx_process_limit, igb_tx_process_limit);
470
	/*
	 * Validate the number of transmit and receive descriptors.  It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
476	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479		    IGB_DEFAULT_TXD, igb_txd);
480		adapter->num_tx_desc = IGB_DEFAULT_TXD;
481	} else
482		adapter->num_tx_desc = igb_txd;
483	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486		    IGB_DEFAULT_RXD, igb_rxd);
487		adapter->num_rx_desc = IGB_DEFAULT_RXD;
488	} else
489		adapter->num_rx_desc = igb_rxd;
490
491	adapter->hw.mac.autoneg = DO_AUTO_NEG;
492	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494
495	/* Copper options */
496	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497		adapter->hw.phy.mdix = AUTO_ALL_MODES;
498		adapter->hw.phy.disable_polarity_correction = FALSE;
499		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500	}
501
502	/*
503	 * Set the frame limits assuming
504	 * standard ethernet sized frames.
505	 */
506	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
507
508	/*
509	** Allocate and Setup Queues
510	*/
511	if (igb_allocate_queues(adapter)) {
512		error = ENOMEM;
513		goto err_pci;
514	}
515
516	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats = (struct e1000_vf_stats *)malloc(
		    sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats = (struct e1000_hw_stats *)malloc(
		    sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526	if (adapter->stats == NULL) {
527		device_printf(dev, "Can not allocate stats memory\n");
528		error = ENOMEM;
529		goto err_late;
530	}
531
532	/* Allocate multicast array memory. */
533	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535	if (adapter->mta == NULL) {
536		device_printf(dev, "Can not allocate multicast setup array\n");
537		error = ENOMEM;
538		goto err_late;
539	}
540
541	/* Some adapter-specific advanced features */
542	if (adapter->hw.mac.type >= e1000_i350) {
543		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550		    adapter, 0, igb_sysctl_eee, "I",
551		    "Disable Energy Efficient Ethernet");
552		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553			if (adapter->hw.mac.type == e1000_i354)
554				e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
555			else
556				e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
557		}
558	}
559
	/*
	** Start from a known state; this is
	** important before reading the NVM and
	** MAC address from the hardware.
	*/
565	e1000_reset_hw(&adapter->hw);
566
567	/* Make sure we have a good EEPROM before we read from it */
568	if (((adapter->hw.mac.type != e1000_i210) &&
569	    (adapter->hw.mac.type != e1000_i211)) &&
570	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it is a real issue.
		*/
576		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577			device_printf(dev,
578			    "The EEPROM Checksum Is Not Valid\n");
579			error = EIO;
580			goto err_late;
581		}
582	}
583
584	/*
585	** Copy the permanent MAC address out of the EEPROM
586	*/
587	if (e1000_read_mac_addr(&adapter->hw) < 0) {
588		device_printf(dev, "EEPROM read error while reading MAC"
589		    " address\n");
590		error = EIO;
591		goto err_late;
592	}
593	/* Check its sanity */
594	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595		device_printf(dev, "Invalid MAC address\n");
596		error = EIO;
597		goto err_late;
598	}
599
600	/* Setup OS specific network interface */
601	if (igb_setup_interface(dev, adapter) != 0)
602		goto err_late;
603
604	/* Now get a good starting state */
605	igb_reset(adapter);
606
607	/* Initialize statistics */
608	igb_update_stats_counters(adapter);
609
610	adapter->hw.mac.get_link_status = 1;
611	igb_update_link_status(adapter);
612
613	/* Indicate SOL/IDER usage */
614	if (e1000_check_reset_block(&adapter->hw))
615		device_printf(dev,
616		    "PHY reset is blocked due to SOL/IDER session.\n");
617
618	/* Determine if we have to control management hardware */
619	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620
621	/*
622	 * Setup Wake-on-Lan
623	 */
624	/* APME bit in EEPROM is mapped to WUC.APME */
625	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626	if (eeprom_data)
627		adapter->wol = E1000_WUFC_MAG;
628
629	/* Register for VLAN events */
630	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634
635	igb_add_hw_stats(adapter);
636
637	/* Tell the stack that the interface is not active */
638	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
640
641	adapter->led_dev = led_create(igb_led_func, adapter,
642	    device_get_nameunit(dev));
643
644	/*
645	** Configure Interrupts
646	*/
647	if ((adapter->msix > 1) && (igb_enable_msix))
648		error = igb_allocate_msix(adapter);
649	else /* MSI or Legacy */
650		error = igb_allocate_legacy(adapter);
651	if (error)
652		goto err_late;
653
654#ifdef DEV_NETMAP
655	igb_netmap_attach(adapter);
656#endif /* DEV_NETMAP */
657	INIT_DEBUGOUT("igb_attach: end");
658
659	return (0);
660
661err_late:
662	if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
663		return(error);
664	igb_free_transmit_structures(adapter);
665	igb_free_receive_structures(adapter);
666	igb_release_hw_control(adapter);
667err_pci:
668	igb_free_pci_resources(adapter);
669	if (adapter->ifp != NULL)
670		if_free(adapter->ifp);
671	free(adapter->mta, M_DEVBUF);
672	IGB_CORE_LOCK_DESTROY(adapter);
673
674	return (error);
675}
676
677/*********************************************************************
678 *  Device removal routine
679 *
680 *  The detach entry point is called when the driver is being removed.
681 *  This routine stops the adapter and deallocates all the resources
682 *  that were allocated for driver operation.
683 *
684 *  return 0 on success, positive on failure
685 *********************************************************************/
686
687static int
688igb_detach(device_t dev)
689{
690	struct adapter	*adapter = device_get_softc(dev);
691	struct ifnet	*ifp = adapter->ifp;
692
693	INIT_DEBUGOUT("igb_detach: begin");
694
695	/* Make sure VLANS are not using driver */
696	if (adapter->ifp->if_vlantrunk != NULL) {
697		device_printf(dev,"Vlan in use, detach first\n");
698		return (EBUSY);
699	}
700
701	ether_ifdetach(adapter->ifp);
702
703	if (adapter->led_dev != NULL)
704		led_destroy(adapter->led_dev);
705
706#ifdef DEVICE_POLLING
707	if (ifp->if_capenable & IFCAP_POLLING)
708		ether_poll_deregister(ifp);
709#endif
710
711	IGB_CORE_LOCK(adapter);
712	adapter->in_detach = 1;
713	igb_stop(adapter);
714	IGB_CORE_UNLOCK(adapter);
715
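	/* Reset the PHY now that the interface has been stopped */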
716	e1000_phy_hw_reset(&adapter->hw);
717
718	/* Give control back to firmware */
719	igb_release_manageability(adapter);
720	igb_release_hw_control(adapter);
721
722	/* Unregister VLAN events */
723	if (adapter->vlan_attach != NULL)
724		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
725	if (adapter->vlan_detach != NULL)
726		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
727
728	callout_drain(&adapter->timer);
729
730#ifdef DEV_NETMAP
731	netmap_detach(adapter->ifp);
732#endif /* DEV_NETMAP */
733	igb_free_pci_resources(adapter);
734	bus_generic_detach(dev);
735	if_free(ifp);
736
737	igb_free_transmit_structures(adapter);
738	igb_free_receive_structures(adapter);
739	if (adapter->mta != NULL)
740		free(adapter->mta, M_DEVBUF);
741
742	IGB_CORE_LOCK_DESTROY(adapter);
743
744	return (0);
745}
746
747/*********************************************************************
748 *
749 *  Shutdown entry point
750 *
751 **********************************************************************/
752
753static int
754igb_shutdown(device_t dev)
755{
756	return igb_suspend(dev);
757}
758
759/*
760 * Suspend/resume device methods.
761 */
762static int
763igb_suspend(device_t dev)
764{
765	struct adapter *adapter = device_get_softc(dev);
766
767	IGB_CORE_LOCK(adapter);
768
769	igb_stop(adapter);
770
	igb_release_manageability(adapter);
772	igb_release_hw_control(adapter);
773	igb_enable_wakeup(dev);
774
775	IGB_CORE_UNLOCK(adapter);
776
777	return bus_generic_suspend(dev);
778}
779
780static int
781igb_resume(device_t dev)
782{
783	struct adapter *adapter = device_get_softc(dev);
784	struct tx_ring	*txr = adapter->tx_rings;
785	struct ifnet *ifp = adapter->ifp;
786
787	IGB_CORE_LOCK(adapter);
788	igb_init_locked(adapter);
789	igb_init_manageability(adapter);
790
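	/* Restart any transmits that were queued while suspended */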
791	if ((ifp->if_flags & IFF_UP) &&
792	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
793		for (int i = 0; i < adapter->num_queues; i++, txr++) {
794			IGB_TX_LOCK(txr);
795#ifndef IGB_LEGACY_TX
796			/* Process the stack queue only if not depleted */
797			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
798			    !drbr_empty(ifp, txr->br))
799				igb_mq_start_locked(ifp, txr);
800#else
801			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
802				igb_start_locked(txr, ifp);
803#endif
804			IGB_TX_UNLOCK(txr);
805		}
806	}
807	IGB_CORE_UNLOCK(adapter);
808
809	return bus_generic_resume(dev);
810}
811
812
813#ifdef IGB_LEGACY_TX
814
815/*********************************************************************
816 *  Transmit entry point
817 *
818 *  igb_start is called by the stack to initiate a transmit.
819 *  The driver will remain in this routine as long as there are
820 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
823 **********************************************************************/
824
825static void
826igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
827{
828	struct adapter	*adapter = ifp->if_softc;
829	struct mbuf	*m_head;
830
831	IGB_TX_LOCK_ASSERT(txr);
832
833	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
834	    IFF_DRV_RUNNING)
835		return;
836	if (!adapter->link_active)
837		return;
838
839	/* Call cleanup if number of TX descriptors low */
840	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
841		igb_txeof(txr);
842
843	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
844		if (txr->tx_avail <= IGB_MAX_SCATTER) {
845			txr->queue_status |= IGB_QUEUE_DEPLETED;
846			break;
847		}
848		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
849		if (m_head == NULL)
850			break;
851		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
854		 */
855		if (igb_xmit(txr, &m_head)) {
856			if (m_head != NULL)
857				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
858			if (txr->tx_avail <= IGB_MAX_SCATTER)
859				txr->queue_status |= IGB_QUEUE_DEPLETED;
860			break;
861		}
862
863		/* Send a copy of the frame to the BPF listener */
864		ETHER_BPF_MTAP(ifp, m_head);
865
866		/* Set watchdog on */
867		txr->watchdog_time = ticks;
868		txr->queue_status |= IGB_QUEUE_WORKING;
869	}
870}
871
/*
 * Legacy TX routine: called from the stack, it always
 * uses the first TX ring and spins on its lock.
 * It should not be used with multiqueue TX.
 */
877static void
878igb_start(struct ifnet *ifp)
879{
880	struct adapter	*adapter = ifp->if_softc;
881	struct tx_ring	*txr = adapter->tx_rings;
882
883	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
884		IGB_TX_LOCK(txr);
885		igb_start_locked(txr, ifp);
886		IGB_TX_UNLOCK(txr);
887	}
888	return;
889}
890
891#else /* ~IGB_LEGACY_TX */
892
893/*
894** Multiqueue Transmit Entry:
895**  quick turnaround to the stack
896**
897*/
898static int
899igb_mq_start(struct ifnet *ifp, struct mbuf *m)
900{
901	struct adapter		*adapter = ifp->if_softc;
902	struct igb_queue	*que;
903	struct tx_ring		*txr;
904	int 			i, err = 0;
905#ifdef	RSS
906	uint32_t		bucket_id;
907#endif
908
909	/* Which queue to use */
910	/*
911	 * When doing RSS, map it to the same outbound queue
912	 * as the incoming flow would be mapped to.
913	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU belongs to.
916	 */
917	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
918#ifdef	RSS
919		if (rss_hash2bucket(m->m_pkthdr.flowid,
920		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
921			/* XXX TODO: spit out something if bucket_id > num_queues? */
922			i = bucket_id % adapter->num_queues;
923		} else {
924#endif
925			i = m->m_pkthdr.flowid % adapter->num_queues;
926#ifdef	RSS
927		}
928#endif
929	} else {
930		i = curcpu % adapter->num_queues;
931	}
932	txr = &adapter->tx_rings[i];
933	que = &adapter->queues[i];
934
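	/*
	** Enqueue on the selected ring's buf_ring; drain it now if the
	** TX lock is uncontended, otherwise defer to the queue taskqueue.
	*/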
935	err = drbr_enqueue(ifp, txr->br, m);
936	if (err)
937		return (err);
938	if (IGB_TX_TRYLOCK(txr)) {
939		igb_mq_start_locked(ifp, txr);
940		IGB_TX_UNLOCK(txr);
941	} else
942		taskqueue_enqueue(que->tq, &txr->txq_task);
943
944	return (0);
945}
946
947static int
948igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
949{
950	struct adapter  *adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;
953
954	IGB_TX_LOCK_ASSERT(txr);
955
956	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
957	    adapter->link_active == 0)
958		return (ENETDOWN);
959
960	/* Process the queue */
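	/* Peek/advance/putback preserves packet ordering when igb_xmit() fails */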
961	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
962		if ((err = igb_xmit(txr, &next)) != 0) {
963			if (next == NULL) {
964				/* It was freed, move forward */
965				drbr_advance(ifp, txr->br);
966			} else {
967				/*
968				 * Still have one left, it may not be
969				 * the same since the transmit function
970				 * may have changed it.
971				 */
972				drbr_putback(ifp, txr->br, next);
973			}
974			break;
975		}
976		drbr_advance(ifp, txr->br);
977		enq++;
978		if (next->m_flags & M_MCAST && adapter->vf_ifp)
979			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
980		ETHER_BPF_MTAP(ifp, next);
981		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
982			break;
983	}
984	if (enq > 0) {
985		/* Set the watchdog */
986		txr->queue_status |= IGB_QUEUE_WORKING;
987		txr->watchdog_time = ticks;
988	}
989	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
990		igb_txeof(txr);
991	if (txr->tx_avail <= IGB_MAX_SCATTER)
992		txr->queue_status |= IGB_QUEUE_DEPLETED;
993	return (err);
994}
995
996/*
997 * Called from a taskqueue to drain queued transmit packets.
998 */
999static void
1000igb_deferred_mq_start(void *arg, int pending)
1001{
1002	struct tx_ring *txr = arg;
1003	struct adapter *adapter = txr->adapter;
1004	struct ifnet *ifp = adapter->ifp;
1005
1006	IGB_TX_LOCK(txr);
1007	if (!drbr_empty(ifp, txr->br))
1008		igb_mq_start_locked(ifp, txr);
1009	IGB_TX_UNLOCK(txr);
1010}
1011
1012/*
1013** Flush all ring buffers
1014*/
1015static void
1016igb_qflush(struct ifnet *ifp)
1017{
1018	struct adapter	*adapter = ifp->if_softc;
1019	struct tx_ring	*txr = adapter->tx_rings;
1020	struct mbuf	*m;
1021
1022	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1023		IGB_TX_LOCK(txr);
1024		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1025			m_freem(m);
1026		IGB_TX_UNLOCK(txr);
1027	}
1028	if_qflush(ifp);
1029}
1030#endif /* ~IGB_LEGACY_TX */
1031
1032/*********************************************************************
1033 *  Ioctl entry point
1034 *
1035 *  igb_ioctl is called when the user wants to configure the
1036 *  interface.
1037 *
1038 *  return 0 on success, positive on failure
1039 **********************************************************************/
1040
1041static int
1042igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1043{
1044	struct adapter	*adapter = ifp->if_softc;
1045	struct ifreq	*ifr = (struct ifreq *)data;
1046#if defined(INET) || defined(INET6)
1047	struct ifaddr	*ifa = (struct ifaddr *)data;
1048#endif
1049	bool		avoid_reset = FALSE;
1050	int		error = 0;
1051
1052	if (adapter->in_detach)
1053		return (error);
1054
1055	switch (command) {
1056	case SIOCSIFADDR:
1057#ifdef INET
1058		if (ifa->ifa_addr->sa_family == AF_INET)
1059			avoid_reset = TRUE;
1060#endif
1061#ifdef INET6
1062		if (ifa->ifa_addr->sa_family == AF_INET6)
1063			avoid_reset = TRUE;
1064#endif
1065		/*
1066		** Calling init results in link renegotiation,
1067		** so we avoid doing it when possible.
1068		*/
1069		if (avoid_reset) {
1070			ifp->if_flags |= IFF_UP;
1071			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1072				igb_init(adapter);
1073#ifdef INET
1074			if (!(ifp->if_flags & IFF_NOARP))
1075				arp_ifinit(ifp, ifa);
1076#endif
1077		} else
1078			error = ether_ioctl(ifp, command, data);
1079		break;
1080	case SIOCSIFMTU:
1081	    {
1082		int max_frame_size;
1083
1084		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1085
1086		IGB_CORE_LOCK(adapter);
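		/* Largest supported frame: 9216-byte MTU plus Ethernet header and CRC */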
1087		max_frame_size = 9234;
1088		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1089		    ETHER_CRC_LEN) {
1090			IGB_CORE_UNLOCK(adapter);
1091			error = EINVAL;
1092			break;
1093		}
1094
1095		ifp->if_mtu = ifr->ifr_mtu;
1096		adapter->max_frame_size =
1097		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1098		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
1099			igb_init_locked(adapter);
1100		IGB_CORE_UNLOCK(adapter);
1101		break;
1102	    }
1103	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
1106		IGB_CORE_LOCK(adapter);
1107		if (ifp->if_flags & IFF_UP) {
1108			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1109				if ((ifp->if_flags ^ adapter->if_flags) &
1110				    (IFF_PROMISC | IFF_ALLMULTI)) {
1111					igb_disable_promisc(adapter);
1112					igb_set_promisc(adapter);
1113				}
1114			} else
1115				igb_init_locked(adapter);
1116		} else
1117			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1118				igb_stop(adapter);
1119		adapter->if_flags = ifp->if_flags;
1120		IGB_CORE_UNLOCK(adapter);
1121		break;
1122	case SIOCADDMULTI:
1123	case SIOCDELMULTI:
1124		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1125		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1126			IGB_CORE_LOCK(adapter);
1127			igb_disable_intr(adapter);
1128			igb_set_multi(adapter);
1129#ifdef DEVICE_POLLING
1130			if (!(ifp->if_capenable & IFCAP_POLLING))
1131#endif
1132				igb_enable_intr(adapter);
1133			IGB_CORE_UNLOCK(adapter);
1134		}
1135		break;
1136	case SIOCSIFMEDIA:
1137		/* Check SOL/IDER usage */
1138		IGB_CORE_LOCK(adapter);
1139		if (e1000_check_reset_block(&adapter->hw)) {
1140			IGB_CORE_UNLOCK(adapter);
1141			device_printf(adapter->dev, "Media change is"
1142			    " blocked due to SOL/IDER session.\n");
1143			break;
1144		}
1145		IGB_CORE_UNLOCK(adapter);
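		/* FALLTHROUGH */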
1146	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: "
		    "SIOCxIFMEDIA (Get/Set Interface Media)");
1149		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1150		break;
1151	case SIOCSIFCAP:
1152	    {
1153		int mask, reinit;
1154
1155		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1156		reinit = 0;
1157		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1158#ifdef DEVICE_POLLING
1159		if (mask & IFCAP_POLLING) {
1160			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1161				error = ether_poll_register(igb_poll, ifp);
1162				if (error)
1163					return (error);
1164				IGB_CORE_LOCK(adapter);
1165				igb_disable_intr(adapter);
1166				ifp->if_capenable |= IFCAP_POLLING;
1167				IGB_CORE_UNLOCK(adapter);
1168			} else {
1169				error = ether_poll_deregister(ifp);
1170				/* Enable interrupt even in error case */
1171				IGB_CORE_LOCK(adapter);
1172				igb_enable_intr(adapter);
1173				ifp->if_capenable &= ~IFCAP_POLLING;
1174				IGB_CORE_UNLOCK(adapter);
1175			}
1176		}
1177#endif
1178#if __FreeBSD_version >= 1000000
1179		/* HW cannot turn these on/off separately */
1180		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1181			ifp->if_capenable ^= IFCAP_RXCSUM;
1182			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1183			reinit = 1;
1184		}
1185		if (mask & IFCAP_TXCSUM) {
1186			ifp->if_capenable ^= IFCAP_TXCSUM;
1187			reinit = 1;
1188		}
1189		if (mask & IFCAP_TXCSUM_IPV6) {
1190			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1191			reinit = 1;
1192		}
1193#else
1194		if (mask & IFCAP_HWCSUM) {
1195			ifp->if_capenable ^= IFCAP_HWCSUM;
1196			reinit = 1;
1197		}
1198#endif
1199		if (mask & IFCAP_TSO4) {
1200			ifp->if_capenable ^= IFCAP_TSO4;
1201			reinit = 1;
1202		}
1203		if (mask & IFCAP_TSO6) {
1204			ifp->if_capenable ^= IFCAP_TSO6;
1205			reinit = 1;
1206		}
1207		if (mask & IFCAP_VLAN_HWTAGGING) {
1208			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1209			reinit = 1;
1210		}
1211		if (mask & IFCAP_VLAN_HWFILTER) {
1212			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1213			reinit = 1;
1214		}
1215		if (mask & IFCAP_VLAN_HWTSO) {
1216			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1217			reinit = 1;
1218		}
1219		if (mask & IFCAP_LRO) {
1220			ifp->if_capenable ^= IFCAP_LRO;
1221			reinit = 1;
1222		}
1223		if (mask & IFCAP_WOL) {
1224			if (mask & IFCAP_WOL_MAGIC)
1225				ifp->if_capenable ^= IFCAP_WOL_MAGIC;
1226			if (mask & IFCAP_WOL_MCAST)
1227				ifp->if_capenable ^= IFCAP_WOL_MCAST;
1228			if (mask & IFCAP_WOL_UCAST)
1229				ifp->if_capenable ^= IFCAP_WOL_UCAST;
1230		}
1231		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1232			igb_init(adapter);
1233		VLAN_CAPABILITIES(ifp);
1234		break;
1235	    }
1236
1237	default:
1238		error = ether_ioctl(ifp, command, data);
1239		break;
1240	}
1241
1242	return (error);
1243}
1244
1245
1246/*********************************************************************
1247 *  Init entry point
1248 *
 *  This routine is used in two ways.  It is used by the stack as
 *  the init entry point in the network interface structure.  It is
 *  also used by the driver as a hw/sw initialization routine to
 *  bring the hardware and software to a consistent state.
1255 **********************************************************************/
1256
1257static void
1258igb_init_locked(struct adapter *adapter)
1259{
1260	struct ifnet	*ifp = adapter->ifp;
1261	device_t	dev = adapter->dev;
1262
1263	INIT_DEBUGOUT("igb_init: begin");
1264
1265	IGB_CORE_LOCK_ASSERT(adapter);
1266
1267	igb_disable_intr(adapter);
1268	callout_stop(&adapter->timer);
1269
1270	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);
1273
1274	/* Put the address into the Receive Address Array */
1275	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1276
1277	igb_reset(adapter);
1278	igb_update_link_status(adapter);
1279
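	/* Program the VLAN ethertype (VET) register used for 802.1Q tags */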
1280	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1281
1282	/* Set hardware offload abilities */
1283	ifp->if_hwassist = 0;
1284	if (ifp->if_capenable & IFCAP_TXCSUM) {
1285#if __FreeBSD_version >= 1000000
1286		ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
1287		if (adapter->hw.mac.type != e1000_82575)
1288			ifp->if_hwassist |= CSUM_IP_SCTP;
1289#else
1290		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1291#if __FreeBSD_version >= 800000
1292		if (adapter->hw.mac.type != e1000_82575)
1293			ifp->if_hwassist |= CSUM_SCTP;
1294#endif
1295#endif
1296	}
1297
1298#if __FreeBSD_version >= 1000000
1299	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
1300		ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
1301		if (adapter->hw.mac.type != e1000_82575)
1302			ifp->if_hwassist |= CSUM_IP6_SCTP;
1303	}
1304#endif
1305	if (ifp->if_capenable & IFCAP_TSO)
1306		ifp->if_hwassist |= CSUM_TSO;
1307
1308	/* Clear bad data from Rx FIFOs */
1309	e1000_rx_fifo_flush_82575(&adapter->hw);
1310
1311	/* Configure for OS presence */
1312	igb_init_manageability(adapter);
1313
1314	/* Prepare transmit descriptors and buffers */
1315	igb_setup_transmit_structures(adapter);
1316	igb_initialize_transmit_units(adapter);
1317
1318	/* Setup Multicast table */
1319	igb_set_multi(adapter);
1320
1321	/*
1322	** Figure out the desired mbuf pool
1323	** for doing jumbo/packetsplit
1324	*/
1325	if (adapter->max_frame_size <= 2048)
1326		adapter->rx_mbuf_sz = MCLBYTES;
1327#ifndef CONTIGMALLOC_WORKS
	else
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1330#else
1331	else if (adapter->max_frame_size <= 4096)
1332		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1333	else
1334		adapter->rx_mbuf_sz = MJUM9BYTES;
1335#endif
1336
1337	/* Prepare receive descriptors and buffers */
1338	if (igb_setup_receive_structures(adapter)) {
1339		device_printf(dev, "Could not setup receive structures\n");
1340		return;
1341	}
1342	igb_initialize_receive_units(adapter);
1343
	/* Enable VLAN support */
1345	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1346		igb_setup_vlan_hw_support(adapter);
1347
1348	/* Don't lose promiscuous settings */
1349	igb_set_promisc(adapter);
1350
1351	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1352	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1353
1354	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1355	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1356
1357	if (adapter->msix > 1) /* Set up queue routing */
1358		igb_configure_queues(adapter);
1359
1360	/* this clears any pending interrupts */
1361	E1000_READ_REG(&adapter->hw, E1000_ICR);
1362#ifdef DEVICE_POLLING
1363	/*
1364	 * Only enable interrupts if we are not polling, make sure
1365	 * they are off otherwise.
1366	 */
1367	if (ifp->if_capenable & IFCAP_POLLING)
1368		igb_disable_intr(adapter);
1369	else
1370#endif /* DEVICE_POLLING */
1371	{
1372		igb_enable_intr(adapter);
1373		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1374	}
1375
1376	/* Set Energy Efficient Ethernet */
1377	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1378		if (adapter->hw.mac.type == e1000_i354)
1379			e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
1380		else
1381			e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
1382	}
1383}
1384
1385static void
1386igb_init(void *arg)
1387{
1388	struct adapter *adapter = arg;
1389
1390	IGB_CORE_LOCK(adapter);
1391	igb_init_locked(adapter);
1392	IGB_CORE_UNLOCK(adapter);
1393}
1394
1395
1396static void
1397igb_handle_que(void *context, int pending)
1398{
1399	struct igb_queue *que = context;
1400	struct adapter *adapter = que->adapter;
1401	struct tx_ring *txr = que->txr;
1402	struct ifnet	*ifp = adapter->ifp;
1403
1404	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1405		bool	more;
1406
1407		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1408
1409		IGB_TX_LOCK(txr);
1410		igb_txeof(txr);
1411#ifndef IGB_LEGACY_TX
1412		/* Process the stack queue only if not depleted */
1413		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1414		    !drbr_empty(ifp, txr->br))
1415			igb_mq_start_locked(ifp, txr);
1416#else
1417		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1418			igb_start_locked(txr, ifp);
1419#endif
1420		IGB_TX_UNLOCK(txr);
1421		/* Do we need another? */
1422		if (more) {
1423			taskqueue_enqueue(que->tq, &que->que_task);
1424			return;
1425		}
1426	}
1427
1428#ifdef DEVICE_POLLING
1429	if (ifp->if_capenable & IFCAP_POLLING)
1430		return;
1431#endif
1432	/* Reenable this interrupt */
1433	if (que->eims)
1434		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1435	else
1436		igb_enable_intr(adapter);
1437}
1438
1439/* Deal with link in a sleepable context */
1440static void
1441igb_handle_link(void *context, int pending)
1442{
1443	struct adapter *adapter = context;
1444
1445	IGB_CORE_LOCK(adapter);
1446	igb_handle_link_locked(adapter);
1447	IGB_CORE_UNLOCK(adapter);
1448}
1449
1450static void
1451igb_handle_link_locked(struct adapter *adapter)
1452{
1453	struct tx_ring	*txr = adapter->tx_rings;
1454	struct ifnet *ifp = adapter->ifp;
1455
1456	IGB_CORE_LOCK_ASSERT(adapter);
1457	adapter->hw.mac.get_link_status = 1;
1458	igb_update_link_status(adapter);
1459	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1460		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1461			IGB_TX_LOCK(txr);
1462#ifndef IGB_LEGACY_TX
1463			/* Process the stack queue only if not depleted */
1464			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1465			    !drbr_empty(ifp, txr->br))
1466				igb_mq_start_locked(ifp, txr);
1467#else
1468			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1469				igb_start_locked(txr, ifp);
1470#endif
1471			IGB_TX_UNLOCK(txr);
1472		}
1473	}
1474}
1475
1476/*********************************************************************
1477 *
1478 *  MSI/Legacy Deferred
1479 *  Interrupt Service routine
1480 *
1481 *********************************************************************/
1482static int
1483igb_irq_fast(void *arg)
1484{
1485	struct adapter		*adapter = arg;
1486	struct igb_queue	*que = adapter->queues;
1487	u32			reg_icr;
1488
1489
1490	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1491
1492	/* Hot eject?  */
1493	if (reg_icr == 0xffffffff)
1494		return FILTER_STRAY;
1495
1496	/* Definitely not our interrupt.  */
1497	if (reg_icr == 0x0)
1498		return FILTER_STRAY;
1499
1500	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1501		return FILTER_STRAY;
1502
1503	/*
1504	 * Mask interrupts until the taskqueue is finished running.  This is
1505	 * cheap, just assume that it is needed.  This also works around the
1506	 * MSI message reordering errata on certain systems.
1507	 */
1508	igb_disable_intr(adapter);
1509	taskqueue_enqueue(que->tq, &que->que_task);
1510
1511	/* Link status change */
1512	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1513		taskqueue_enqueue(que->tq, &adapter->link_task);
1514
1515	if (reg_icr & E1000_ICR_RXO)
1516		adapter->rx_overruns++;
1517	return FILTER_HANDLED;
1518}
1519
1520#ifdef DEVICE_POLLING
1521#if __FreeBSD_version >= 800000
1522#define POLL_RETURN_COUNT(a) (a)
1523static int
1524#else
1525#define POLL_RETURN_COUNT(a)
1526static void
1527#endif
1528igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1529{
1530	struct adapter		*adapter = ifp->if_softc;
1531	struct igb_queue	*que;
1532	struct tx_ring		*txr;
1533	u32			reg_icr, rx_done = 0;
1534	u32			loop = IGB_MAX_LOOP;
1535	bool			more;
1536
1537	IGB_CORE_LOCK(adapter);
1538	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1539		IGB_CORE_UNLOCK(adapter);
1540		return POLL_RETURN_COUNT(rx_done);
1541	}
1542
1543	if (cmd == POLL_AND_CHECK_STATUS) {
1544		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1545		/* Link status change */
1546		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1547			igb_handle_link_locked(adapter);
1548
1549		if (reg_icr & E1000_ICR_RXO)
1550			adapter->rx_overruns++;
1551	}
1552	IGB_CORE_UNLOCK(adapter);
1553
1554	for (int i = 0; i < adapter->num_queues; i++) {
1555		que = &adapter->queues[i];
1556		txr = que->txr;
1557
1558		igb_rxeof(que, count, &rx_done);
1559
1560		IGB_TX_LOCK(txr);
1561		do {
1562			more = igb_txeof(txr);
1563		} while (loop-- && more);
1564#ifndef IGB_LEGACY_TX
1565		if (!drbr_empty(ifp, txr->br))
1566			igb_mq_start_locked(ifp, txr);
1567#else
1568		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1569			igb_start_locked(txr, ifp);
1570#endif
1571		IGB_TX_UNLOCK(txr);
1572	}
1573
1574	return POLL_RETURN_COUNT(rx_done);
1575}
1576#endif /* DEVICE_POLLING */
1577
1578/*********************************************************************
1579 *
1580 *  MSIX Que Interrupt Service routine
1581 *
1582 **********************************************************************/
1583static void
1584igb_msix_que(void *arg)
1585{
1586	struct igb_queue *que = arg;
1587	struct adapter *adapter = que->adapter;
1588	struct ifnet   *ifp = adapter->ifp;
1589	struct tx_ring *txr = que->txr;
1590	struct rx_ring *rxr = que->rxr;
1591	u32		newitr = 0;
1592	bool		more_rx;
1593
1594	/* Ignore spurious interrupts */
1595	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1596		return;
1597
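	/* Mask this queue's vector while it is being serviced */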
1598	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1599	++que->irqs;
1600
1601	IGB_TX_LOCK(txr);
1602	igb_txeof(txr);
1603#ifndef IGB_LEGACY_TX
1604	/* Process the stack queue only if not depleted */
1605	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1606	    !drbr_empty(ifp, txr->br))
1607		igb_mq_start_locked(ifp, txr);
1608#else
1609	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1610		igb_start_locked(txr, ifp);
1611#endif
1612	IGB_TX_UNLOCK(txr);
1613
1614	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1615
1616	if (adapter->enable_aim == FALSE)
1617		goto no_calc;
1618	/*
1619	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
1621	**  - Calculate based on average size over
1622	**    the last interval.
1623	*/
1624        if (que->eitr_setting)
1625                E1000_WRITE_REG(&adapter->hw,
1626                    E1000_EITR(que->msix), que->eitr_setting);
1627
1628        que->eitr_setting = 0;
1629
1630        /* Idle, do nothing */
1631        if ((txr->bytes == 0) && (rxr->bytes == 0))
1632                goto no_calc;
1633
        /* Use half the default if the link is below gigabit speed */
1635        if (adapter->link_speed != 1000)
1636                newitr = IGB_DEFAULT_ITR / 2;
1637        else {
1638		if ((txr->bytes) && (txr->packets))
1639                	newitr = txr->bytes/txr->packets;
1640		if ((rxr->bytes) && (rxr->packets))
1641			newitr = max(newitr,
1642			    (rxr->bytes / rxr->packets));
1643                newitr += 24; /* account for hardware frame, crc */
1644		/* set an upper boundary */
1645		newitr = min(newitr, 3000);
1646		/* Be nice to the mid range */
1647                if ((newitr > 300) && (newitr < 1200))
1648                        newitr = (newitr / 3);
1649                else
1650                        newitr = (newitr / 2);
1651        }
1652        newitr &= 0x7FFC;  /* Mask invalid bits */
1653        if (adapter->hw.mac.type == e1000_82575)
1654                newitr |= newitr << 16;
1655        else
1656                newitr |= E1000_EITR_CNT_IGNR;
1657
1658        /* save for next interrupt */
1659        que->eitr_setting = newitr;
1660
1661        /* Reset state */
1662        txr->bytes = 0;
1663        txr->packets = 0;
1664        rxr->bytes = 0;
1665        rxr->packets = 0;
1666
1667no_calc:
	/* Schedule a clean task if needed */
1669	if (more_rx)
1670		taskqueue_enqueue(que->tq, &que->que_task);
1671	else
1672		/* Reenable this interrupt */
1673		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1674	return;
1675}
1676
1677
1678/*********************************************************************
1679 *
1680 *  MSIX Link Interrupt Service routine
1681 *
1682 **********************************************************************/
1683
1684static void
1685igb_msix_link(void *arg)
1686{
1687	struct adapter	*adapter = arg;
1688	u32       	icr;
1689
1690	++adapter->link_irq;
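	/* Reading ICR acknowledges and clears the interrupt cause bits */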
1691	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1692	if (!(icr & E1000_ICR_LSC))
1693		goto spurious;
1694	igb_handle_link(adapter, 0);
1695
1696spurious:
1697	/* Rearm */
1698	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1699	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1700	return;
1701}
1702
1703
1704/*********************************************************************
1705 *
1706 *  Media Ioctl callback
1707 *
1708 *  This routine is called whenever the user queries the status of
1709 *  the interface using ifconfig.
1710 *
1711 **********************************************************************/
1712static void
1713igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1714{
1715	struct adapter *adapter = ifp->if_softc;
1716
1717	INIT_DEBUGOUT("igb_media_status: begin");
1718
1719	IGB_CORE_LOCK(adapter);
1720	igb_update_link_status(adapter);
1721
1722	ifmr->ifm_status = IFM_AVALID;
1723	ifmr->ifm_active = IFM_ETHER;
1724
1725	if (!adapter->link_active) {
1726		IGB_CORE_UNLOCK(adapter);
1727		return;
1728	}
1729
1730	ifmr->ifm_status |= IFM_ACTIVE;
1731
1732	switch (adapter->link_speed) {
1733	case 10:
1734		ifmr->ifm_active |= IFM_10_T;
1735		break;
1736	case 100:
1737		/*
1738		** Support for 100Mb SFP - these are Fiber
1739		** but the media type appears as serdes
1740		*/
1741		if (adapter->hw.phy.media_type ==
1742		    e1000_media_type_internal_serdes)
1743			ifmr->ifm_active |= IFM_100_FX;
1744		else
1745			ifmr->ifm_active |= IFM_100_TX;
1746		break;
1747	case 1000:
1748		ifmr->ifm_active |= IFM_1000_T;
1749		break;
1750	case 2500:
1751		ifmr->ifm_active |= IFM_2500_SX;
1752		break;
1753	}
1754
1755	if (adapter->link_duplex == FULL_DUPLEX)
1756		ifmr->ifm_active |= IFM_FDX;
1757	else
1758		ifmr->ifm_active |= IFM_HDX;
1759
1760	IGB_CORE_UNLOCK(adapter);
1761}
1762
1763/*********************************************************************
1764 *
1765 *  Media Ioctl callback
1766 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
1769 *
1770 **********************************************************************/
1771static int
1772igb_media_change(struct ifnet *ifp)
1773{
1774	struct adapter *adapter = ifp->if_softc;
1775	struct ifmedia  *ifm = &adapter->media;
1776
1777	INIT_DEBUGOUT("igb_media_change: begin");
1778
1779	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1780		return (EINVAL);
1781
1782	IGB_CORE_LOCK(adapter);
1783	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1784	case IFM_AUTO:
1785		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1786		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1787		break;
1788	case IFM_1000_LX:
1789	case IFM_1000_SX:
1790	case IFM_1000_T:
1791		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1792		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1793		break;
1794	case IFM_100_TX:
1795		adapter->hw.mac.autoneg = FALSE;
1796		adapter->hw.phy.autoneg_advertised = 0;
1797		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1798			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1799		else
1800			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1801		break;
1802	case IFM_10_T:
1803		adapter->hw.mac.autoneg = FALSE;
1804		adapter->hw.phy.autoneg_advertised = 0;
1805		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1806			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1807		else
1808			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1809		break;
1810	default:
1811		device_printf(adapter->dev, "Unsupported media type\n");
1812	}
1813
1814	igb_init_locked(adapter);
1815	IGB_CORE_UNLOCK(adapter);
1816
1817	return (0);
1818}
1819
1820
1821/*********************************************************************
1822 *
1823 *  This routine maps the mbufs to Advanced TX descriptors.
1824 *
1825 **********************************************************************/
1826static int
1827igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1828{
1829	struct adapter  *adapter = txr->adapter;
1830	u32		olinfo_status = 0, cmd_type_len;
1831	int             i, j, error, nsegs;
1832	int		first;
1833	bool		remap = TRUE;
1834	struct mbuf	*m_head;
1835	bus_dma_segment_t segs[IGB_MAX_SCATTER];
1836	bus_dmamap_t	map;
1837	struct igb_tx_buf *txbuf;
1838	union e1000_adv_tx_desc *txd = NULL;
1839
1840	m_head = *m_headp;
1841
1842	/* Basic descriptor defines */
1843        cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1844	    E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1845
1846	if (m_head->m_flags & M_VLANTAG)
1847        	cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1848
        /*
         * Capture the first descriptor used: its tx_buffer entry
         * will later hold the EOP descriptor that we ask the
         * hardware to report completion on.
         */
1854        first = txr->next_avail_desc;
1855	txbuf = &txr->tx_buffers[first];
1856	map = txbuf->map;
1857
1858	/*
1859	 * Map the packet for DMA.
1860	 */
1861retry:
1862	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1863	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1864
1865	if (__predict_false(error)) {
1866		struct mbuf *m;
1867
1868		switch (error) {
1869		case EFBIG:
1870			/* Try it again? - one try */
1871			if (remap == TRUE) {
1872				remap = FALSE;
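				/*
				 * m_collapse(9) tries to squeeze the chain
				 * into at most IGB_MAX_SCATTER segments,
				 * which is cheaper than a full m_defrag(9).
				 */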
1873				m = m_collapse(*m_headp, M_NOWAIT,
1874				    IGB_MAX_SCATTER);
1875				if (m == NULL) {
1876					adapter->mbuf_defrag_failed++;
1877					m_freem(*m_headp);
1878					*m_headp = NULL;
1879					return (ENOBUFS);
1880				}
1881				*m_headp = m;
1882				goto retry;
1883			} else
1884				return (error);
1885		default:
1886			txr->no_tx_dma_setup++;
1887			m_freem(*m_headp);
1888			*m_headp = NULL;
1889			return (error);
1890		}
1891	}
1892
1893	/* Make certain there are enough descriptors */
1894	if (txr->tx_avail < (nsegs + 2)) {
1895		txr->no_desc_avail++;
1896		bus_dmamap_unload(txr->txtag, map);
1897		return (ENOBUFS);
1898	}
1899	m_head = *m_headp;
1900
	/*
	** Set up the appropriate offload context;
	** this will consume the first descriptor.
	*/
1905	error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1906	if (__predict_false(error)) {
1907		m_freem(*m_headp);
1908		*m_headp = NULL;
1909		return (error);
1910	}
1911
1912	/* 82575 needs the queue index added */
1913	if (adapter->hw.mac.type == e1000_82575)
1914		olinfo_status |= txr->me << 4;
1915
1916	i = txr->next_avail_desc;
1917	for (j = 0; j < nsegs; j++) {
1918		bus_size_t seglen;
1919		bus_addr_t segaddr;
1920
1921		txbuf = &txr->tx_buffers[i];
1922		txd = &txr->tx_base[i];
1923		seglen = segs[j].ds_len;
1924		segaddr = htole64(segs[j].ds_addr);
1925
1926		txd->read.buffer_addr = segaddr;
1927		txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1928		    cmd_type_len | seglen);
1929		txd->read.olinfo_status = htole32(olinfo_status);
1930
1931		if (++i == txr->num_desc)
1932			i = 0;
1933	}
1934
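	/*
	 * Mark the final descriptor: EOP ends the frame and RS asks
	 * the hardware to write back completion status for it.
	 */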
1935	txd->read.cmd_type_len |=
1936	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1937	txr->tx_avail -= nsegs;
1938	txr->next_avail_desc = i;
1939
1940	txbuf->m_head = m_head;
	/*
	** Swap the maps so the last descriptor, which gets the
	** completion interrupt, keeps the map that was actually
	** loaded for this packet, and the first descriptor gets
	** the unused map.
	*/
1947	txr->tx_buffers[first].map = txbuf->map;
1948	txbuf->map = map;
1949	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1950
1951        /* Set the EOP descriptor that will be marked done */
1952        txbuf = &txr->tx_buffers[first];
1953	txbuf->eop = txd;
1954
1955        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1956            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
1961	++txr->total_packets;
1962	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1963
1964	return (0);
1965}

static void
1967igb_set_promisc(struct adapter *adapter)
1968{
1969	struct ifnet	*ifp = adapter->ifp;
1970	struct e1000_hw *hw = &adapter->hw;
1971	u32		reg;
1972
1973	if (adapter->vf_ifp) {
1974		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1975		return;
1976	}
1977
1978	reg = E1000_READ_REG(hw, E1000_RCTL);
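	/* UPE = unicast promiscuous, MPE = multicast promiscuous */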
1979	if (ifp->if_flags & IFF_PROMISC) {
1980		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1981		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1982	} else if (ifp->if_flags & IFF_ALLMULTI) {
1983		reg |= E1000_RCTL_MPE;
1984		reg &= ~E1000_RCTL_UPE;
1985		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1986	}
1987}
1988
1989static void
1990igb_disable_promisc(struct adapter *adapter)
1991{
1992	struct e1000_hw *hw = &adapter->hw;
1993	struct ifnet	*ifp = adapter->ifp;
1994	u32		reg;
1995	int		mcnt = 0;
1996
1997	if (adapter->vf_ifp) {
1998		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1999		return;
2000	}
2001	reg = E1000_READ_REG(hw, E1000_RCTL);
2002	reg &=  (~E1000_RCTL_UPE);
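	/*
	 * Unicast promiscuous is always cleared here; multicast
	 * promiscuous (MPE) is only cleared below if the multicast
	 * list fits in the hardware filter, so count the addresses
	 * first.
	 */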
2003	if (ifp->if_flags & IFF_ALLMULTI)
2004		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2005	else {
2006		struct  ifmultiaddr *ifma;
2007#if __FreeBSD_version < 800000
2008		IF_ADDR_LOCK(ifp);
2009#else
2010		if_maddr_rlock(ifp);
2011#endif
2012		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2013			if (ifma->ifma_addr->sa_family != AF_LINK)
2014				continue;
2015			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2016				break;
2017			mcnt++;
2018		}
2019#if __FreeBSD_version < 800000
2020		IF_ADDR_UNLOCK(ifp);
2021#else
2022		if_maddr_runlock(ifp);
2023#endif
2024	}
2025	/* Don't disable if in MAX groups */
2026	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2027		reg &=  (~E1000_RCTL_MPE);
2028	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2029}
2030
2031
2032/*********************************************************************
2033 *  Multicast Update
2034 *
 *  This routine is called whenever the multicast address list is updated.
2036 *
2037 **********************************************************************/
2038
2039static void
2040igb_set_multi(struct adapter *adapter)
2041{
2042	struct ifnet	*ifp = adapter->ifp;
2043	struct ifmultiaddr *ifma;
2044	u32 reg_rctl = 0;
2045	u8  *mta;
2046
2047	int mcnt = 0;
2048
2049	IOCTL_DEBUGOUT("igb_set_multi: begin");
2050
2051	mta = adapter->mta;
2052	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2053	    MAX_NUM_MULTICAST_ADDRESSES);
2054
2055#if __FreeBSD_version < 800000
2056	IF_ADDR_LOCK(ifp);
2057#else
2058	if_maddr_rlock(ifp);
2059#endif
2060	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2061		if (ifma->ifma_addr->sa_family != AF_LINK)
2062			continue;
2063
2064		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2065			break;
2066
2067		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2068		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2069		mcnt++;
2070	}
2071#if __FreeBSD_version < 800000
2072	IF_ADDR_UNLOCK(ifp);
2073#else
2074	if_maddr_runlock(ifp);
2075#endif
2076
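	/*
	** If the list overflows what the hardware filter can hold,
	** fall back to multicast promiscuous mode; otherwise program
	** the exact list.
	*/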
2077	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2078		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2079		reg_rctl |= E1000_RCTL_MPE;
2080		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2081	} else
2082		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2083}
2084
2085
2086/*********************************************************************
2087 *  Timer routine:
2088 *  	This routine checks for link status,
2089 *	updates statistics, and does the watchdog.
2090 *
2091 **********************************************************************/
2092
2093static void
2094igb_local_timer(void *arg)
2095{
2096	struct adapter		*adapter = arg;
2097	device_t		dev = adapter->dev;
2098	struct ifnet		*ifp = adapter->ifp;
2099	struct tx_ring		*txr = adapter->tx_rings;
2100	struct igb_queue	*que = adapter->queues;
2101	int			hung = 0, busy = 0;
2102
2103
2104	IGB_CORE_LOCK_ASSERT(adapter);
2105
2106	igb_update_link_status(adapter);
2107	igb_update_stats_counters(adapter);
2108
2109        /*
2110        ** Check the TX queues status
2111	**	- central locked handling of OACTIVE
2112	**	- watchdog only if all queues show hung
2113        */
2114	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2115		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2116		    (adapter->pause_frames == 0))
2117			++hung;
2118		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2119			++busy;
2120		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2121			taskqueue_enqueue(que->tq, &que->que_task);
2122	}
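	/*
	** Only fire the watchdog when every queue looks hung; set
	** OACTIVE when every queue is out of descriptors and clear
	** it again once any queue recovers.
	*/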
2123	if (hung == adapter->num_queues)
2124		goto timeout;
2125	if (busy == adapter->num_queues)
2126		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2127	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2128	    (busy < adapter->num_queues))
2129		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2130
2131	adapter->pause_frames = 0;
2132	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2133#ifndef DEVICE_POLLING
2134	/* Schedule all queue interrupts - deadlock protection */
2135	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2136#endif
2137	return;
2138
2139timeout:
2140	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2141	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2142            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2143            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(dev, "TX(%d) desc avail = %d, "
            "Next TX to Clean = %d\n",
            txr->me, txr->tx_avail, txr->next_to_clean);
2147	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2148	adapter->watchdog_events++;
2149	igb_init_locked(adapter);
2150}
2151
2152static void
2153igb_update_link_status(struct adapter *adapter)
2154{
2155	struct e1000_hw		*hw = &adapter->hw;
2156	struct e1000_fc_info	*fc = &hw->fc;
2157	struct ifnet		*ifp = adapter->ifp;
2158	device_t		dev = adapter->dev;
2159	struct tx_ring		*txr = adapter->tx_rings;
2160	u32			link_check, thstat, ctrl;
2161	char			*flowctl = NULL;
2162
2163	link_check = thstat = ctrl = 0;
2164
2165	/* Get the cached link value or read for real */
2166        switch (hw->phy.media_type) {
2167        case e1000_media_type_copper:
2168                if (hw->mac.get_link_status) {
2169			/* Do the work to read phy */
2170                        e1000_check_for_link(hw);
2171                        link_check = !hw->mac.get_link_status;
2172                } else
2173                        link_check = TRUE;
2174                break;
2175        case e1000_media_type_fiber:
2176                e1000_check_for_link(hw);
2177                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2178                                 E1000_STATUS_LU);
2179                break;
2180        case e1000_media_type_internal_serdes:
2181                e1000_check_for_link(hw);
2182                link_check = adapter->hw.mac.serdes_has_link;
2183                break;
2184	/* VF device is type_unknown */
2185        case e1000_media_type_unknown:
2186                e1000_check_for_link(hw);
2187		link_check = !hw->mac.get_link_status;
2188		/* Fall thru */
2189        default:
2190                break;
2191        }
2192
2193	/* Check for thermal downshift or shutdown */
2194	if (hw->mac.type == e1000_i350) {
2195		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2196		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2197	}
2198
2199	/* Get the flow control for display */
2200	switch (fc->current_mode) {
2201	case e1000_fc_rx_pause:
2202		flowctl = "RX";
2203		break;
2204	case e1000_fc_tx_pause:
2205		flowctl = "TX";
2206		break;
2207	case e1000_fc_full:
2208		flowctl = "Full";
2209		break;
2210	case e1000_fc_none:
2211	default:
2212		flowctl = "None";
2213		break;
2214	}
2215
2216	/* Now we check if a transition has happened */
2217	if (link_check && (adapter->link_active == 0)) {
2218		e1000_get_speed_and_duplex(&adapter->hw,
2219		    &adapter->link_speed, &adapter->link_duplex);
2220		if (bootverbose)
2221			device_printf(dev, "Link is up %d Mbps %s,"
2222			    " Flow Control: %s\n",
2223			    adapter->link_speed,
2224			    ((adapter->link_duplex == FULL_DUPLEX) ?
2225			    "Full Duplex" : "Half Duplex"), flowctl);
2226		adapter->link_active = 1;
2227		ifp->if_baudrate = adapter->link_speed * 1000000;
2228		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2229		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2230			device_printf(dev, "Link: thermal downshift\n");
2231		/* Delay Link Up for Phy update */
2232		if (((hw->mac.type == e1000_i210) ||
2233		    (hw->mac.type == e1000_i211)) &&
2234		    (hw->phy.id == I210_I_PHY_ID))
2235			msec_delay(I210_LINK_DELAY);
2236		/* Reset if the media type changed. */
2237		if (hw->dev_spec._82575.media_changed) {
2238			hw->dev_spec._82575.media_changed = false;
2239			adapter->flags |= IGB_MEDIA_RESET;
2240			igb_reset(adapter);
2241		}
2242		/* This can sleep */
2243		if_link_state_change(ifp, LINK_STATE_UP);
2244	} else if (!link_check && (adapter->link_active == 1)) {
2245		ifp->if_baudrate = adapter->link_speed = 0;
2246		adapter->link_duplex = 0;
2247		if (bootverbose)
2248			device_printf(dev, "Link is Down\n");
2249		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2250		    (thstat & E1000_THSTAT_PWR_DOWN))
2251			device_printf(dev, "Link: thermal shutdown\n");
2252		adapter->link_active = 0;
2253		/* This can sleep */
2254		if_link_state_change(ifp, LINK_STATE_DOWN);
2255		/* Reset queue state */
2256		for (int i = 0; i < adapter->num_queues; i++, txr++)
2257			txr->queue_status = IGB_QUEUE_IDLE;
2258	}
2259}
2260
2261/*********************************************************************
2262 *
2263 *  This routine disables all traffic on the adapter by issuing a
2264 *  global reset on the MAC and deallocates TX/RX buffers.
2265 *
2266 **********************************************************************/
2267
2268static void
2269igb_stop(void *arg)
2270{
2271	struct adapter	*adapter = arg;
2272	struct ifnet	*ifp = adapter->ifp;
2273	struct tx_ring *txr = adapter->tx_rings;
2274
2275	IGB_CORE_LOCK_ASSERT(adapter);
2276
2277	INIT_DEBUGOUT("igb_stop: begin");
2278
2279	igb_disable_intr(adapter);
2280
2281	callout_stop(&adapter->timer);
2282
2283	/* Tell the stack that the interface is no longer active */
2284	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2285	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2286
2287	/* Disarm watchdog timer. */
2288	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2289		IGB_TX_LOCK(txr);
2290		txr->queue_status = IGB_QUEUE_IDLE;
2291		IGB_TX_UNLOCK(txr);
2292	}
2293
2294	e1000_reset_hw(&adapter->hw);
2295	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, 0);
2296
2297	e1000_led_off(&adapter->hw);
2298	e1000_cleanup_led(&adapter->hw);
2299}
2300
2301
2302/*********************************************************************
2303 *
2304 *  Determine hardware revision.
2305 *
2306 **********************************************************************/
2307static void
2308igb_identify_hardware(struct adapter *adapter)
2309{
2310	device_t dev = adapter->dev;
2311
2312	/* Make sure our PCI config space has the necessary stuff set */
2313	pci_enable_busmaster(dev);
2314	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2315
2316	/* Save off the information about this board */
2317	adapter->hw.vendor_id = pci_get_vendor(dev);
2318	adapter->hw.device_id = pci_get_device(dev);
2319	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2320	adapter->hw.subsystem_vendor_id =
2321	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2322	adapter->hw.subsystem_device_id =
2323	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2324
2325	/* Set MAC type early for PCI setup */
2326	e1000_set_mac_type(&adapter->hw);
2327
2328	/* Are we a VF device? */
2329	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2330	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2331		adapter->vf_ifp = 1;
2332	else
2333		adapter->vf_ifp = 0;
2334}
2335
2336static int
2337igb_allocate_pci_resources(struct adapter *adapter)
2338{
2339	device_t	dev = adapter->dev;
2340	int		rid;
2341
2342	rid = PCIR_BAR(0);
2343	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2344	    &rid, RF_ACTIVE);
2345	if (adapter->pci_mem == NULL) {
2346		device_printf(dev, "Unable to allocate bus resource: memory\n");
2347		return (ENXIO);
2348	}
2349	adapter->osdep.mem_bus_space_tag =
2350	    rman_get_bustag(adapter->pci_mem);
2351	adapter->osdep.mem_bus_space_handle =
2352	    rman_get_bushandle(adapter->pci_mem);
2353	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2354
2355	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2356
2357	/* This will setup either MSI/X or MSI */
2358	adapter->msix = igb_setup_msix(adapter);
2359	adapter->hw.back = &adapter->osdep;
2360
2361	return (0);
2362}
2363
2364/*********************************************************************
2365 *
2366 *  Setup the Legacy or MSI Interrupt handler
2367 *
2368 **********************************************************************/
2369static int
2370igb_allocate_legacy(struct adapter *adapter)
2371{
2372	device_t		dev = adapter->dev;
2373	struct igb_queue	*que = adapter->queues;
2374#ifndef IGB_LEGACY_TX
2375	struct tx_ring		*txr = adapter->tx_rings;
2376#endif
2377	int			error, rid = 0;
2378
2379	/* Turn off all interrupts */
2380	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2381
2382	/* MSI RID is 1 */
2383	if (adapter->msix == 1)
2384		rid = 1;
2385
2386	/* We allocate a single interrupt resource */
2387	adapter->res = bus_alloc_resource_any(dev,
2388	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2389	if (adapter->res == NULL) {
2390		device_printf(dev, "Unable to allocate bus resource: "
2391		    "interrupt\n");
2392		return (ENXIO);
2393	}
2394
2395#ifndef IGB_LEGACY_TX
2396	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2397#endif
2398
2399	/*
2400	 * Try allocating a fast interrupt and the associated deferred
2401	 * processing contexts.
2402	 */
2403	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2404	/* Make tasklet for deferred link handling */
2405	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2406	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2407	    taskqueue_thread_enqueue, &que->tq);
2408	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2409	    device_get_nameunit(adapter->dev));
2410	if ((error = bus_setup_intr(dev, adapter->res,
2411	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2412	    adapter, &adapter->tag)) != 0) {
2413		device_printf(dev, "Failed to register fast interrupt "
2414			    "handler: %d\n", error);
2415		taskqueue_free(que->tq);
2416		que->tq = NULL;
2417		return (error);
2418	}
2419
2420	return (0);
2421}
2422
2423
2424/*********************************************************************
2425 *
2426 *  Setup the MSIX Queue Interrupt handlers:
2427 *
2428 **********************************************************************/
2429static int
2430igb_allocate_msix(struct adapter *adapter)
2431{
2432	device_t		dev = adapter->dev;
2433	struct igb_queue	*que = adapter->queues;
2434	int			error, rid, vector = 0;
2435	int			cpu_id = 0;
2436#ifdef	RSS
2437	cpuset_t cpu_mask;
2438#endif
2439
2440	/* Be sure to start with all interrupts disabled */
2441	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2442	E1000_WRITE_FLUSH(&adapter->hw);
2443
2444#ifdef	RSS
2445	/*
2446	 * If we're doing RSS, the number of queues needs to
2447	 * match the number of RSS buckets that are configured.
2448	 *
2449	 * + If there's more queues than RSS buckets, we'll end
2450	 *   up with queues that get no traffic.
2451	 *
2452	 * + If there's more RSS buckets than queues, we'll end
2453	 *   up having multiple RSS buckets map to the same queue,
2454	 *   so there'll be some contention.
2455	 */
2456	if (adapter->num_queues != rss_getnumbuckets()) {
2457		device_printf(dev,
2458		    "%s: number of queues (%d) != number of RSS buckets (%d)"
2459		    "; performance will be impacted.\n",
2460		    __func__,
2461		    adapter->num_queues,
2462		    rss_getnumbuckets());
2463	}
2464#endif
2465
2466	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;
2468		que->res = bus_alloc_resource_any(dev,
2469		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2470		if (que->res == NULL) {
2471			device_printf(dev,
2472			    "Unable to allocate bus resource: "
2473			    "MSIX Queue Interrupt\n");
2474			return (ENXIO);
2475		}
2476		error = bus_setup_intr(dev, que->res,
2477	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2478		    igb_msix_que, que, &que->tag);
2479		if (error) {
2480			que->res = NULL;
2481			device_printf(dev, "Failed to register Queue handler");
2482			return (error);
2483		}
2484#if __FreeBSD_version >= 800504
2485		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2486#endif
2487		que->msix = vector;
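		/*
		 * Record which EIMS/EICR bit(s) arm this queue's
		 * interrupt: the 82575 uses fixed per-queue bits,
		 * newer MACs expose one bit per MSI-X vector.
		 */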
2488		if (adapter->hw.mac.type == e1000_82575)
2489			que->eims = E1000_EICR_TX_QUEUE0 << i;
2490		else
2491			que->eims = 1 << vector;
2492
2493#ifdef	RSS
2494		/*
2495		 * The queue ID is used as the RSS layer bucket ID.
2496		 * We look up the queue ID -> RSS CPU ID and select
2497		 * that.
2498		 */
2499		cpu_id = rss_getcpu(i % rss_getnumbuckets());
2500#else
2501		/*
2502		 * Bind the msix vector, and thus the
2503		 * rings to the corresponding cpu.
2504		 *
2505		 * This just happens to match the default RSS round-robin
2506		 * bucket -> queue -> CPU allocation.
2507		 */
2508		if (adapter->num_queues > 1) {
2509			if (igb_last_bind_cpu < 0)
2510				igb_last_bind_cpu = CPU_FIRST();
2511			cpu_id = igb_last_bind_cpu;
2512		}
2513#endif
2514
2515		if (adapter->num_queues > 1) {
2516			bus_bind_intr(dev, que->res, cpu_id);
2517#ifdef	RSS
2518			device_printf(dev,
2519				"Bound queue %d to RSS bucket %d\n",
2520				i, cpu_id);
2521#else
2522			device_printf(dev,
2523				"Bound queue %d to cpu %d\n",
2524				i, cpu_id);
2525#endif
2526		}
2527
2528#ifndef IGB_LEGACY_TX
2529		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2530		    que->txr);
2531#endif
2532		/* Make tasklet for deferred handling */
2533		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2534		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2535		    taskqueue_thread_enqueue, &que->tq);
2536		if (adapter->num_queues > 1) {
2537			/*
2538			 * Only pin the taskqueue thread to a CPU if
2539			 * RSS is in use.
2540			 *
2541			 * This again just happens to match the default RSS
2542			 * round-robin bucket -> queue -> CPU allocation.
2543			 */
2544#ifdef	RSS
2545			CPU_SETOF(cpu_id, &cpu_mask);
2546			taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2547			    &cpu_mask,
2548			    "%s que (bucket %d)",
2549			    device_get_nameunit(adapter->dev),
2550			    cpu_id);
2551#else
2552			taskqueue_start_threads(&que->tq, 1, PI_NET,
2553			    "%s que (qid %d)",
2554			    device_get_nameunit(adapter->dev),
2555			    cpu_id);
2556#endif
2557		} else {
2558			taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2559			    device_get_nameunit(adapter->dev));
2560		}
2561
2562		/* Finally update the last bound CPU id */
2563		if (adapter->num_queues > 1)
2564			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2565	}
2566
2567	/* And Link */
2568	rid = vector + 1;
2569	adapter->res = bus_alloc_resource_any(dev,
2570	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2571	if (adapter->res == NULL) {
2572		device_printf(dev,
2573		    "Unable to allocate bus resource: "
2574		    "MSIX Link Interrupt\n");
2575		return (ENXIO);
2576	}
2577	if ((error = bus_setup_intr(dev, adapter->res,
2578	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2579	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2580		device_printf(dev, "Failed to register Link handler");
2581		return (error);
2582	}
2583#if __FreeBSD_version >= 800504
2584	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2585#endif
2586	adapter->linkvec = vector;
2587
2588	return (0);
2589}
2590
2591
2592static void
2593igb_configure_queues(struct adapter *adapter)
2594{
2595	struct	e1000_hw	*hw = &adapter->hw;
2596	struct	igb_queue	*que;
2597	u32			tmp, ivar = 0, newitr = 0;
2598
2599	/* First turn on RSS capability */
2600	if (adapter->hw.mac.type != e1000_82575)
2601		E1000_WRITE_REG(hw, E1000_GPIE,
2602		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2603		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2604
2605	/* Turn on MSIX */
2606	switch (adapter->hw.mac.type) {
2607	case e1000_82580:
2608	case e1000_i350:
2609	case e1000_i354:
2610	case e1000_i210:
2611	case e1000_i211:
2612	case e1000_vfadapt:
2613	case e1000_vfadapt_i350:
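		/*
		 * Each 32-bit IVAR register maps two queues: RX vectors
		 * live in bytes 0/2 and TX vectors in bytes 1/3, hence
		 * index = i >> 1 and the byte-wise masks below.
		 */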
2614		/* RX entries */
2615		for (int i = 0; i < adapter->num_queues; i++) {
2616			u32 index = i >> 1;
2617			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2618			que = &adapter->queues[i];
2619			if (i & 1) {
2620				ivar &= 0xFF00FFFF;
2621				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2622			} else {
2623				ivar &= 0xFFFFFF00;
2624				ivar |= que->msix | E1000_IVAR_VALID;
2625			}
2626			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2627		}
2628		/* TX entries */
2629		for (int i = 0; i < adapter->num_queues; i++) {
2630			u32 index = i >> 1;
2631			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2632			que = &adapter->queues[i];
2633			if (i & 1) {
2634				ivar &= 0x00FFFFFF;
2635				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2636			} else {
2637				ivar &= 0xFFFF00FF;
2638				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2639			}
2640			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2641			adapter->que_mask |= que->eims;
2642		}
2643
2644		/* And for the link interrupt */
2645		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2646		adapter->link_mask = 1 << adapter->linkvec;
2647		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2648		break;
2649	case e1000_82576:
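		/*
		 * The 82576 also packs two queues per IVAR register, but
		 * queues 0-7 use bytes 0 (RX) and 1 (TX) while queues
		 * 8-15 use bytes 2 and 3 of IVAR(i & 0x7).
		 */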
2650		/* RX entries */
2651		for (int i = 0; i < adapter->num_queues; i++) {
2652			u32 index = i & 0x7; /* Each IVAR has two entries */
2653			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2654			que = &adapter->queues[i];
2655			if (i < 8) {
2656				ivar &= 0xFFFFFF00;
2657				ivar |= que->msix | E1000_IVAR_VALID;
2658			} else {
2659				ivar &= 0xFF00FFFF;
2660				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2661			}
2662			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2663			adapter->que_mask |= que->eims;
2664		}
2665		/* TX entries */
2666		for (int i = 0; i < adapter->num_queues; i++) {
2667			u32 index = i & 0x7; /* Each IVAR has two entries */
2668			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2669			que = &adapter->queues[i];
2670			if (i < 8) {
2671				ivar &= 0xFFFF00FF;
2672				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2673			} else {
2674				ivar &= 0x00FFFFFF;
2675				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2676			}
2677			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2678			adapter->que_mask |= que->eims;
2679		}
2680
2681		/* And for the link interrupt */
2682		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2683		adapter->link_mask = 1 << adapter->linkvec;
2684		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2685		break;
2686
2687	case e1000_82575:
                /* Enable MSI-X support */
2689		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2690                tmp |= E1000_CTRL_EXT_PBA_CLR;
2691                /* Auto-Mask interrupts upon ICR read. */
2692                tmp |= E1000_CTRL_EXT_EIAME;
2693                tmp |= E1000_CTRL_EXT_IRCA;
2694                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2695
2696		/* Queues */
2697		for (int i = 0; i < adapter->num_queues; i++) {
2698			que = &adapter->queues[i];
2699			tmp = E1000_EICR_RX_QUEUE0 << i;
2700			tmp |= E1000_EICR_TX_QUEUE0 << i;
2701			que->eims = tmp;
2702			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2703			    i, que->eims);
2704			adapter->que_mask |= que->eims;
2705		}
2706
2707		/* Link */
2708		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2709		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
	default:
2712		break;
2713	}
2714
2715	/* Set the starting interrupt rate */
2716	if (igb_max_interrupt_rate > 0)
2717		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
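	/*
	 * 4000000 counts per second works out to roughly 250ns per
	 * EITR count, assuming the conversion above is exact; the low
	 * two bits are reserved, hence the 0x7FFC mask.
	 */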
2718
2719        if (hw->mac.type == e1000_82575)
2720                newitr |= newitr << 16;
2721        else
2722                newitr |= E1000_EITR_CNT_IGNR;
2723
2724	for (int i = 0; i < adapter->num_queues; i++) {
2725		que = &adapter->queues[i];
2726		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2727	}
2728
2729	return;
2730}
2731
2732
2733static void
2734igb_free_pci_resources(struct adapter *adapter)
2735{
2736	struct		igb_queue *que = adapter->queues;
2737	device_t	dev = adapter->dev;
2738	int		rid;
2739
	/*
	** There is a slight possibility of a failure mode in attach
	** that results in entering this function before the interrupt
	** resources have been initialized; in that case we do not want
	** to execute the loops below. We can detect this reliably by
	** the state of the adapter res pointer.
	*/
2748	if (adapter->res == NULL)
2749		goto mem;
2750
2751	/*
2752	 * First release all the interrupt resources:
2753	 */
2754	for (int i = 0; i < adapter->num_queues; i++, que++) {
2755		rid = que->msix + 1;
2756		if (que->tag != NULL) {
2757			bus_teardown_intr(dev, que->res, que->tag);
2758			que->tag = NULL;
2759		}
2760		if (que->res != NULL)
2761			bus_release_resource(dev,
2762			    SYS_RES_IRQ, rid, que->res);
2763	}
2764
2765	/* Clean the Legacy or Link interrupt last */
2766	if (adapter->linkvec) /* we are doing MSIX */
2767		rid = adapter->linkvec + 1;
	else
		rid = (adapter->msix != 0) ? 1 : 0;
2770
2771	que = adapter->queues;
2772	if (adapter->tag != NULL) {
2773		taskqueue_drain(que->tq, &adapter->link_task);
2774		bus_teardown_intr(dev, adapter->res, adapter->tag);
2775		adapter->tag = NULL;
2776	}
2777	if (adapter->res != NULL)
2778		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2779
2780	for (int i = 0; i < adapter->num_queues; i++, que++) {
2781		if (que->tq != NULL) {
2782#ifndef IGB_LEGACY_TX
2783			taskqueue_drain(que->tq, &que->txr->txq_task);
2784#endif
2785			taskqueue_drain(que->tq, &que->que_task);
2786			taskqueue_free(que->tq);
2787		}
2788	}
2789mem:
2790	if (adapter->msix)
2791		pci_release_msi(dev);
2792
2793	if (adapter->msix_mem != NULL)
2794		bus_release_resource(dev, SYS_RES_MEMORY,
2795		    adapter->memrid, adapter->msix_mem);
2796
2797	if (adapter->pci_mem != NULL)
2798		bus_release_resource(dev, SYS_RES_MEMORY,
2799		    PCIR_BAR(0), adapter->pci_mem);
2800
2801}
2802
2803/*
2804 * Setup Either MSI/X or MSI
2805 */
2806static int
2807igb_setup_msix(struct adapter *adapter)
2808{
2809	device_t	dev = adapter->dev;
2810	int		bar, want, queues, msgs, maxqueues;
2811
2812	/* tuneable override */
2813	if (igb_enable_msix == 0)
2814		goto msi;
2815
2816	/* First try MSI/X */
2817	msgs = pci_msix_count(dev);
2818	if (msgs == 0)
2819		goto msi;
	/*
	** Some newer devices, as with ixgbe, may now use a
	** different BAR, so we need to keep track of which
	** one is used.
	*/
2825	adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2826	bar = pci_read_config(dev, adapter->memrid, 4);
2827	if (bar == 0) /* use next bar */
2828		adapter->memrid += 4;
2829	adapter->msix_mem = bus_alloc_resource_any(dev,
2830	    SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2831       	if (adapter->msix_mem == NULL) {
2832		/* May not be enabled */
2833		device_printf(adapter->dev,
2834		    "Unable to map MSIX table \n");
2835		goto msi;
2836	}
2837
2838	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
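	/*
	** Default to one queue per CPU, reserving one MSI-X vector
	** for the link interrupt (hence msgs - 1).
	*/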
2839
2840	/* Override via tuneable */
2841	if (igb_num_queues != 0)
2842		queues = igb_num_queues;
2843
2844#ifdef	RSS
2845	/* If we're doing RSS, clamp at the number of RSS buckets */
2846	if (queues > rss_getnumbuckets())
2847		queues = rss_getnumbuckets();
2848#endif
2849
2850
2851	/* Sanity check based on HW */
2852	switch (adapter->hw.mac.type) {
2853		case e1000_82575:
2854			maxqueues = 4;
2855			break;
2856		case e1000_82576:
2857		case e1000_82580:
2858		case e1000_i350:
2859		case e1000_i354:
2860			maxqueues = 8;
2861			break;
2862		case e1000_i210:
2863			maxqueues = 4;
2864			break;
2865		case e1000_i211:
2866			maxqueues = 2;
2867			break;
2868		default:  /* VF interfaces */
2869			maxqueues = 1;
2870			break;
2871	}
2872
2873	/* Final clamp on the actual hardware capability */
2874	if (queues > maxqueues)
2875		queues = maxqueues;
2876
	/*
	** One vector (RX/TX pair) per queue,
	** plus an additional one for the link interrupt.
	*/
2881	want = queues + 1;
2882	if (msgs >= want)
2883		msgs = want;
2884	else {
2885               	device_printf(adapter->dev,
2886		    "MSIX Configuration Problem, "
2887		    "%d vectors configured, but %d queues wanted!\n",
2888		    msgs, want);
2889		goto msi;
2890	}
2891	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2892               	device_printf(adapter->dev,
2893		    "Using MSIX interrupts with %d vectors\n", msgs);
2894		adapter->num_queues = queues;
2895		return (msgs);
2896	}
2897	/*
2898	** If MSIX alloc failed or provided us with
2899	** less than needed, free and fall through to MSI
2900	*/
2901	pci_release_msi(dev);
2902
2903msi:
2904       	if (adapter->msix_mem != NULL) {
2905		bus_release_resource(dev, SYS_RES_MEMORY,
2906		    adapter->memrid, adapter->msix_mem);
2907		adapter->msix_mem = NULL;
2908	}
2909       	msgs = 1;
2910	if (pci_alloc_msi(dev, &msgs) == 0) {
2911		device_printf(adapter->dev," Using an MSI interrupt\n");
2912		return (msgs);
2913	}
2914	device_printf(adapter->dev," Using a Legacy interrupt\n");
2915	return (0);
2916}
2917
2918/*********************************************************************
2919 *
2920 *  Initialize the DMA Coalescing feature
2921 *
2922 **********************************************************************/
2923static void
2924igb_init_dmac(struct adapter *adapter, u32 pba)
2925{
2926	device_t	dev = adapter->dev;
2927	struct e1000_hw *hw = &adapter->hw;
2928	u32 		dmac, reg = ~E1000_DMACR_DMAC_EN;
2929	u16		hwm;
2930
2931	if (hw->mac.type == e1000_i211)
2932		return;
2933
2934	if (hw->mac.type > e1000_82580) {
2935
2936		if (adapter->dmac == 0) { /* Disabling it */
2937			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2938			return;
2939		} else
2940			device_printf(dev, "DMA Coalescing enabled\n");
2941
2942		/* Set starting threshold */
2943		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2944
2945		hwm = 64 * pba - adapter->max_frame_size / 16;
2946		if (hwm < 64 * (pba - 6))
2947			hwm = 64 * (pba - 6);
2948		reg = E1000_READ_REG(hw, E1000_FCRTC);
2949		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2950		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2951		    & E1000_FCRTC_RTH_COAL_MASK);
2952		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2953
2954
2955		dmac = pba - adapter->max_frame_size / 512;
2956		if (dmac < pba - 10)
2957			dmac = pba - 10;
2958		reg = E1000_READ_REG(hw, E1000_DMACR);
2959		reg &= ~E1000_DMACR_DMACTHR_MASK;
2960		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2961		    & E1000_DMACR_DMACTHR_MASK);
2962
		/* Transition to L0s or L1 if available. */
2964		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2965
		/*
		 * Check whether this is a 2.5Gb backplane connection
		 * before configuring the watchdog timer: the timer value
		 * is in msec, expressed in 12.8 usec intervals on 2.5Gb
		 * links and in 32 usec intervals otherwise.
		 */
2972		if (hw->mac.type == e1000_i354) {
2973			int status = E1000_READ_REG(hw, E1000_STATUS);
2974			if ((status & E1000_STATUS_2P5_SKU) &&
2975			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2976				reg |= ((adapter->dmac * 5) >> 6);
2977			else
2978				reg |= (adapter->dmac >> 5);
2979		} else {
2980			reg |= (adapter->dmac >> 5);
2981		}
2982
2983		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2984
2985		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2986
2987		/* Set the interval before transition */
2988		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2989		if (hw->mac.type == e1000_i350)
2990			reg |= IGB_DMCTLX_DCFLUSH_DIS;
		/*
		** In a 2.5Gb connection the TTLX unit is 0.4 usec, so the
		** 4 usec delay corresponds to a count of 10 (0xA); the
		** delay itself is still 4 usec.
		*/
2995		if (hw->mac.type == e1000_i354) {
2996			int status = E1000_READ_REG(hw, E1000_STATUS);
2997			if ((status & E1000_STATUS_2P5_SKU) &&
2998			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
2999				reg |= 0xA;
3000			else
3001				reg |= 0x4;
3002		} else {
3003			reg |= 0x4;
3004		}
3005
3006		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3007
3008		/* free space in tx packet buffer to wake from DMA coal */
3009		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
3010		    (2 * adapter->max_frame_size)) >> 6);
3011
3012		/* make low power state decision controlled by DMA coal */
3013		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3014		reg &= ~E1000_PCIEMISC_LX_DECISION;
3015		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3016
3017	} else if (hw->mac.type == e1000_82580) {
3018		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3019		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3020		    reg & ~E1000_PCIEMISC_LX_DECISION);
3021		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3022	}
3023}
3024
3025
3026/*********************************************************************
3027 *
 *  Set up a fresh starting state
3029 *
3030 **********************************************************************/
3031static void
3032igb_reset(struct adapter *adapter)
3033{
3034	device_t	dev = adapter->dev;
3035	struct e1000_hw *hw = &adapter->hw;
3036	struct e1000_fc_info *fc = &hw->fc;
3037	struct ifnet	*ifp = adapter->ifp;
3038	u32		pba = 0;
3039	u16		hwm;
3040
3041	INIT_DEBUGOUT("igb_reset: begin");
3042
3043	/* Let the firmware know the OS is in control */
3044	igb_get_hw_control(adapter);
3045
	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
3051	switch (hw->mac.type) {
3052	case e1000_82575:
3053		pba = E1000_PBA_32K;
3054		break;
3055	case e1000_82576:
3056	case e1000_vfadapt:
3057		pba = E1000_READ_REG(hw, E1000_RXPBS);
3058		pba &= E1000_RXPBS_SIZE_MASK_82576;
3059		break;
3060	case e1000_82580:
3061	case e1000_i350:
3062	case e1000_i354:
3063	case e1000_vfadapt_i350:
3064		pba = E1000_READ_REG(hw, E1000_RXPBS);
3065		pba = e1000_rxpbs_adjust_82580(pba);
3066		break;
3067	case e1000_i210:
3068	case e1000_i211:
		pba = E1000_PBA_34K;
		break;
	default:
3071		break;
3072	}
3073
3074	/* Special needs in case of Jumbo frames */
3075	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3076		u32 tx_space, min_tx, min_rx;
3077		pba = E1000_READ_REG(hw, E1000_PBA);
3078		tx_space = pba >> 16;
3079		pba &= 0xffff;
3080		min_tx = (adapter->max_frame_size +
3081		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3082		min_tx = roundup2(min_tx, 1024);
3083		min_tx >>= 10;
3084                min_rx = adapter->max_frame_size;
3085                min_rx = roundup2(min_rx, 1024);
3086                min_rx >>= 10;
3087		if (tx_space < min_tx &&
3088		    ((min_tx - tx_space) < pba)) {
3089			pba = pba - (min_tx - tx_space);
3090			/*
3091                         * if short on rx space, rx wins
3092                         * and must trump tx adjustment
3093			 */
3094                        if (pba < min_rx)
3095                                pba = min_rx;
3096		}
3097		E1000_WRITE_REG(hw, E1000_PBA, pba);
3098	}
3099
3100	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3101
3102	/*
3103	 * These parameters control the automatic generation (Tx) and
3104	 * response (Rx) to Ethernet PAUSE frames.
3105	 * - High water mark should allow for at least two frames to be
3106	 *   received after sending an XOFF.
3107	 * - Low water mark works best when it is very near the high water mark.
3108	 *   This allows the receiver to restart by sending XON when it has
3109	 *   drained a bit.
3110	 */
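	/*
	 * pba is in KB, so << 10 converts it to bytes: the high water
	 * mark is the lesser of 90% of the packet buffer and the
	 * buffer size less two maximum-sized frames.
	 */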
3111	hwm = min(((pba << 10) * 9 / 10),
3112	    ((pba << 10) - 2 * adapter->max_frame_size));
3113
3114	if (hw->mac.type < e1000_82576) {
3115		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3116		fc->low_water = fc->high_water - 8;
3117	} else {
3118		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3119		fc->low_water = fc->high_water - 16;
3120	}
3121
3122	fc->pause_time = IGB_FC_PAUSE_TIME;
3123	fc->send_xon = TRUE;
3124	if (adapter->fc)
3125		fc->requested_mode = adapter->fc;
3126	else
3127		fc->requested_mode = e1000_fc_default;
3128
3129	/* Issue a global reset */
3130	e1000_reset_hw(hw);
3131	E1000_WRITE_REG(hw, E1000_WUFC, 0);
3132
3133	/* Reset for AutoMediaDetect */
3134	if (adapter->flags & IGB_MEDIA_RESET) {
3135		e1000_setup_init_funcs(hw, TRUE);
3136		e1000_get_bus_info(hw);
3137		adapter->flags &= ~IGB_MEDIA_RESET;
3138	}
3139
3140	if (e1000_init_hw(hw) < 0)
3141		device_printf(dev, "Hardware Initialization Failed\n");
3142
3143	/* Setup DMA Coalescing */
3144	igb_init_dmac(adapter, pba);
3145
3146	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3147	e1000_get_phy_info(hw);
3148	e1000_check_for_link(hw);
3149	return;
3150}
3151
3152/*********************************************************************
3153 *
3154 *  Setup networking device structure and register an interface.
3155 *
3156 **********************************************************************/
3157static int
3158igb_setup_interface(device_t dev, struct adapter *adapter)
3159{
3160	struct ifnet   *ifp;
3161
3162	INIT_DEBUGOUT("igb_setup_interface: begin");
3163
3164	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3165	if (ifp == NULL) {
3166		device_printf(dev, "can not allocate ifnet structure\n");
3167		return (-1);
3168	}
3169	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3170	ifp->if_init =  igb_init;
3171	ifp->if_softc = adapter;
3172	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3173	ifp->if_ioctl = igb_ioctl;
3174	ifp->if_get_counter = igb_get_counter;
3175
3176	/* TSO parameters */
3177	ifp->if_hw_tsomax = IP_MAXPACKET;
3178	ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER;
3179	ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE;
3180
3181#ifndef IGB_LEGACY_TX
3182	ifp->if_transmit = igb_mq_start;
3183	ifp->if_qflush = igb_qflush;
3184#else
3185	ifp->if_start = igb_start;
3186	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3187	ifp->if_snd.ifq_drv_maxlen = 0;
3188	IFQ_SET_READY(&ifp->if_snd);
3189#endif
3190
3191	ether_ifattach(ifp, adapter->hw.mac.addr);
3192
3193	ifp->if_capabilities = ifp->if_capenable = 0;
3194
3195	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3196#if __FreeBSD_version >= 1000000
3197	ifp->if_capabilities |= IFCAP_HWCSUM_IPV6;
3198#endif
3199	ifp->if_capabilities |= IFCAP_TSO;
3200	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3201	ifp->if_capenable = ifp->if_capabilities;
3202
	/* LRO is advertised as a capability but not enabled by default */
3204	ifp->if_capabilities |= IFCAP_LRO;
3205
3206#ifdef DEVICE_POLLING
3207	ifp->if_capabilities |= IFCAP_POLLING;
3208#endif
3209
3210	/*
3211	 * Tell the upper layer(s) we
3212	 * support full VLAN capability.
3213	 */
3214	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3215	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3216			     |  IFCAP_VLAN_HWTSO
3217			     |  IFCAP_VLAN_MTU;
3218	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3219			  |  IFCAP_VLAN_HWTSO
3220			  |  IFCAP_VLAN_MTU;
3221
3222	/*
3223	 * Enable only WOL MAGIC by default if WOL is enabled in EEPROM.
3224	 */
3225	ifp->if_capabilities |= IFCAP_WOL;
3226	if (adapter->wol)
3227		ifp->if_capenable |= IFCAP_WOL_MAGIC;
3228
	/*
	** Don't turn this on by default: if vlans are created on
	** another pseudo device (e.g. lagg), vlan events are not
	** passed through, breaking operation, whereas with HW FILTER
	** off it works. If you use vlans directly on the igb driver
	** you can enable this and get full hardware tag filtering.
	*/
3237	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3238
3239	/*
3240	 * Specify the media types supported by this adapter and register
3241	 * callbacks to update media and link information
3242	 */
3243	ifmedia_init(&adapter->media, IFM_IMASK,
3244	    igb_media_change, igb_media_status);
3245	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3246	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3247		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3248			    0, NULL);
3249		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3250	} else {
3251		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3252		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3253			    0, NULL);
3254		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3255			    0, NULL);
3256		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3257			    0, NULL);
3258		if (adapter->hw.phy.type != e1000_phy_ife) {
3259			ifmedia_add(&adapter->media,
3260				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3261			ifmedia_add(&adapter->media,
3262				IFM_ETHER | IFM_1000_T, 0, NULL);
3263		}
3264	}
3265	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3266	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3267	return (0);
3268}
3269
3270
3271/*
3272 * Manage DMA'able memory.
3273 */
3274static void
3275igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3276{
3277	if (error)
3278		return;
3279	*(bus_addr_t *) arg = segs[0].ds_addr;
3280}
3281
3282static int
3283igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3284        struct igb_dma_alloc *dma, int mapflags)
3285{
3286	int error;
3287
3288	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3289				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3290				BUS_SPACE_MAXADDR,	/* lowaddr */
3291				BUS_SPACE_MAXADDR,	/* highaddr */
3292				NULL, NULL,		/* filter, filterarg */
3293				size,			/* maxsize */
3294				1,			/* nsegments */
3295				size,			/* maxsegsize */
3296				0,			/* flags */
3297				NULL,			/* lockfunc */
3298				NULL,			/* lockarg */
3299				&dma->dma_tag);
3300	if (error) {
3301		device_printf(adapter->dev,
3302		    "%s: bus_dma_tag_create failed: %d\n",
3303		    __func__, error);
3304		goto fail_0;
3305	}
3306
3307	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3308	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3309	if (error) {
3310		device_printf(adapter->dev,
3311		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3312		    __func__, (uintmax_t)size, error);
3313		goto fail_2;
3314	}
3315
3316	dma->dma_paddr = 0;
3317	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3318	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3319	if (error || dma->dma_paddr == 0) {
3320		device_printf(adapter->dev,
3321		    "%s: bus_dmamap_load failed: %d\n",
3322		    __func__, error);
3323		goto fail_3;
3324	}
3325
3326	return (0);
3327
3328fail_3:
3329	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3330fail_2:
3331	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3332	bus_dma_tag_destroy(dma->dma_tag);
3333fail_0:
3334	dma->dma_tag = NULL;
3335
3336	return (error);
3337}
3338
3339static void
3340igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3341{
3342	if (dma->dma_tag == NULL)
3343		return;
3344	if (dma->dma_paddr != 0) {
3345		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3346		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3347		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3348		dma->dma_paddr = 0;
3349	}
3350	if (dma->dma_vaddr != NULL) {
3351		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3352		dma->dma_vaddr = NULL;
3353	}
3354	bus_dma_tag_destroy(dma->dma_tag);
3355	dma->dma_tag = NULL;
3356}
3357
3358
3359/*********************************************************************
3360 *
3361 *  Allocate memory for the transmit and receive rings, and then
3362 *  the descriptors associated with each, called only once at attach.
3363 *
3364 **********************************************************************/
3365static int
3366igb_allocate_queues(struct adapter *adapter)
3367{
3368	device_t dev = adapter->dev;
3369	struct igb_queue	*que = NULL;
3370	struct tx_ring		*txr = NULL;
3371	struct rx_ring		*rxr = NULL;
3372	int rsize, tsize, error = E1000_SUCCESS;
3373	int txconf = 0, rxconf = 0;
3374
3375	/* First allocate the top level queue structs */
3376	if (!(adapter->queues =
3377	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3378	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3379		device_printf(dev, "Unable to allocate queue memory\n");
3380		error = ENOMEM;
3381		goto fail;
3382	}
3383
3384	/* Next allocate the TX ring struct memory */
3385	if (!(adapter->tx_rings =
3386	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3387	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3388		device_printf(dev, "Unable to allocate TX ring memory\n");
3389		error = ENOMEM;
3390		goto tx_fail;
3391	}
3392
3393	/* Now allocate the RX */
3394	if (!(adapter->rx_rings =
3395	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3396	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3397		device_printf(dev, "Unable to allocate RX ring memory\n");
3398		error = ENOMEM;
3399		goto rx_fail;
3400	}
3401
3402	tsize = roundup2(adapter->num_tx_desc *
3403	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3404	/*
3405	 * Now set up the TX queues, txconf is needed to handle the
3406	 * possibility that things fail midcourse and we need to
3407	 * undo memory gracefully
3408	 */
3409	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3410		/* Set up some basics */
3411		txr = &adapter->tx_rings[i];
3412		txr->adapter = adapter;
3413		txr->me = i;
3414		txr->num_desc = adapter->num_tx_desc;
3415
3416		/* Initialize the TX lock */
3417		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3418		    device_get_nameunit(dev), txr->me);
3419		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3420
3421		if (igb_dma_malloc(adapter, tsize,
3422			&txr->txdma, BUS_DMA_NOWAIT)) {
3423			device_printf(dev,
3424			    "Unable to allocate TX Descriptor memory\n");
3425			error = ENOMEM;
3426			goto err_tx_desc;
3427		}
3428		txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3429		bzero((void *)txr->tx_base, tsize);
3430
3431        	/* Now allocate transmit buffers for the ring */
3432        	if (igb_allocate_transmit_buffers(txr)) {
3433			device_printf(dev,
3434			    "Critical Failure setting up transmit buffers\n");
3435			error = ENOMEM;
3436			goto err_tx_desc;
3437        	}
3438#ifndef IGB_LEGACY_TX
3439		/* Allocate a buf ring */
3440		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3441		    M_WAITOK, &txr->tx_mtx);
3442#endif
3443	}
3444
3445	/*
3446	 * Next the RX queues...
3447	 */
3448	rsize = roundup2(adapter->num_rx_desc *
3449	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3450	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3451		rxr = &adapter->rx_rings[i];
3452		rxr->adapter = adapter;
3453		rxr->me = i;
3454
3455		/* Initialize the RX lock */
3456		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3458		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3459
3460		if (igb_dma_malloc(adapter, rsize,
3461			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3462			device_printf(dev,
3463			    "Unable to allocate RxDescriptor memory\n");
3464			error = ENOMEM;
3465			goto err_rx_desc;
3466		}
3467		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3468		bzero((void *)rxr->rx_base, rsize);
3469
3470        	/* Allocate receive buffers for the ring*/
3471		if (igb_allocate_receive_buffers(rxr)) {
3472			device_printf(dev,
3473			    "Critical Failure setting up receive buffers\n");
3474			error = ENOMEM;
3475			goto err_rx_desc;
3476		}
3477	}
3478
3479	/*
3480	** Finally set up the queue holding structs
3481	*/
3482	for (int i = 0; i < adapter->num_queues; i++) {
3483		que = &adapter->queues[i];
3484		que->adapter = adapter;
3485		que->txr = &adapter->tx_rings[i];
3486		que->rxr = &adapter->rx_rings[i];
3487	}
3488
3489	return (0);
3490
3491err_rx_desc:
3492	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3493		igb_dma_free(adapter, &rxr->rxdma);
3494err_tx_desc:
3495	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3496		igb_dma_free(adapter, &txr->txdma);
3497	free(adapter->rx_rings, M_DEVBUF);
3498rx_fail:
3499#ifndef IGB_LEGACY_TX
3500	buf_ring_free(txr->br, M_DEVBUF);
3501#endif
3502	free(adapter->tx_rings, M_DEVBUF);
3503tx_fail:
3504	free(adapter->queues, M_DEVBUF);
3505fail:
3506	return (error);
3507}
3508
3509/*********************************************************************
3510 *
3511 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3512 *  the information needed to transmit a packet on the wire. This is
3513 *  called only once at attach, setup is done every reset.
3514 *
3515 **********************************************************************/
3516static int
3517igb_allocate_transmit_buffers(struct tx_ring *txr)
3518{
3519	struct adapter *adapter = txr->adapter;
3520	device_t dev = adapter->dev;
3521	struct igb_tx_buf *txbuf;
3522	int error, i;
3523
3524	/*
3525	 * Setup DMA descriptor areas.
3526	 */
3527	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3528			       1, 0,			/* alignment, bounds */
3529			       BUS_SPACE_MAXADDR,	/* lowaddr */
3530			       BUS_SPACE_MAXADDR,	/* highaddr */
3531			       NULL, NULL,		/* filter, filterarg */
3532			       IGB_TSO_SIZE,		/* maxsize */
3533			       IGB_MAX_SCATTER,		/* nsegments */
3534			       PAGE_SIZE,		/* maxsegsize */
3535			       0,			/* flags */
3536			       NULL,			/* lockfunc */
3537			       NULL,			/* lockfuncarg */
3538			       &txr->txtag))) {
3539		device_printf(dev,"Unable to allocate TX DMA tag\n");
3540		goto fail;
3541	}
3542
3543	if (!(txr->tx_buffers =
3544	    (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3545	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3546		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3547		error = ENOMEM;
3548		goto fail;
3549	}
3550
3551        /* Create the descriptor buffer dma maps */
3552	txbuf = txr->tx_buffers;
3553	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3554		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3555		if (error != 0) {
3556			device_printf(dev, "Unable to create TX DMA map\n");
3557			goto fail;
3558		}
3559	}
3560
3561	return 0;
3562fail:
3563	/* We free all, it handles case where we are in the middle */
3564	igb_free_transmit_structures(adapter);
3565	return (error);
3566}
3567
3568/*********************************************************************
3569 *
3570 *  Initialize a transmit ring.
3571 *
3572 **********************************************************************/
3573static void
3574igb_setup_transmit_ring(struct tx_ring *txr)
3575{
3576	struct adapter *adapter = txr->adapter;
3577	struct igb_tx_buf *txbuf;
3578	int i;
3579#ifdef DEV_NETMAP
3580	struct netmap_adapter *na = NA(adapter->ifp);
3581	struct netmap_slot *slot;
3582#endif /* DEV_NETMAP */
3583
3584	/* Clear the old descriptor contents */
3585	IGB_TX_LOCK(txr);
3586#ifdef DEV_NETMAP
3587	slot = netmap_reset(na, NR_TX, txr->me, 0);
3588#endif /* DEV_NETMAP */
3589	bzero((void *)txr->tx_base,
3590	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3591	/* Reset indices */
3592	txr->next_avail_desc = 0;
3593	txr->next_to_clean = 0;
3594
3595	/* Free any existing tx buffers. */
3596        txbuf = txr->tx_buffers;
3597	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3598		if (txbuf->m_head != NULL) {
3599			bus_dmamap_sync(txr->txtag, txbuf->map,
3600			    BUS_DMASYNC_POSTWRITE);
3601			bus_dmamap_unload(txr->txtag, txbuf->map);
3602			m_freem(txbuf->m_head);
3603			txbuf->m_head = NULL;
3604		}
3605#ifdef DEV_NETMAP
3606		if (slot) {
3607			int si = netmap_idx_n2k(na->tx_rings[txr->me], i);
3608			/* no need to set the address */
3609			netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3610		}
3611#endif /* DEV_NETMAP */
3612		/* clear the watch index */
3613		txbuf->eop = NULL;
3614        }
3615
3616	/* Set number of descriptors available */
3617	txr->tx_avail = adapter->num_tx_desc;
3618
3619	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3620	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3621	IGB_TX_UNLOCK(txr);
3622}
3623
3624/*********************************************************************
3625 *
3626 *  Initialize all transmit rings.
3627 *
3628 **********************************************************************/
3629static void
3630igb_setup_transmit_structures(struct adapter *adapter)
3631{
3632	struct tx_ring *txr = adapter->tx_rings;
3633
3634	for (int i = 0; i < adapter->num_queues; i++, txr++)
3635		igb_setup_transmit_ring(txr);
3636
3637	return;
3638}
3639
3640/*********************************************************************
3641 *
3642 *  Enable transmit unit.
3643 *
3644 **********************************************************************/
3645static void
3646igb_initialize_transmit_units(struct adapter *adapter)
3647{
3648	struct tx_ring	*txr = adapter->tx_rings;
3649	struct e1000_hw *hw = &adapter->hw;
3650	u32		tctl, txdctl;
3651
3652	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3653	tctl = txdctl = 0;
3654
3655	/* Setup the Tx Descriptor Rings */
3656	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3657		u64 bus_addr = txr->txdma.dma_paddr;
3658
3659		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3660		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3661		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3662		    (uint32_t)(bus_addr >> 32));
3663		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3664		    (uint32_t)bus_addr);
3665
3666		/* Setup the HW Tx Head and Tail descriptor pointers */
3667		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3668		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3669
3670		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3671		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3672		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3673
3674		txr->queue_status = IGB_QUEUE_IDLE;
3675
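		/*
		** Per-queue descriptor control: the prefetch, host and
		** write-back thresholds are packed at bit offsets 0, 8
		** and 16 (hence the shifts below), and QUEUE_ENABLE
		** brings the ring online.
		*/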
3676		txdctl |= IGB_TX_PTHRESH;
3677		txdctl |= IGB_TX_HTHRESH << 8;
3678		txdctl |= IGB_TX_WTHRESH << 16;
3679		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3680		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3681	}
3682
3683	if (adapter->vf_ifp)
3684		return;
3685
3686	e1000_config_collision_dist(hw);
3687
3688	/* Program the Transmit Control Register */
3689	tctl = E1000_READ_REG(hw, E1000_TCTL);
3690	tctl &= ~E1000_TCTL_CT;
3691	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3692		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3693
3694	/* This write will effectively turn on the transmit unit. */
3695	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3696}
3697
3698/*********************************************************************
3699 *
3700 *  Free all transmit rings.
3701 *
3702 **********************************************************************/
3703static void
3704igb_free_transmit_structures(struct adapter *adapter)
3705{
3706	struct tx_ring *txr = adapter->tx_rings;
3707
3708	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3709		IGB_TX_LOCK(txr);
3710		igb_free_transmit_buffers(txr);
3711		igb_dma_free(adapter, &txr->txdma);
3712		IGB_TX_UNLOCK(txr);
3713		IGB_TX_LOCK_DESTROY(txr);
3714	}
3715	free(adapter->tx_rings, M_DEVBUF);
3716}
3717
3718/*********************************************************************
3719 *
3720 *  Free transmit ring related data structures.
3721 *
3722 **********************************************************************/
3723static void
3724igb_free_transmit_buffers(struct tx_ring *txr)
3725{
3726	struct adapter *adapter = txr->adapter;
3727	struct igb_tx_buf *tx_buffer;
3728	int             i;
3729
3730	INIT_DEBUGOUT("free_transmit_ring: begin");
3731
3732	if (txr->tx_buffers == NULL)
3733		return;
3734
3735	tx_buffer = txr->tx_buffers;
3736	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3737		if (tx_buffer->m_head != NULL) {
3738			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3739			    BUS_DMASYNC_POSTWRITE);
3740			bus_dmamap_unload(txr->txtag,
3741			    tx_buffer->map);
3742			m_freem(tx_buffer->m_head);
3743			tx_buffer->m_head = NULL;
3744			if (tx_buffer->map != NULL) {
3745				bus_dmamap_destroy(txr->txtag,
3746				    tx_buffer->map);
3747				tx_buffer->map = NULL;
3748			}
3749		} else if (tx_buffer->map != NULL) {
3750			bus_dmamap_unload(txr->txtag,
3751			    tx_buffer->map);
3752			bus_dmamap_destroy(txr->txtag,
3753			    tx_buffer->map);
3754			tx_buffer->map = NULL;
3755		}
3756	}
3757#ifndef IGB_LEGACY_TX
3758	if (txr->br != NULL)
3759		buf_ring_free(txr->br, M_DEVBUF);
3760#endif
3761	if (txr->tx_buffers != NULL) {
3762		free(txr->tx_buffers, M_DEVBUF);
3763		txr->tx_buffers = NULL;
3764	}
3765	if (txr->txtag != NULL) {
3766		bus_dma_tag_destroy(txr->txtag);
3767		txr->txtag = NULL;
3768	}
3769	return;
3770}
3771
3772/**********************************************************************
3773 *
3774 *  Setup work for hardware segmentation offload (TSO) on
3775 *  adapters using advanced tx descriptors
3776 *
3777 **********************************************************************/
3778static int
3779igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3780    u32 *cmd_type_len, u32 *olinfo_status)
3781{
3782	struct adapter *adapter = txr->adapter;
3783	struct e1000_adv_tx_context_desc *TXD;
3784	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3785	u32 mss_l4len_idx = 0, paylen;
3786	u16 vtag = 0, eh_type;
3787	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3788	struct ether_vlan_header *eh;
3789#ifdef INET6
3790	struct ip6_hdr *ip6;
3791#endif
3792#ifdef INET
3793	struct ip *ip;
3794#endif
3795	struct tcphdr *th;
3796
3797
3798	/*
3799	 * Determine where frame payload starts.
3800	 * Jump over vlan headers if already present
3801	 */
3802	eh = mtod(mp, struct ether_vlan_header *);
3803	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3804		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3805		eh_type = eh->evl_proto;
3806	} else {
3807		ehdrlen = ETHER_HDR_LEN;
3808		eh_type = eh->evl_encap_proto;
3809	}
3810
3811	switch (ntohs(eh_type)) {
3812#ifdef INET6
3813	case ETHERTYPE_IPV6:
3814		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3815		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3816		if (ip6->ip6_nxt != IPPROTO_TCP)
3817			return (ENXIO);
3818		ip_hlen = sizeof(struct ip6_hdr);
3819		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3820		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3821		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3822		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3823		break;
3824#endif
3825#ifdef INET
3826	case ETHERTYPE_IP:
3827		ip = (struct ip *)(mp->m_data + ehdrlen);
3828		if (ip->ip_p != IPPROTO_TCP)
3829			return (ENXIO);
3830		ip->ip_sum = 0;
3831		ip_hlen = ip->ip_hl << 2;
3832		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3833		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3834		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3835		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3836		/* Tell transmit desc to also do IPv4 checksum. */
3837		*olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3838		break;
3839#endif
3840	default:
3841		device_printf(adapter->dev,
3842		    "CSUM_TSO but no supported IP version (0x%04x)\n",
3843		    ntohs(eh_type));
3844		return (ENXIO);
3845	}
3846
3847	ctxd = txr->next_avail_desc;
3848	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3849
3850	tcp_hlen = th->th_off << 2;
3851
3852	/* This is used in the transmit desc in encap */
3853	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
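	/*
	 * paylen counts only the TCP payload; the MAC/IP/TCP headers are
	 * excluded since the hardware regenerates them for each segment
	 * it produces during TSO.
	 */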
3854
3855	/* VLAN MACLEN IPLEN */
3856	if (mp->m_flags & M_VLANTAG) {
3857		vtag = htole16(mp->m_pkthdr.ether_vtag);
3858                vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3859	}
3860
3861	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3862	vlan_macip_lens |= ip_hlen;
3863	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3864
3865	/* ADV DTYPE TUCMD */
3866	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3867	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3868	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3869
3870	/* MSS L4LEN IDX */
3871	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3872	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3873	/* 82575 needs the queue index added */
3874	if (adapter->hw.mac.type == e1000_82575)
3875		mss_l4len_idx |= txr->me << 4;
3876	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3877
3878	TXD->seqnum_seed = htole32(0);
3879
3880	if (++ctxd == txr->num_desc)
3881		ctxd = 0;
3882
3883	txr->tx_avail--;
3884	txr->next_avail_desc = ctxd;
3885	*cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3886	*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3887	*olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3888	++txr->tso_tx;
3889	return (0);
3890}
3891
3892/*********************************************************************
3893 *
3894 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3895 *
3896 **********************************************************************/
3897
3898static int
3899igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3900    u32 *cmd_type_len, u32 *olinfo_status)
3901{
3902	struct e1000_adv_tx_context_desc *TXD;
3903	struct adapter *adapter = txr->adapter;
3904	struct ether_vlan_header *eh;
3905	struct ip *ip;
3906	struct ip6_hdr *ip6;
3907	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3908	int	ehdrlen, ip_hlen = 0;
3909	u16	etype;
3910	u8	ipproto = 0;
3911	int	ctxd = txr->next_avail_desc;
3912	u16	vtag = 0;
3913
3914	/* First check if TSO is to be used */
3915	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3916		return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3917
3918	/* Indicate the whole packet as payload when not doing TSO */
3919	*olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3920
3921	/* Now ready a context descriptor */
3922	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3923
3924	/*
3925	** In advanced descriptors the vlan tag must
3926	** be placed into the context descriptor. Hence
3927	** we need to make one even if not doing offloads.
3928	*/
3929	if (mp->m_flags & M_VLANTAG) {
3930		vtag = htole16(mp->m_pkthdr.ether_vtag);
3931		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3932	} else if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) {
3933		return (0);
3934	}
3935
3936	/*
3937	 * Determine where frame payload starts.
3938	 * Jump over vlan headers if already present,
3939	 * helpful for QinQ too.
3940	 */
3941	eh = mtod(mp, struct ether_vlan_header *);
3942	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3943		etype = ntohs(eh->evl_proto);
3944		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3945	} else {
3946		etype = ntohs(eh->evl_encap_proto);
3947		ehdrlen = ETHER_HDR_LEN;
3948	}
3949
3950	/* Set the ether header length */
3951	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3952
3953	switch (etype) {
3954		case ETHERTYPE_IP:
3955			ip = (struct ip *)(mp->m_data + ehdrlen);
3956			ip_hlen = ip->ip_hl << 2;
3957			ipproto = ip->ip_p;
3958			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3959			break;
3960		case ETHERTYPE_IPV6:
3961			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3962			ip_hlen = sizeof(struct ip6_hdr);
3963			/* XXX-BZ this will go badly in case of ext hdrs. */
3964			ipproto = ip6->ip6_nxt;
3965			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3966			break;
3967		default:
3968			break;
3969	}
3970
3971	vlan_macip_lens |= ip_hlen;
3972	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3973
3974	switch (ipproto) {
3975		case IPPROTO_TCP:
3976#if __FreeBSD_version >= 1000000
3977			if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) {
3978#else
3979			if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3980#endif
3981				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3982				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3983			}
3984			break;
3985		case IPPROTO_UDP:
3986#if __FreeBSD_version >= 1000000
3987			if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) {
3988#else
3989			if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3990#endif
3991				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3992				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3993			}
3994			break;
3995
3996#if __FreeBSD_version >= 800000
3997		case IPPROTO_SCTP:
3998#if __FreeBSD_version >= 1000000
3999			if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) {
4000#else
4001			if (mp->m_pkthdr.csum_flags & CSUM_SCTP) {
4002#endif
4003				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4004				*olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4005			}
4006			break;
4007#endif
4008		default:
4009			break;
4010	}
4011
4012	/* 82575 needs the queue index added */
4013	if (adapter->hw.mac.type == e1000_82575)
4014		mss_l4len_idx = txr->me << 4;
4015
4016	/* Now copy bits into descriptor */
4017	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
4018	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
4019	TXD->seqnum_seed = htole32(0);
4020	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
4021
4022	/* We've consumed the first desc, adjust counters */
4023	if (++ctxd == txr->num_desc)
4024		ctxd = 0;
4025	txr->next_avail_desc = ctxd;
4026	--txr->tx_avail;
4027
4028        return (0);
4029}
4030
4031/**********************************************************************
4032 *
4033 *  Examine each tx_buffer in the used queue. If the hardware is done
4034 *  processing the packet then free associated resources. The
4035 *  tx_buffer is put back on the free queue.
4036 *
4037 *  A TRUE return means there is more work in the ring to clean; FALSE means it is empty.
4038 **********************************************************************/
4039static bool
4040igb_txeof(struct tx_ring *txr)
4041{
4042	struct adapter		*adapter = txr->adapter;
4043#ifdef DEV_NETMAP
4044	struct ifnet		*ifp = adapter->ifp;
4045#endif /* DEV_NETMAP */
4046	u32			work, processed = 0;
4047	int			limit = adapter->tx_process_limit;
4048	struct igb_tx_buf	*buf;
4049	union e1000_adv_tx_desc *txd;
4050
4051	mtx_assert(&txr->tx_mtx, MA_OWNED);
4052
4053#ifdef DEV_NETMAP
4054	if (netmap_tx_irq(ifp, txr->me))
4055		return (FALSE);
4056#endif /* DEV_NETMAP */
4057
4058	if (txr->tx_avail == txr->num_desc) {
4059		txr->queue_status = IGB_QUEUE_IDLE;
4060		return FALSE;
4061	}
4062
4063	/* Get work starting point */
4064	work = txr->next_to_clean;
4065	buf = &txr->tx_buffers[work];
4066	txd = &txr->tx_base[work];
4067	work -= txr->num_desc; /* The distance to ring end */
4068        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4069            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
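	/*
	** Note: 'work' is kept biased downward by num_desc (relying on
	** unsigned wrap-around), so it reaches zero exactly when the ring
	** index wraps; the (!work) tests below use that to detect the
	** wrap, and the bias is removed before storing next_to_clean.
	*/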
4070	do {
4071		union e1000_adv_tx_desc *eop = buf->eop;
4072		if (eop == NULL) /* No work */
4073			break;
4074
4075		if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4076			break;	/* I/O not complete */
4077
4078		if (buf->m_head) {
4079			txr->bytes +=
4080			    buf->m_head->m_pkthdr.len;
4081			bus_dmamap_sync(txr->txtag,
4082			    buf->map,
4083			    BUS_DMASYNC_POSTWRITE);
4084			bus_dmamap_unload(txr->txtag,
4085			    buf->map);
4086			m_freem(buf->m_head);
4087			buf->m_head = NULL;
4088		}
4089		buf->eop = NULL;
4090		++txr->tx_avail;
4091
4092		/* We clean the range if multi segment */
4093		while (txd != eop) {
4094			++txd;
4095			++buf;
4096			++work;
4097			/* wrap the ring? */
4098			if (__predict_false(!work)) {
4099				work -= txr->num_desc;
4100				buf = txr->tx_buffers;
4101				txd = txr->tx_base;
4102			}
4103			if (buf->m_head) {
4104				txr->bytes +=
4105				    buf->m_head->m_pkthdr.len;
4106				bus_dmamap_sync(txr->txtag,
4107				    buf->map,
4108				    BUS_DMASYNC_POSTWRITE);
4109				bus_dmamap_unload(txr->txtag,
4110				    buf->map);
4111				m_freem(buf->m_head);
4112				buf->m_head = NULL;
4113			}
4114			++txr->tx_avail;
4115			buf->eop = NULL;
4116
4117		}
4118		++txr->packets;
4119		++processed;
4120		txr->watchdog_time = ticks;
4121
4122		/* Try the next packet */
4123		++txd;
4124		++buf;
4125		++work;
4126		/* reset with a wrap */
4127		if (__predict_false(!work)) {
4128			work -= txr->num_desc;
4129			buf = txr->tx_buffers;
4130			txd = txr->tx_base;
4131		}
4132		prefetch(txd);
4133	} while (__predict_true(--limit));
4134
4135	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4136	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4137
4138	work += txr->num_desc;
4139	txr->next_to_clean = work;
4140
4141	/*
4142	** Watchdog calculation, we know there's
4143	** work outstanding or the first return
4144	** would have been taken, so none processed
4145	** for too long indicates a hang.
4146	*/
4147	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4148		txr->queue_status |= IGB_QUEUE_HUNG;
4149
4150	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4151		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4152
4153	if (txr->tx_avail == txr->num_desc) {
4154		txr->queue_status = IGB_QUEUE_IDLE;
4155		return (FALSE);
4156	}
4157
4158	return (TRUE);
4159}
4160
4161/*********************************************************************
4162 *
4163 *  Refresh mbuf buffers for RX descriptor rings
4164 *   - keeps its own state, so discards due to resource
4165 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4166 *     it simply returns, keeping its placeholder, and can be
4167 *     called again later to retry.
4168 *
4169 **********************************************************************/
4170static void
4171igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4172{
4173	struct adapter		*adapter = rxr->adapter;
4174	bus_dma_segment_t	hseg[1];
4175	bus_dma_segment_t	pseg[1];
4176	struct igb_rx_buf	*rxbuf;
4177	struct mbuf		*mh, *mp;
4178	int			i, j, nsegs, error;
4179	bool			refreshed = FALSE;
4180
4181	i = j = rxr->next_to_refresh;
4182	/*
4183	** Get one descriptor beyond
4184	** our work mark to control
4185	** the loop.
4186	*/
4187	if (++j == adapter->num_rx_desc)
4188		j = 0;
4189
4190	while (j != limit) {
4191		rxbuf = &rxr->rx_buffers[i];
4192		/* No hdr mbuf used with header split off */
4193		if (rxr->hdr_split == FALSE)
4194			goto no_split;
4195		if (rxbuf->m_head == NULL) {
4196			mh = m_gethdr(M_NOWAIT, MT_DATA);
4197			if (mh == NULL)
4198				goto update;
4199		} else
4200			mh = rxbuf->m_head;
4201
4202		mh->m_pkthdr.len = mh->m_len = MHLEN;
4203		mh->m_len = MHLEN;
4204		mh->m_flags |= M_PKTHDR;
4205		/* Get the memory mapping */
4206		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4207		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4208		if (error != 0) {
4209			printf("Refresh mbufs: hdr dmamap load"
4210			    " failure - %d\n", error);
4211			m_free(mh);
4212			rxbuf->m_head = NULL;
4213			goto update;
4214		}
4215		rxbuf->m_head = mh;
4216		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4217		    BUS_DMASYNC_PREREAD);
4218		rxr->rx_base[i].read.hdr_addr =
4219		    htole64(hseg[0].ds_addr);
4220no_split:
4221		if (rxbuf->m_pack == NULL) {
4222			mp = m_getjcl(M_NOWAIT, MT_DATA,
4223			    M_PKTHDR, adapter->rx_mbuf_sz);
4224			if (mp == NULL)
4225				goto update;
4226		} else
4227			mp = rxbuf->m_pack;
4228
4229		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4230		/* Get the memory mapping */
4231		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4232		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4233		if (error != 0) {
4234			printf("Refresh mbufs: payload dmamap load"
4235			    " failure - %d\n", error);
4236			m_free(mp);
4237			rxbuf->m_pack = NULL;
4238			goto update;
4239		}
4240		rxbuf->m_pack = mp;
4241		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4242		    BUS_DMASYNC_PREREAD);
4243		rxr->rx_base[i].read.pkt_addr =
4244		    htole64(pseg[0].ds_addr);
4245		refreshed = TRUE; /* at least one buffer was refreshed */
4246
4247		i = j; /* our next is precalculated */
4248		rxr->next_to_refresh = i;
4249		if (++j == adapter->num_rx_desc)
4250			j = 0;
4251	}
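	/*
	** The tail register is only bumped when at least one descriptor
	** was refreshed; on an mbuf or mapping failure we bail out above
	** with next_to_refresh still pointing at the unfilled slot, so a
	** later call simply retries it.
	*/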
4252update:
4253	if (refreshed) /* update tail */
4254		E1000_WRITE_REG(&adapter->hw,
4255		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4256	return;
4257}
4258
4259
4260/*********************************************************************
4261 *
4262 *  Allocate memory for rx_buffer structures. Since we use one
4263 *  rx_buffer per received packet, the maximum number of rx_buffer's
4264 *  that we'll need is equal to the number of receive descriptors
4265 *  that we've allocated.
4266 *
4267 **********************************************************************/
4268static int
4269igb_allocate_receive_buffers(struct rx_ring *rxr)
4270{
4271	struct	adapter 	*adapter = rxr->adapter;
4272	device_t 		dev = adapter->dev;
4273	struct igb_rx_buf	*rxbuf;
4274	int             	i, bsize, error;
4275
4276	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4277	if (!(rxr->rx_buffers =
4278	    (struct igb_rx_buf *) malloc(bsize,
4279	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4280		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4281		error = ENOMEM;
4282		goto fail;
4283	}
4284
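	/*
	 * Two tags are used: a small one (MSIZE) for the header mbufs
	 * used when header split is enabled, and a larger one (up to
	 * MJUM9BYTES) for the payload clusters.
	 */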
4285	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4286				   1, 0,		/* alignment, bounds */
4287				   BUS_SPACE_MAXADDR,	/* lowaddr */
4288				   BUS_SPACE_MAXADDR,	/* highaddr */
4289				   NULL, NULL,		/* filter, filterarg */
4290				   MSIZE,		/* maxsize */
4291				   1,			/* nsegments */
4292				   MSIZE,		/* maxsegsize */
4293				   0,			/* flags */
4294				   NULL,		/* lockfunc */
4295				   NULL,		/* lockfuncarg */
4296				   &rxr->htag))) {
4297		device_printf(dev, "Unable to create RX DMA tag\n");
4298		goto fail;
4299	}
4300
4301	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4302				   1, 0,		/* alignment, bounds */
4303				   BUS_SPACE_MAXADDR,	/* lowaddr */
4304				   BUS_SPACE_MAXADDR,	/* highaddr */
4305				   NULL, NULL,		/* filter, filterarg */
4306				   MJUM9BYTES,		/* maxsize */
4307				   1,			/* nsegments */
4308				   MJUM9BYTES,		/* maxsegsize */
4309				   0,			/* flags */
4310				   NULL,		/* lockfunc */
4311				   NULL,		/* lockfuncarg */
4312				   &rxr->ptag))) {
4313		device_printf(dev, "Unable to create RX payload DMA tag\n");
4314		goto fail;
4315	}
4316
4317	for (i = 0; i < adapter->num_rx_desc; i++) {
4318		rxbuf = &rxr->rx_buffers[i];
4319		error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4320		if (error) {
4321			device_printf(dev,
4322			    "Unable to create RX head DMA maps\n");
4323			goto fail;
4324		}
4325		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4326		if (error) {
4327			device_printf(dev,
4328			    "Unable to create RX packet DMA maps\n");
4329			goto fail;
4330		}
4331	}
4332
4333	return (0);
4334
4335fail:
4336	/* Frees all, but can handle partial completion */
4337	igb_free_receive_structures(adapter);
4338	return (error);
4339}
4340
4341
4342static void
4343igb_free_receive_ring(struct rx_ring *rxr)
4344{
4345	struct	adapter		*adapter = rxr->adapter;
4346	struct igb_rx_buf	*rxbuf;
4347
4348
4349	for (int i = 0; i < adapter->num_rx_desc; i++) {
4350		rxbuf = &rxr->rx_buffers[i];
4351		if (rxbuf->m_head != NULL) {
4352			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4353			    BUS_DMASYNC_POSTREAD);
4354			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4355			rxbuf->m_head->m_flags |= M_PKTHDR;
4356			m_freem(rxbuf->m_head);
4357		}
4358		if (rxbuf->m_pack != NULL) {
4359			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4360			    BUS_DMASYNC_POSTREAD);
4361			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4362			rxbuf->m_pack->m_flags |= M_PKTHDR;
4363			m_freem(rxbuf->m_pack);
4364		}
4365		rxbuf->m_head = NULL;
4366		rxbuf->m_pack = NULL;
4367	}
4368}
4369
4370
4371/*********************************************************************
4372 *
4373 *  Initialize a receive ring and its buffers.
4374 *
4375 **********************************************************************/
4376static int
4377igb_setup_receive_ring(struct rx_ring *rxr)
4378{
4379	struct	adapter		*adapter;
4380	struct  ifnet		*ifp;
4381	device_t		dev;
4382	struct igb_rx_buf	*rxbuf;
4383	bus_dma_segment_t	pseg[1], hseg[1];
4384	struct lro_ctrl		*lro = &rxr->lro;
4385	int			rsize, nsegs, error = 0;
4386#ifdef DEV_NETMAP
4387	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4388	struct netmap_slot *slot;
4389#endif /* DEV_NETMAP */
4390
4391	adapter = rxr->adapter;
4392	dev = adapter->dev;
4393	ifp = adapter->ifp;
4394
4395	/* Clear the ring contents */
4396	IGB_RX_LOCK(rxr);
4397#ifdef DEV_NETMAP
4398	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4399#endif /* DEV_NETMAP */
4400	rsize = roundup2(adapter->num_rx_desc *
4401	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4402	bzero((void *)rxr->rx_base, rsize);
4403
4404	/*
4405	** Free current RX buffer structures and their mbufs
4406	*/
4407	igb_free_receive_ring(rxr);
4408
4409	/* Configure for header split? */
4410	if (igb_header_split)
4411		rxr->hdr_split = TRUE;
4412
4413        /* Now replenish the ring mbufs */
4414	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4415		struct mbuf	*mh, *mp;
4416
4417		rxbuf = &rxr->rx_buffers[j];
4418#ifdef DEV_NETMAP
4419		if (slot) {
4420			/* slot sj is mapped to the j-th NIC-ring entry */
4421			int sj = netmap_idx_n2k(na->rx_rings[rxr->me], j);
4422			uint64_t paddr;
4423			void *addr;
4424
4425			addr = PNMB(na, slot + sj, &paddr);
4426			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4427			/* Update descriptor */
4428			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4429			continue;
4430		}
4431#endif /* DEV_NETMAP */
4432		if (rxr->hdr_split == FALSE)
4433			goto skip_head;
4434
4435		/* First the header */
4436		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4437		if (rxbuf->m_head == NULL) {
4438			error = ENOBUFS;
4439                        goto fail;
4440		}
4441		m_adj(rxbuf->m_head, ETHER_ALIGN);
4442		mh = rxbuf->m_head;
4443		mh->m_len = mh->m_pkthdr.len = MHLEN;
4444		mh->m_flags |= M_PKTHDR;
4445		/* Get the memory mapping */
4446		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4447		    rxbuf->hmap, rxbuf->m_head, hseg,
4448		    &nsegs, BUS_DMA_NOWAIT);
4449		if (error != 0) /* Nothing elegant to do here */
4450                        goto fail;
4451		bus_dmamap_sync(rxr->htag,
4452		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4453		/* Update descriptor */
4454		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4455
4456skip_head:
4457		/* Now the payload cluster */
4458		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4459		    M_PKTHDR, adapter->rx_mbuf_sz);
4460		if (rxbuf->m_pack == NULL) {
4461			error = ENOBUFS;
4462                        goto fail;
4463		}
4464		mp = rxbuf->m_pack;
4465		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4466		/* Get the memory mapping */
4467		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4468		    rxbuf->pmap, mp, pseg,
4469		    &nsegs, BUS_DMA_NOWAIT);
4470		if (error != 0)
4471                        goto fail;
4472		bus_dmamap_sync(rxr->ptag,
4473		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4474		/* Update descriptor */
4475		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4476        }
4477
4478	/* Setup our descriptor indices */
4479	rxr->next_to_check = 0;
4480	rxr->next_to_refresh = adapter->num_rx_desc - 1;
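	/*
	** The ring starts fully populated, so next_to_refresh points at
	** the last (already filled) slot and only advances as igb_rxeof
	** consumes descriptors and igb_refresh_mbufs refills them.
	*/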
4481	rxr->lro_enabled = FALSE;
4482	rxr->rx_split_packets = 0;
4483	rxr->rx_bytes = 0;
4484
4485	rxr->fmp = NULL;
4486	rxr->lmp = NULL;
4487
4488	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4489	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4490
4491	/*
4492	** Now set up the LRO interface; we also
4493	** only do header split when LRO is
4494	** enabled, since the two are rarely
4495	** desirable independently of each other.
4496	*/
4497	if (ifp->if_capenable & IFCAP_LRO) {
4498		error = tcp_lro_init(lro);
4499		if (error) {
4500			device_printf(dev, "LRO Initialization failed!\n");
4501			goto fail;
4502		}
4503		INIT_DEBUGOUT("RX LRO Initialized\n");
4504		rxr->lro_enabled = TRUE;
4505		lro->ifp = adapter->ifp;
4506	}
4507
4508	IGB_RX_UNLOCK(rxr);
4509	return (0);
4510
4511fail:
4512	igb_free_receive_ring(rxr);
4513	IGB_RX_UNLOCK(rxr);
4514	return (error);
4515}
4516
4517
4518/*********************************************************************
4519 *
4520 *  Initialize all receive rings.
4521 *
4522 **********************************************************************/
4523static int
4524igb_setup_receive_structures(struct adapter *adapter)
4525{
4526	struct rx_ring *rxr = adapter->rx_rings;
4527	int i;
4528
4529	for (i = 0; i < adapter->num_queues; i++, rxr++)
4530		if (igb_setup_receive_ring(rxr))
4531			goto fail;
4532
4533	return (0);
4534fail:
4535	/*
4536	 * Free the RX buffers allocated so far; we only need to handle
4537	 * the rings that completed, since the failing ring cleaned up
4538	 * after itself. 'i' is the endpoint.
4539	 */
4540	for (int j = 0; j < i; ++j) {
4541		rxr = &adapter->rx_rings[j];
4542		IGB_RX_LOCK(rxr);
4543		igb_free_receive_ring(rxr);
4544		IGB_RX_UNLOCK(rxr);
4545	}
4546
4547	return (ENOBUFS);
4548}
4549
4550/*
4551 * Initialise the RSS mapping for NICs that support multiple transmit/
4552 * receive rings.
4553 */
4554static void
4555igb_initialise_rss_mapping(struct adapter *adapter)
4556{
4557	struct e1000_hw *hw = &adapter->hw;
4558	int i;
4559	int queue_id;
4560	u32 reta;
4561	u32 rss_key[10], mrqc, shift = 0;
4562
4563	/* XXX? */
4564	if (adapter->hw.mac.type == e1000_82575)
4565		shift = 6;
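	/*
	 * The 82575 apparently expects the queue number in the upper
	 * bits of each RETA byte, hence the shift of 6 applied to
	 * queue_id below; later MACs take it in the low bits.
	 */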
4566
4567	/*
4568	 * The redirection table controls which destination
4569	 * queue each bucket redirects traffic to.
4570	 * Each DWORD represents four queues, with the LSB
4571	 * being the first queue in the DWORD.
4572	 *
4573	 * This just allocates buckets to queues using round-robin
4574	 * allocation.
4575	 *
4576	 * NOTE: It Just Happens to line up with the default
4577	 * RSS allocation method.
4578	 */
4579
4580	/* Populate the 128-entry redirection table, four entries per register write */
4581	reta = 0;
4582	for (i = 0; i < 128; i++) {
4583#ifdef	RSS
4584		queue_id = rss_get_indirection_to_bucket(i);
4585		/*
4586		 * If we have more queues than buckets, we'll
4587		 * end up mapping buckets to a subset of the
4588		 * queues.
4589		 *
4590		 * If we have more buckets than queues, we'll
4591		 * end up instead assigning multiple buckets
4592		 * to queues.
4593		 *
4594		 * Both are suboptimal, but we need to handle
4595		 * the case so we don't go out of bounds
4596		 * indexing arrays and such.
4597		 */
4598		queue_id = queue_id % adapter->num_queues;
4599#else
4600		queue_id = (i % adapter->num_queues);
4601#endif
4602		/* Adjust if required */
4603		queue_id = queue_id << shift;
4604
4605		/*
4606		 * The low 8 bits are for hash value (n+0);
4607		 * The next 8 bits are for hash value (n+1), etc.
4608		 */
4609		reta = reta >> 8;
4610		reta = reta | ( ((uint32_t) queue_id) << 24);
4611		if ((i & 3) == 3) {
4612			E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4613			reta = 0;
4614		}
4615	}
4616
4617	/* Now fill in hash table */
4618
4619	/*
4620	 * MRQC: Multiple Receive Queues Command
4621	 * Set queuing to RSS control, number depends on the device.
4622	 */
4623	mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4624
4625#ifdef	RSS
4626	/* XXX ew typecasting */
4627	rss_getkey((uint8_t *) &rss_key);
4628#else
4629	arc4rand(&rss_key, sizeof(rss_key), 0);
4630#endif
4631	for (i = 0; i < 10; i++)
4632		E1000_WRITE_REG_ARRAY(hw,
4633		    E1000_RSSRK(0), i, rss_key[i]);
4634
4635	/*
4636	 * Configure the RSS fields to hash upon.
4637	 */
4638	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4639	    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4640	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4641	    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4642	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4643	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4644	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4645	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4646
4647	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4648}
4649
4650/*********************************************************************
4651 *
4652 *  Enable receive unit.
4653 *
4654 **********************************************************************/
4655static void
4656igb_initialize_receive_units(struct adapter *adapter)
4657{
4658	struct rx_ring	*rxr = adapter->rx_rings;
4659	struct ifnet	*ifp = adapter->ifp;
4660	struct e1000_hw *hw = &adapter->hw;
4661	u32		rctl, rxcsum, psize, srrctl = 0;
4662
4663	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4664
4665	/*
4666	 * Make sure receives are disabled while setting
4667	 * up the descriptor ring
4668	 */
4669	rctl = E1000_READ_REG(hw, E1000_RCTL);
4670	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4671
4672	/*
4673	** Set up for header split
4674	*/
4675	if (igb_header_split) {
4676		/* Use a standard mbuf for the header */
4677		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4678		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4679	} else
4680		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4681
4682	/*
4683	** Set up for jumbo frames
4684	*/
4685	if (ifp->if_mtu > ETHERMTU) {
4686		rctl |= E1000_RCTL_LPE;
4687		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4688			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4689			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4690		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4691			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4692			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4693		}
4694		/* Set maximum packet len */
4695		psize = adapter->max_frame_size;
4696		/* are we on a vlan? */
4697		if (adapter->ifp->if_vlantrunk != NULL)
4698			psize += VLAN_TAG_SIZE;
4699		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4700	} else {
4701		rctl &= ~E1000_RCTL_LPE;
4702		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4703		rctl |= E1000_RCTL_SZ_2048;
4704	}
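	/*
	 * The SRRCTL packet buffer size field is expressed in 1 KB
	 * units, which is what the BSIZEPKT shift above converts the
	 * 2K/4K/8K byte counts into.
	 */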
4705
4706	/*
4707	 * If TX flow control is disabled and there's >1 queue defined,
4708	 * enable DROP.
4709	 *
4710	 * This drops frames rather than hanging the RX MAC for all queues.
4711	 */
4712	if ((adapter->num_queues > 1) &&
4713	    (adapter->fc == e1000_fc_none ||
4714	     adapter->fc == e1000_fc_rx_pause)) {
4715		srrctl |= E1000_SRRCTL_DROP_EN;
4716	}
4717
4718	/* Setup the Base and Length of the Rx Descriptor Rings */
4719	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4720		u64 bus_addr = rxr->rxdma.dma_paddr;
4721		u32 rxdctl;
4722
4723		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4724		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4725		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4726		    (uint32_t)(bus_addr >> 32));
4727		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4728		    (uint32_t)bus_addr);
4729		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4730		/* Enable this Queue */
4731		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4732		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4733		rxdctl &= 0xFFF00000;
4734		rxdctl |= IGB_RX_PTHRESH;
4735		rxdctl |= IGB_RX_HTHRESH << 8;
4736		rxdctl |= IGB_RX_WTHRESH << 16;
4737		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4738	}
4739
4740	/*
4741	** Setup for RX MultiQueue
4742	*/
4743	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4744	if (adapter->num_queues > 1) {
4745
4746		/* rss setup */
4747		igb_initialise_rss_mapping(adapter);
4748
4749		/*
4750		** NOTE: Receive Full-Packet Checksum Offload
4751		** is mutually exclusive with Multiqueue. However
4752		** this is not the same as TCP/IP checksums which
4753		** still work.
4754		*/
4755		rxcsum |= E1000_RXCSUM_PCSD;
4756#if __FreeBSD_version >= 800000
4757		/* For SCTP Offload */
4758		if ((hw->mac.type != e1000_82575) &&
4759		    (ifp->if_capenable & IFCAP_RXCSUM))
4760			rxcsum |= E1000_RXCSUM_CRCOFL;
4761#endif
4762	} else {
4763		/* Non RSS setup */
4764		if (ifp->if_capenable & IFCAP_RXCSUM) {
4765			rxcsum |= E1000_RXCSUM_IPPCSE;
4766#if __FreeBSD_version >= 800000
4767			if (adapter->hw.mac.type != e1000_82575)
4768				rxcsum |= E1000_RXCSUM_CRCOFL;
4769#endif
4770		} else
4771			rxcsum &= ~E1000_RXCSUM_TUOFL;
4772	}
4773	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4774
4775	/* Setup the Receive Control Register */
4776	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4777	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4778		   E1000_RCTL_RDMTS_HALF |
4779		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4780	/* Strip CRC bytes. */
4781	rctl |= E1000_RCTL_SECRC;
4782	/* Make sure VLAN Filters are off */
4783	rctl &= ~E1000_RCTL_VFE;
4784	/* Don't store bad packets */
4785	rctl &= ~E1000_RCTL_SBP;
4786
4787	/* Enable Receives */
4788	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4789
4790	/*
4791	 * Setup the HW Rx Head and Tail Descriptor Pointers
4792	 *   - needs to be after enable
4793	 */
4794	for (int i = 0; i < adapter->num_queues; i++) {
4795		rxr = &adapter->rx_rings[i];
4796		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4797#ifdef DEV_NETMAP
4798		/*
4799		 * an init() while a netmap client is active must
4800		 * preserve the rx buffers passed to userspace.
4801		 * In this driver it means we adjust RDT to
4802		 * something different from next_to_refresh
4803		 * (which is not used in netmap mode).
4804		 */
4805		if (ifp->if_capenable & IFCAP_NETMAP) {
4806			struct netmap_adapter *na = NA(adapter->ifp);
4807			struct netmap_kring *kring = na->rx_rings[i];
4808			int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4809
4810			if (t >= adapter->num_rx_desc)
4811				t -= adapter->num_rx_desc;
4812			else if (t < 0)
4813				t += adapter->num_rx_desc;
4814			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4815		} else
4816#endif /* DEV_NETMAP */
4817		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4818	}
4819	return;
4820}
4821
4822/*********************************************************************
4823 *
4824 *  Free receive rings.
4825 *
4826 **********************************************************************/
4827static void
4828igb_free_receive_structures(struct adapter *adapter)
4829{
4830	struct rx_ring *rxr = adapter->rx_rings;
4831
4832	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4833		struct lro_ctrl	*lro = &rxr->lro;
4834		igb_free_receive_buffers(rxr);
4835		tcp_lro_free(lro);
4836		igb_dma_free(adapter, &rxr->rxdma);
4837	}
4838
4839	free(adapter->rx_rings, M_DEVBUF);
4840}
4841
4842/*********************************************************************
4843 *
4844 *  Free receive ring data structures.
4845 *
4846 **********************************************************************/
4847static void
4848igb_free_receive_buffers(struct rx_ring *rxr)
4849{
4850	struct adapter		*adapter = rxr->adapter;
4851	struct igb_rx_buf	*rxbuf;
4852	int i;
4853
4854	INIT_DEBUGOUT("free_receive_structures: begin");
4855
4856	/* Cleanup any existing buffers */
4857	if (rxr->rx_buffers != NULL) {
4858		for (i = 0; i < adapter->num_rx_desc; i++) {
4859			rxbuf = &rxr->rx_buffers[i];
4860			if (rxbuf->m_head != NULL) {
4861				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4862				    BUS_DMASYNC_POSTREAD);
4863				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4864				rxbuf->m_head->m_flags |= M_PKTHDR;
4865				m_freem(rxbuf->m_head);
4866			}
4867			if (rxbuf->m_pack != NULL) {
4868				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4869				    BUS_DMASYNC_POSTREAD);
4870				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4871				rxbuf->m_pack->m_flags |= M_PKTHDR;
4872				m_freem(rxbuf->m_pack);
4873			}
4874			rxbuf->m_head = NULL;
4875			rxbuf->m_pack = NULL;
4876			if (rxbuf->hmap != NULL) {
4877				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4878				rxbuf->hmap = NULL;
4879			}
4880			if (rxbuf->pmap != NULL) {
4881				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4882				rxbuf->pmap = NULL;
4883			}
4884		}
4885		if (rxr->rx_buffers != NULL) {
4886			free(rxr->rx_buffers, M_DEVBUF);
4887			rxr->rx_buffers = NULL;
4888		}
4889	}
4890
4891	if (rxr->htag != NULL) {
4892		bus_dma_tag_destroy(rxr->htag);
4893		rxr->htag = NULL;
4894	}
4895	if (rxr->ptag != NULL) {
4896		bus_dma_tag_destroy(rxr->ptag);
4897		rxr->ptag = NULL;
4898	}
4899}
4900
4901static __inline void
4902igb_rx_discard(struct rx_ring *rxr, int i)
4903{
4904	struct igb_rx_buf	*rbuf;
4905
4906	rbuf = &rxr->rx_buffers[i];
4907
4908	/* Partially received? Free the chain */
4909	if (rxr->fmp != NULL) {
4910		rxr->fmp->m_flags |= M_PKTHDR;
4911		m_freem(rxr->fmp);
4912		rxr->fmp = NULL;
4913		rxr->lmp = NULL;
4914	}
4915
4916	/*
4917	** With advanced descriptors the writeback
4918	** clobbers the buffer addrs, so it's easier
4919	** to just free the existing mbufs and take
4920	** the normal refresh path to get new buffers
4921	** and mapping.
4922	*/
4923	if (rbuf->m_head) {
4924		m_free(rbuf->m_head);
4925		rbuf->m_head = NULL;
4926		bus_dmamap_unload(rxr->htag, rbuf->hmap);
4927	}
4928
4929	if (rbuf->m_pack) {
4930		m_free(rbuf->m_pack);
4931		rbuf->m_pack = NULL;
4932		bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4933	}
4934
4935	return;
4936}
4937
4938static __inline void
4939igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4940{
4941
4942	/*
4943	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4944	 * has been verified by hardware, and which do not carry a VLAN tag
4945	 * in the ethernet header.
4946	 */
4947	if (rxr->lro_enabled &&
4948	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4949	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4950	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4951	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4952	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4953	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4954		/*
4955		 * Send to the stack if:
4956		 *  - LRO not enabled, or
4957		 *  - no LRO resources, or
4958		 *  - lro enqueue fails
4959		 */
4960		if (rxr->lro.lro_cnt != 0)
4961			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4962				return;
4963	}
4964	IGB_RX_UNLOCK(rxr);
4965	(*ifp->if_input)(ifp, m);
4966	IGB_RX_LOCK(rxr);
4967}
4968
4969/*********************************************************************
4970 *
4971 *  This routine executes in interrupt context. It replenishes
4972 *  the mbufs in the descriptor and sends data which has been
4973 *  dma'ed into host memory to upper layer.
4974 *
4975 *  We loop at most count times if count is > 0, or until done if
4976 *  count < 0.
4977 *
4978 *  Return TRUE if more to clean, FALSE otherwise
4979 *********************************************************************/
4980static bool
4981igb_rxeof(struct igb_queue *que, int count, int *done)
4982{
4983	struct adapter		*adapter = que->adapter;
4984	struct rx_ring		*rxr = que->rxr;
4985	struct ifnet		*ifp = adapter->ifp;
4986	struct lro_ctrl		*lro = &rxr->lro;
4987	int			i, processed = 0, rxdone = 0;
4988	u32			ptype, staterr = 0;
4989	union e1000_adv_rx_desc	*cur;
4990
4991	IGB_RX_LOCK(rxr);
4992	/* Sync the ring. */
4993	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4994	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4995
4996#ifdef DEV_NETMAP
4997	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4998		IGB_RX_UNLOCK(rxr);
4999		return (FALSE);
5000	}
5001#endif /* DEV_NETMAP */
5002
5003	/* Main clean loop */
5004	for (i = rxr->next_to_check; count != 0;) {
5005		struct mbuf		*sendmp, *mh, *mp;
5006		struct igb_rx_buf	*rxbuf;
5007		u16			hlen, plen, hdr, vtag, pkt_info;
5008		bool			eop = FALSE;
5009
5010		cur = &rxr->rx_base[i];
5011		staterr = le32toh(cur->wb.upper.status_error);
5012		if ((staterr & E1000_RXD_STAT_DD) == 0)
5013			break;
5014		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
5015			break;
5016		count--;
5017		sendmp = mh = mp = NULL;
5018		cur->wb.upper.status_error = 0;
5019		rxbuf = &rxr->rx_buffers[i];
5020		plen = le16toh(cur->wb.upper.length);
5021		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
5022		if (((adapter->hw.mac.type == e1000_i350) ||
5023		    (adapter->hw.mac.type == e1000_i354)) &&
5024		    (staterr & E1000_RXDEXT_STATERR_LB))
5025			vtag = be16toh(cur->wb.upper.vlan);
5026		else
5027			vtag = le16toh(cur->wb.upper.vlan);
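		/*
		** On i350/i354 the VLAN field of locally looped-back
		** frames is apparently reported byte-swapped, hence
		** the be16toh() in that case above.
		*/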
5028		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
5029		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
5030		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
5031
5032		/*
5033		 * Free the frame (all segments) if we're at EOP and
5034		 * it's an error.
5035		 *
5036		 * The datasheet states that EOP + status is only valid for
5037		 * the final segment in a multi-segment frame.
5038		 */
5039		if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
5040			adapter->dropped_pkts++;
5041			++rxr->rx_discarded;
5042			igb_rx_discard(rxr, i);
5043			goto next_desc;
5044		}
5045
5046		/*
5047		** The way the hardware is configured to
5048		** split, it will ONLY use the header buffer
5049		** when header split is enabled, otherwise we
5050		** get normal behavior, i.e., both header and
5051		** payload are DMA'd into the payload buffer.
5052		**
5053		** The fmp test is to catch the case where a
5054		** packet spans multiple descriptors, in that
5055		** case only the first header is valid.
5056		*/
5057		if (rxr->hdr_split && rxr->fmp == NULL) {
5058			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5059			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5060			    E1000_RXDADV_HDRBUFLEN_SHIFT;
5061			if (hlen > IGB_HDR_BUF)
5062				hlen = IGB_HDR_BUF;
5063			mh = rxr->rx_buffers[i].m_head;
5064			mh->m_len = hlen;
5065			/* clear buf pointer for refresh */
5066			rxbuf->m_head = NULL;
5067			/*
5068			** Get the payload length, this
5069			** could be zero if it's a small
5070			** packet.
5071			*/
5072			if (plen > 0) {
5073				mp = rxr->rx_buffers[i].m_pack;
5074				mp->m_len = plen;
5075				mh->m_next = mp;
5076				/* clear buf pointer */
5077				rxbuf->m_pack = NULL;
5078				rxr->rx_split_packets++;
5079			}
5080		} else {
5081			/*
5082			** Either no header split, or a
5083			** secondary piece of a fragmented
5084			** split packet.
5085			*/
5086			mh = rxr->rx_buffers[i].m_pack;
5087			mh->m_len = plen;
5088			/* clear buf info for refresh */
5089			rxbuf->m_pack = NULL;
5090		}
5091		bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5092
5093		++processed; /* So we know when to refresh */
5094
5095		/* Initial frame - setup */
5096		if (rxr->fmp == NULL) {
5097			mh->m_pkthdr.len = mh->m_len;
5098			/* Save the head of the chain */
5099			rxr->fmp = mh;
5100			rxr->lmp = mh;
5101			if (mp != NULL) {
5102				/* Add payload if split */
5103				mh->m_pkthdr.len += mp->m_len;
5104				rxr->lmp = mh->m_next;
5105			}
5106		} else {
5107			/* Chain mbuf's together */
5108			rxr->lmp->m_next = mh;
5109			rxr->lmp = rxr->lmp->m_next;
5110			rxr->fmp->m_pkthdr.len += mh->m_len;
5111		}
5112
5113		if (eop) {
5114			rxr->fmp->m_pkthdr.rcvif = ifp;
5115			rxr->rx_packets++;
5116			/* capture data for AIM */
5117			rxr->packets++;
5118			rxr->bytes += rxr->fmp->m_pkthdr.len;
5119			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5120
5121			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5122				igb_rx_checksum(staterr, rxr->fmp, ptype);
5123
5124			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5125			    (staterr & E1000_RXD_STAT_VP) != 0) {
5126				rxr->fmp->m_pkthdr.ether_vtag = vtag;
5127				rxr->fmp->m_flags |= M_VLANTAG;
5128			}
5129
5130			/*
5131			 * In case of multiqueue, we have RXCSUM.PCSD bit set
5132			 * and never cleared. This means we have RSS hash
5133			 * available to be used.
5134			 */
5135			if (adapter->num_queues > 1) {
5136				rxr->fmp->m_pkthdr.flowid =
5137				    le32toh(cur->wb.lower.hi_dword.rss);
5138				switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5139					case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5140						M_HASHTYPE_SET(rxr->fmp,
5141						    M_HASHTYPE_RSS_TCP_IPV4);
5142					break;
5143					case E1000_RXDADV_RSSTYPE_IPV4:
5144						M_HASHTYPE_SET(rxr->fmp,
5145						    M_HASHTYPE_RSS_IPV4);
5146					break;
5147					case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5148						M_HASHTYPE_SET(rxr->fmp,
5149						    M_HASHTYPE_RSS_TCP_IPV6);
5150					break;
5151					case E1000_RXDADV_RSSTYPE_IPV6_EX:
5152						M_HASHTYPE_SET(rxr->fmp,
5153						    M_HASHTYPE_RSS_IPV6_EX);
5154					break;
5155					case E1000_RXDADV_RSSTYPE_IPV6:
5156						M_HASHTYPE_SET(rxr->fmp,
5157						    M_HASHTYPE_RSS_IPV6);
5158					break;
5159					case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5160						M_HASHTYPE_SET(rxr->fmp,
5161						    M_HASHTYPE_RSS_TCP_IPV6_EX);
5162					break;
5163					default:
5164						/* XXX fallthrough */
5165						M_HASHTYPE_SET(rxr->fmp,
5166						    M_HASHTYPE_OPAQUE_HASH);
5167				}
5168			} else {
5169#ifndef IGB_LEGACY_TX
5170				rxr->fmp->m_pkthdr.flowid = que->msix;
5171				M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5172#endif
5173			}
5174			sendmp = rxr->fmp;
5175			/* Make sure to set M_PKTHDR. */
5176			sendmp->m_flags |= M_PKTHDR;
5177			rxr->fmp = NULL;
5178			rxr->lmp = NULL;
5179		}
5180
5181next_desc:
5182		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5183		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5184
5185		/* Advance our pointers to the next descriptor. */
5186		if (++i == adapter->num_rx_desc)
5187			i = 0;
5188		/*
5189		** Send to the stack or LRO
5190		*/
5191		if (sendmp != NULL) {
5192			rxr->next_to_check = i;
5193			igb_rx_input(rxr, ifp, sendmp, ptype);
5194			i = rxr->next_to_check;
5195			rxdone++;
5196		}
5197
5198		/* Every 8 descriptors we go to refresh mbufs */
5199		if (processed == 8) {
5200                        igb_refresh_mbufs(rxr, i);
5201                        processed = 0;
5202		}
5203	}
5204
5205	/* Catch any remainders */
5206	if (igb_rx_unrefreshed(rxr))
5207		igb_refresh_mbufs(rxr, i);
5208
5209	rxr->next_to_check = i;
5210
5211	/*
5212	 * Flush any outstanding LRO work
5213	 */
5214	tcp_lro_flush_all(lro);
5215
5216	if (done != NULL)
5217		*done += rxdone;
5218
5219	IGB_RX_UNLOCK(rxr);
5220	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5221}
5222
5223/*********************************************************************
5224 *
5225 *  Verify that the hardware indicated that the checksum is valid.
5226 *  Inform the stack about the status of checksum so that stack
5227 *  doesn't spend time verifying the checksum.
5228 *
5229 *********************************************************************/
5230static void
5231igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5232{
5233	u16 status = (u16)staterr;
5234	u8  errors = (u8) (staterr >> 24);
5235	int sctp;
5236
5237	/* Ignore Checksum bit is set */
5238	if (status & E1000_RXD_STAT_IXSM) {
5239		mp->m_pkthdr.csum_flags = 0;
5240		return;
5241	}
5242
5243	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5244	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5245		sctp = 1;
5246	else
5247		sctp = 0;
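	/*
	 * SCTP uses a CRC32c rather than a 16-bit checksum, so for SCTP
	 * frames only the "valid" flag is reported and csum_data is left
	 * unset below.
	 */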
5248	if (status & E1000_RXD_STAT_IPCS) {
5249		/* Did it pass? */
5250		if (!(errors & E1000_RXD_ERR_IPE)) {
5251			/* IP Checksum Good */
5252			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5253			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5254		} else
5255			mp->m_pkthdr.csum_flags = 0;
5256	}
5257
5258	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5259		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5260#if __FreeBSD_version >= 800000
5261		if (sctp) /* reassign */
5262			type = CSUM_SCTP_VALID;
5263#endif
5264		/* Did it pass? */
5265		if (!(errors & E1000_RXD_ERR_TCPE)) {
5266			mp->m_pkthdr.csum_flags |= type;
5267			if (sctp == 0)
5268				mp->m_pkthdr.csum_data = htons(0xffff);
5269		}
5270	}
5271	return;
5272}
5273
5274/*
5275 * This routine is run via a vlan
5276 * config EVENT
5277 */
5278static void
5279igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5280{
5281	struct adapter	*adapter = ifp->if_softc;
5282	u32		index, bit;
5283
5284	if (ifp->if_softc !=  arg)   /* Not our event */
5285		return;
5286
5287	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5288                return;
5289
5290	IGB_CORE_LOCK(adapter);
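	/*
	** The shadow VFTA is a 128 x 32-bit bitmap covering all 4096
	** VLAN IDs: bits 11:5 of the tag select the word and bits 4:0
	** the bit within it.
	*/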
5291	index = (vtag >> 5) & 0x7F;
5292	bit = vtag & 0x1F;
5293	adapter->shadow_vfta[index] |= (1 << bit);
5294	++adapter->num_vlans;
5295	/* Change hw filter setting */
5296	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5297		igb_setup_vlan_hw_support(adapter);
5298	IGB_CORE_UNLOCK(adapter);
5299}
5300
5301/*
5302 * This routine is run via a vlan
5303 * unconfig EVENT
5304 */
5305static void
5306igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5307{
5308	struct adapter	*adapter = ifp->if_softc;
5309	u32		index, bit;
5310
5311	if (ifp->if_softc !=  arg)
5312		return;
5313
5314	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5315                return;
5316
5317	IGB_CORE_LOCK(adapter);
5318	index = (vtag >> 5) & 0x7F;
5319	bit = vtag & 0x1F;
5320	adapter->shadow_vfta[index] &= ~(1 << bit);
5321	--adapter->num_vlans;
5322	/* Change hw filter setting */
5323	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5324		igb_setup_vlan_hw_support(adapter);
5325	IGB_CORE_UNLOCK(adapter);
5326}
5327
5328static void
5329igb_setup_vlan_hw_support(struct adapter *adapter)
5330{
5331	struct e1000_hw *hw = &adapter->hw;
5332	struct ifnet	*ifp = adapter->ifp;
5333	u32             reg;
5334
5335	if (adapter->vf_ifp) {
5336		e1000_rlpml_set_vf(hw,
5337		    adapter->max_frame_size + VLAN_TAG_SIZE);
5338		return;
5339	}
5340
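	/* Enable 802.1Q VLAN processing (CTRL.VME) */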
5341	reg = E1000_READ_REG(hw, E1000_CTRL);
5342	reg |= E1000_CTRL_VME;
5343	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5344
5345	/* Enable the Filter Table */
5346	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5347		reg = E1000_READ_REG(hw, E1000_RCTL);
5348		reg &= ~E1000_RCTL_CFIEN;
5349		reg |= E1000_RCTL_VFE;
5350		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5351	}
5352
5353	/* Update the frame size */
5354	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5355	    adapter->max_frame_size + VLAN_TAG_SIZE);
5356
5357	/* Don't bother with table if no vlans */
5358	if ((adapter->num_vlans == 0) ||
5359	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5360                return;
5361	/*
5362	** A soft reset zeroes out the VFTA, so
5363	** we need to repopulate it now.
5364	*/
5365	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5366                if (adapter->shadow_vfta[i] != 0) {
5367			if (adapter->vf_ifp)
5368				e1000_vfta_set_vf(hw,
5369				    adapter->shadow_vfta[i], TRUE);
5370			else
5371				e1000_write_vfta(hw,
5372				    i, adapter->shadow_vfta[i]);
5373		}
5374}
5375
5376static void
5377igb_enable_intr(struct adapter *adapter)
5378{
5379	/* With RSS set up what to auto clear */
5380	if (adapter->msix_mem) {
5381		u32 mask = (adapter->que_mask | adapter->link_mask);
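		/*
		** EIMS unmasks the per-queue and link vectors; EIAC and
		** EIAM let the hardware auto-clear and auto-mask those
		** causes when their MSI-X vector fires, so the handlers
		** need not do it themselves.
		*/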
5382		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5383		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5384		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5385		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5386		    E1000_IMS_LSC);
5387	} else {
5388		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5389		    IMS_ENABLE_MASK);
5390	}
5391	E1000_WRITE_FLUSH(&adapter->hw);
5392
5393	return;
5394}
5395
5396static void
5397igb_disable_intr(struct adapter *adapter)
5398{
5399	if (adapter->msix_mem) {
5400		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5401		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5402	}
5403	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5404	E1000_WRITE_FLUSH(&adapter->hw);
5405	return;
5406}
5407
5408/*
5409 * Bit of a misnomer: what this really means is
5410 * to enable OS management of the system, i.e.
5411 * to disable certain hardware management features.
5412 */
5413static void
5414igb_init_manageability(struct adapter *adapter)
5415{
5416	if (adapter->has_manage) {
5417		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5418		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5419
5420		/* disable hardware interception of ARP */
5421		manc &= ~(E1000_MANC_ARP_EN);
5422
5423                /* enable receiving management packets to the host */
5424		manc |= E1000_MANC_EN_MNG2HOST;
5425		manc2h |= 1 << 5;  /* Mng Port 623 */
5426		manc2h |= 1 << 6;  /* Mng Port 664 */
5427		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5428		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5429	}
5430}
5431
5432/*
5433 * Give control back to hardware management
5434 * controller if there is one.
5435 */
5436static void
5437igb_release_manageability(struct adapter *adapter)
5438{
5439	if (adapter->has_manage) {
5440		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5441
5442		/* re-enable hardware interception of ARP */
5443		manc |= E1000_MANC_ARP_EN;
5444		manc &= ~E1000_MANC_EN_MNG2HOST;
5445
5446		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5447	}
5448}
5449
5450/*
5451 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5452 * For ASF and Pass Through versions of f/w this means that
5453 * the driver is loaded.
5454 *
5455 */
5456static void
5457igb_get_hw_control(struct adapter *adapter)
5458{
5459	u32 ctrl_ext;
5460
5461	if (adapter->vf_ifp)
5462		return;
5463
5464	/* Let firmware know the driver has taken over */
5465	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5466	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5467	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5468}
5469
5470/*
5471 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5472 * For ASF and Pass Through versions of f/w this means that the
5473 * driver is no longer loaded.
5474 *
5475 */
5476static void
5477igb_release_hw_control(struct adapter *adapter)
5478{
5479	u32 ctrl_ext;
5480
5481	if (adapter->vf_ifp)
5482		return;
5483
	/* Let firmware take over control of h/w */
5485	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5486	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5487	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5488}
5489
5490static int
5491igb_is_valid_ether_addr(uint8_t *addr)
5492{
5493	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5494
5495	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5496		return (FALSE);
5497	}
5498
5499	return (TRUE);
5500}
5501
5502
5503/*
5504 * Enable PCI Wake On Lan capability
5505 */
5506static void
5507igb_enable_wakeup(device_t dev)
5508{
5509	struct adapter	*adapter = device_get_softc(dev);
5510	struct ifnet	*ifp = adapter->ifp;
5511	u32		pmc, ctrl, ctrl_ext, rctl, wuc;
5512	u16		status;
5513
5514	if (pci_find_cap(dev, PCIY_PMG, &pmc) != 0)
5515		return;
5516
5517	adapter->wol = E1000_READ_REG(&adapter->hw, E1000_WUFC);
5518	if (ifp->if_capenable & IFCAP_WOL_MAGIC)
5519		adapter->wol |=  E1000_WUFC_MAG;
5520	else
5521		adapter->wol &= ~E1000_WUFC_MAG;
5522
5523	if (ifp->if_capenable & IFCAP_WOL_MCAST) {
5524		adapter->wol |=  E1000_WUFC_MC;
5525		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
5526		rctl |= E1000_RCTL_MPE;
5527		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
5528	} else
5529		adapter->wol &= ~E1000_WUFC_MC;
5530
5531	if (ifp->if_capenable & IFCAP_WOL_UCAST)
5532		adapter->wol |=  E1000_WUFC_EX;
5533	else
5534		adapter->wol &= ~E1000_WUFC_EX;
5535
5536	if (!(adapter->wol & (E1000_WUFC_EX | E1000_WUFC_MAG | E1000_WUFC_MC)))
5537		goto pme;
5538
5539	/* Advertise the wakeup capability */
5540	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
5541	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
5542	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
5543
5544	/* Keep the laser running on Fiber adapters */
5545	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
5546	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
5547		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5548		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
5549		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
5550	}
5551
5552	/* Enable wakeup by the MAC */
5553	wuc = E1000_READ_REG(&adapter->hw, E1000_WUC);
5554	wuc |= E1000_WUC_PME_EN | E1000_WUC_APME;
5555	E1000_WRITE_REG(&adapter->hw, E1000_WUC, wuc);
5556	E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
5557
5558pme:
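	/*
	** Reflect the WOL setting in the PCI power management
	** capability: clear any stale PME status, and re-enable PME
	** generation only when WOL is requested.
	*/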
5559	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
5560	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
5561	if (ifp->if_capenable & IFCAP_WOL)
5562		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5563	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
5564}
5565
5566static void
5567igb_led_func(void *arg, int onoff)
5568{
5569	struct adapter	*adapter = arg;
5570
5571	IGB_CORE_LOCK(adapter);
5572	if (onoff) {
5573		e1000_setup_led(&adapter->hw);
5574		e1000_led_on(&adapter->hw);
5575	} else {
5576		e1000_led_off(&adapter->hw);
5577		e1000_cleanup_led(&adapter->hw);
5578	}
5579	IGB_CORE_UNLOCK(adapter);
5580}
5581
5582static uint64_t
5583igb_get_vf_counter(if_t ifp, ift_counter cnt)
5584{
5585	struct adapter *adapter;
5586	struct e1000_vf_stats *stats;
5587#ifndef IGB_LEGACY_TX
5588	struct tx_ring *txr;
5589	uint64_t rv;
5590#endif
5591
5592	adapter = if_getsoftc(ifp);
5593	stats = (struct e1000_vf_stats *)adapter->stats;
5594
5595	switch (cnt) {
5596	case IFCOUNTER_IPACKETS:
5597		return (stats->gprc);
5598	case IFCOUNTER_OPACKETS:
5599		return (stats->gptc);
5600	case IFCOUNTER_IBYTES:
5601		return (stats->gorc);
5602	case IFCOUNTER_OBYTES:
5603		return (stats->gotc);
5604	case IFCOUNTER_IMCASTS:
5605		return (stats->mprc);
5606	case IFCOUNTER_IERRORS:
5607		return (adapter->dropped_pkts);
5608	case IFCOUNTER_OERRORS:
5609		return (adapter->watchdog_events);
5610#ifndef IGB_LEGACY_TX
5611	case IFCOUNTER_OQDROPS:
5612		rv = 0;
5613		txr = adapter->tx_rings;
5614		for (int i = 0; i < adapter->num_queues; i++, txr++)
5615			rv += txr->br->br_drops;
5616		return (rv);
5617#endif
5618	default:
5619		return (if_get_counter_default(ifp, cnt));
5620	}
5621}
5622
5623static uint64_t
5624igb_get_counter(if_t ifp, ift_counter cnt)
5625{
5626	struct adapter *adapter;
5627	struct e1000_hw_stats *stats;
5628#ifndef IGB_LEGACY_TX
5629	struct tx_ring *txr;
5630	uint64_t rv;
5631#endif
5632
5633	adapter = if_getsoftc(ifp);
5634	if (adapter->vf_ifp)
5635		return (igb_get_vf_counter(ifp, cnt));
5636
5637	stats = (struct e1000_hw_stats *)adapter->stats;
5638
5639	switch (cnt) {
5640	case IFCOUNTER_IPACKETS:
5641		return (stats->gprc);
5642	case IFCOUNTER_OPACKETS:
5643		return (stats->gptc);
5644	case IFCOUNTER_IBYTES:
5645		return (stats->gorc);
5646	case IFCOUNTER_OBYTES:
5647		return (stats->gotc);
5648	case IFCOUNTER_IMCASTS:
5649		return (stats->mprc);
5650	case IFCOUNTER_OMCASTS:
5651		return (stats->mptc);
5652	case IFCOUNTER_IERRORS:
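		/* Driver drops plus the MAC receive error counters */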
5653		return (adapter->dropped_pkts + stats->rxerrc +
5654		    stats->crcerrs + stats->algnerrc +
5655		    stats->ruc + stats->roc + stats->cexterr);
5656	case IFCOUNTER_OERRORS:
5657		return (stats->ecol + stats->latecol +
5658		    adapter->watchdog_events);
5659	case IFCOUNTER_COLLISIONS:
5660		return (stats->colc);
5661	case IFCOUNTER_IQDROPS:
5662		return (stats->mpc);
5663#ifndef IGB_LEGACY_TX
5664	case IFCOUNTER_OQDROPS:
5665		rv = 0;
5666		txr = adapter->tx_rings;
5667		for (int i = 0; i < adapter->num_queues; i++, txr++)
5668			rv += txr->br->br_drops;
5669		return (rv);
5670#endif
5671	default:
5672		return (if_get_counter_default(ifp, cnt));
5673	}
5674}
5675
5676/**********************************************************************
5677 *
5678 *  Update the board statistics counters.
5679 *
5680 **********************************************************************/
5681static void
5682igb_update_stats_counters(struct adapter *adapter)
5683{
	struct e1000_hw		*hw = &adapter->hw;
5685	struct e1000_hw_stats	*stats;
5686
5687	/*
	** The virtual function adapter has only a
	** small, controlled set of stats; update
	** only those and return.
5691	*/
5692	if (adapter->vf_ifp) {
5693		igb_update_vf_stats_counters(adapter);
5694		return;
5695	}
5696
5697	stats = (struct e1000_hw_stats	*)adapter->stats;
5698
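	/*
	** Read the symbol and sequence error counters only on
	** copper media or when link is up.
	*/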
5699	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5700	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5701		stats->symerrs +=
5702		    E1000_READ_REG(hw,E1000_SYMERRS);
5703		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5704	}
5705
5706	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5707	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5708	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5709	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5710
5711	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5712	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5713	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5714	stats->dc += E1000_READ_REG(hw, E1000_DC);
5715	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5716	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5717	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5718	/*
5719	** For watchdog management we need to know if we have been
5720	** paused during the last interval, so capture that here.
5721	*/
	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	stats->xoffrxc += adapter->pause_frames;
5724	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5725	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5726	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5727	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5728	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5729	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5730	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5731	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5732	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5733	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5734	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5735	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5736
5737	/* For the 64-bit byte counters the low dword must be read first. */
5738	/* Both registers clear on the read of the high dword */
5739
5740	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5741	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5742	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5743	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5744
5745	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5746	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5747	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5748	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5749	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5750
5751	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5752	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5753	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5754
5755	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5756	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5757	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5758	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5759
5760	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5761	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5762	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5763	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5764	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5765	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5766	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5767	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5768	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5769	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5770
5771	/* Interrupt Counts */
5772
5773	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5774	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5775	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5776	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5777	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5778	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5779	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5780	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5781	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5782
5783	/* Host to Card Statistics */
5784
5785	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5786	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5787	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5788	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5789	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5790	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5791	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5792	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5793	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5794	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5795	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5796	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5797	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5798	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5799
5800	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5801	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5802	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5803	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5804	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5805	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5806
5807	/* Driver specific counters */
5808	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5809	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5810	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5811	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5812	adapter->packet_buf_alloc_tx =
5813	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5814	adapter->packet_buf_alloc_rx =
5815	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5816}
5817
5818
5819/**********************************************************************
5820 *
5821 *  Initialize the VF board statistics counters.
5822 *
5823 **********************************************************************/
5824static void
5825igb_vf_init_stats(struct adapter *adapter)
5826{
	struct e1000_hw *hw = &adapter->hw;
5828	struct e1000_vf_stats	*stats;
5829
5830	stats = (struct e1000_vf_stats	*)adapter->stats;
5831	if (stats == NULL)
5832		return;
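	/*
	** Snapshot the current hardware counters; the VF update
	** routine accumulates deltas against these values.
	*/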
	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5838}
5839
5840/**********************************************************************
5841 *
5842 *  Update the VF board statistics counters.
5843 *
5844 **********************************************************************/
5845static void
5846igb_update_vf_stats_counters(struct adapter *adapter)
5847{
5848	struct e1000_hw *hw = &adapter->hw;
5849	struct e1000_vf_stats	*stats;
5850
5851	if (adapter->link_speed == 0)
5852		return;
5853
5854	stats = (struct e1000_vf_stats	*)adapter->stats;
5855
5856	UPDATE_VF_REG(E1000_VFGPRC,
5857	    stats->last_gprc, stats->gprc);
5858	UPDATE_VF_REG(E1000_VFGORC,
5859	    stats->last_gorc, stats->gorc);
5860	UPDATE_VF_REG(E1000_VFGPTC,
5861	    stats->last_gptc, stats->gptc);
5862	UPDATE_VF_REG(E1000_VFGOTC,
5863	    stats->last_gotc, stats->gotc);
5864	UPDATE_VF_REG(E1000_VFMPRC,
5865	    stats->last_mprc, stats->mprc);
5866}
5867
5868/* Export a single 32-bit register via a read-only sysctl. */
5869static int
5870igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5871{
5872	struct adapter *adapter;
5873	u_int val;
5874
5875	adapter = oidp->oid_arg1;
5876	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5877	return (sysctl_handle_int(oidp, &val, 0, req));
5878}
5879
5880/*
5881**  Tuneable interrupt rate handler
5882*/
5883static int
5884igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5885{
5886	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5887	int			error;
5888	u32			reg, usec, rate;
5889
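	/*
	** EITR holds the interval in bits 14:2; treat it as
	** microseconds and report the equivalent interrupt rate.
	*/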
5890	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5891	usec = ((reg & 0x7FFC) >> 2);
5892	if (usec > 0)
5893		rate = 1000000 / usec;
5894	else
5895		rate = 0;
5896	error = sysctl_handle_int(oidp, &rate, 0, req);
5897	if (error || !req->newptr)
5898		return error;
5899	return 0;
5900}
5901
5902/*
5903 * Add sysctl variables, one per statistic, to the system.
5904 */
5905static void
5906igb_add_hw_stats(struct adapter *adapter)
5907{
5908	device_t dev = adapter->dev;
5909
5910	struct tx_ring *txr = adapter->tx_rings;
5911	struct rx_ring *rxr = adapter->rx_rings;
5912
5913	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5914	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5915	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5916	struct e1000_hw_stats *stats = adapter->stats;
5917
5918	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5919	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5920
5921#define QUEUE_NAME_LEN 32
5922	char namebuf[QUEUE_NAME_LEN];
5923
5924	/* Driver Statistics */
5925	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5926			CTLFLAG_RD, &adapter->dropped_pkts,
5927			"Driver dropped packets");
5928	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5929			CTLFLAG_RD, &adapter->link_irq,
5930			"Link MSIX IRQ Handled");
5931	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5932			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5933			"Defragmenting mbuf chain failed");
5934	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5935			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5936			"Driver tx dma failure in xmit");
5937	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5938			CTLFLAG_RD, &adapter->rx_overruns,
5939			"RX overruns");
5940	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5941			CTLFLAG_RD, &adapter->watchdog_events,
5942			"Watchdog timeouts");
5943
5944	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5945			CTLFLAG_RD, &adapter->device_control,
5946			"Device Control Register");
5947	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5948			CTLFLAG_RD, &adapter->rx_control,
5949			"Receiver Control Register");
5950	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5951			CTLFLAG_RD, &adapter->int_mask,
5952			"Interrupt Mask");
5953	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5954			CTLFLAG_RD, &adapter->eint_mask,
5955			"Extended Interrupt Mask");
5956	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5957			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5958			"Transmit Buffer Packet Allocation");
5959	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5960			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5961			"Receive Buffer Packet Allocation");
5962	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5963			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5964			"Flow Control High Watermark");
5965	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5966			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5967			"Flow Control Low Watermark");
5968
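	/*
	** One sysctl node per queue: interrupt rate, descriptor
	** ring head/tail registers and per-ring packet counters.
	*/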
5969	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5970		struct lro_ctrl *lro = &rxr->lro;
5971
5972		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5973		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5974					    CTLFLAG_RD, NULL, "Queue Name");
5975		queue_list = SYSCTL_CHILDREN(queue_node);
5976
5977		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5978				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5979				sizeof(&adapter->queues[i]),
5980				igb_sysctl_interrupt_rate_handler,
5981				"IU", "Interrupt Rate");
5982
5983		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5984				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5985				igb_sysctl_reg_handler, "IU",
5986 				"Transmit Descriptor Head");
5987		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5988				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5989				igb_sysctl_reg_handler, "IU",
5990 				"Transmit Descriptor Tail");
5991		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5992				CTLFLAG_RD, &txr->no_desc_avail,
5993				"Queue Descriptors Unavailable");
5994		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5995				CTLFLAG_RD, &txr->total_packets,
5996				"Queue Packets Transmitted");
5997
5998		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5999				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
6000				igb_sysctl_reg_handler, "IU",
6001				"Receive Descriptor Head");
6002		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
6003				CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
6004				igb_sysctl_reg_handler, "IU",
6005				"Receive Descriptor Tail");
6006		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
6007				CTLFLAG_RD, &rxr->rx_packets,
6008				"Queue Packets Received");
6009		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
6010				CTLFLAG_RD, &rxr->rx_bytes,
6011				"Queue Bytes Received");
6012		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
6013				CTLFLAG_RD, &lro->lro_queued, 0,
6014				"LRO Queued");
6015		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
6016				CTLFLAG_RD, &lro->lro_flushed, 0,
6017				"LRO Flushed");
6018	}
6019
6020	/* MAC stats get their own sub node */
6021
6022	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
6023				    CTLFLAG_RD, NULL, "MAC Statistics");
6024	stat_list = SYSCTL_CHILDREN(stat_node);
6025
6026	/*
	** The VF adapter has a very limited set of stats
	** since it's not managing the metal, so to speak.
6029	*/
	if (adapter->vf_ifp) {
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
				CTLFLAG_RD, &stats->gprc,
				"Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
				CTLFLAG_RD, &stats->gptc,
				"Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
				CTLFLAG_RD, &stats->gorc,
				"Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
				CTLFLAG_RD, &stats->gotc,
				"Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
				CTLFLAG_RD, &stats->mprc,
				"Multicast Packets Received");
		return;
	}
6048
6049	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
6050			CTLFLAG_RD, &stats->ecol,
6051			"Excessive collisions");
6052	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
6053			CTLFLAG_RD, &stats->scc,
6054			"Single collisions");
6055	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
6056			CTLFLAG_RD, &stats->mcc,
6057			"Multiple collisions");
6058	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
6059			CTLFLAG_RD, &stats->latecol,
6060			"Late collisions");
6061	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
6062			CTLFLAG_RD, &stats->colc,
6063			"Collision Count");
6064	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
6065			CTLFLAG_RD, &stats->symerrs,
6066			"Symbol Errors");
6067	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
6068			CTLFLAG_RD, &stats->sec,
6069			"Sequence Errors");
6070	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
6071			CTLFLAG_RD, &stats->dc,
6072			"Defer Count");
6073	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
6074			CTLFLAG_RD, &stats->mpc,
6075			"Missed Packets");
6076	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
6077			CTLFLAG_RD, &stats->rlec,
6078			"Receive Length Errors");
6079	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
6080			CTLFLAG_RD, &stats->rnbc,
6081			"Receive No Buffers");
6082	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
6083			CTLFLAG_RD, &stats->ruc,
6084			"Receive Undersize");
6085	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
6086			CTLFLAG_RD, &stats->rfc,
6087			"Fragmented Packets Received");
6088	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
6089			CTLFLAG_RD, &stats->roc,
6090			"Oversized Packets Received");
6091	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
6092			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
6094	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6095			CTLFLAG_RD, &stats->rxerrc,
6096			"Receive Errors");
6097	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6098			CTLFLAG_RD, &stats->crcerrs,
6099			"CRC errors");
6100	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6101			CTLFLAG_RD, &stats->algnerrc,
6102			"Alignment Errors");
6103	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6104			CTLFLAG_RD, &stats->tncrs,
6105			"Transmit with No CRS");
6106	/* On 82575 these are collision counts */
6107	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6108			CTLFLAG_RD, &stats->cexterr,
6109			"Collision/Carrier extension errors");
6110	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6111			CTLFLAG_RD, &stats->xonrxc,
6112			"XON Received");
6113	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6114			CTLFLAG_RD, &stats->xontxc,
6115			"XON Transmitted");
6116	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6117			CTLFLAG_RD, &stats->xoffrxc,
6118			"XOFF Received");
6119	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6120			CTLFLAG_RD, &stats->xofftxc,
6121			"XOFF Transmitted");
6122	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6123			CTLFLAG_RD, &stats->fcruc,
6124			"Unsupported Flow Control Received");
6125	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6126			CTLFLAG_RD, &stats->mgprc,
6127			"Management Packets Received");
6128	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6129			CTLFLAG_RD, &stats->mgpdc,
6130			"Management Packets Dropped");
6131	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6132			CTLFLAG_RD, &stats->mgptc,
6133			"Management Packets Transmitted");
6134	/* Packet Reception Stats */
6135	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6136			CTLFLAG_RD, &stats->tpr,
6137			"Total Packets Received");
6138	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6139			CTLFLAG_RD, &stats->gprc,
6140			"Good Packets Received");
6141	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6142			CTLFLAG_RD, &stats->bprc,
6143			"Broadcast Packets Received");
6144	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6145			CTLFLAG_RD, &stats->mprc,
6146			"Multicast Packets Received");
6147	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6148			CTLFLAG_RD, &stats->prc64,
6149			"64 byte frames received");
6150	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6151			CTLFLAG_RD, &stats->prc127,
6152			"65-127 byte frames received");
6153	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6154			CTLFLAG_RD, &stats->prc255,
6155			"128-255 byte frames received");
6156	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6157			CTLFLAG_RD, &stats->prc511,
6158			"256-511 byte frames received");
6159	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6160			CTLFLAG_RD, &stats->prc1023,
6161			"512-1023 byte frames received");
6162	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6163			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
6165 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6166 			CTLFLAG_RD, &stats->gorc,
6167			"Good Octets Received");
6168	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6169			CTLFLAG_RD, &stats->tor,
6170			"Total Octets Received");
6171
6172	/* Packet Transmission Stats */
6173 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6174 			CTLFLAG_RD, &stats->gotc,
6175 			"Good Octets Transmitted");
6176	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6177			CTLFLAG_RD, &stats->tot,
6178			"Total Octets Transmitted");
6179	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6180			CTLFLAG_RD, &stats->tpt,
6181			"Total Packets Transmitted");
6182	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6183			CTLFLAG_RD, &stats->gptc,
6184			"Good Packets Transmitted");
6185	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6186			CTLFLAG_RD, &stats->bptc,
6187			"Broadcast Packets Transmitted");
6188	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6189			CTLFLAG_RD, &stats->mptc,
6190			"Multicast Packets Transmitted");
6191	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6192			CTLFLAG_RD, &stats->ptc64,
6193			"64 byte frames transmitted");
6194	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6195			CTLFLAG_RD, &stats->ptc127,
6196			"65-127 byte frames transmitted");
6197	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6198			CTLFLAG_RD, &stats->ptc255,
6199			"128-255 byte frames transmitted");
6200	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6201			CTLFLAG_RD, &stats->ptc511,
6202			"256-511 byte frames transmitted");
6203	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6204			CTLFLAG_RD, &stats->ptc1023,
6205			"512-1023 byte frames transmitted");
6206	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6207			CTLFLAG_RD, &stats->ptc1522,
6208			"1024-1522 byte frames transmitted");
6209	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6210			CTLFLAG_RD, &stats->tsctc,
6211			"TSO Contexts Transmitted");
6212	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6213			CTLFLAG_RD, &stats->tsctfc,
6214			"TSO Contexts Failed");
6215
6216
6217	/* Interrupt Stats */
6218
6219	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6220				    CTLFLAG_RD, NULL, "Interrupt Statistics");
6221	int_list = SYSCTL_CHILDREN(int_node);
6222
6223	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6224			CTLFLAG_RD, &stats->iac,
6225			"Interrupt Assertion Count");
6226
6227	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6228			CTLFLAG_RD, &stats->icrxptc,
6229			"Interrupt Cause Rx Pkt Timer Expire Count");
6230
6231	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6232			CTLFLAG_RD, &stats->icrxatc,
6233			"Interrupt Cause Rx Abs Timer Expire Count");
6234
6235	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6236			CTLFLAG_RD, &stats->ictxptc,
6237			"Interrupt Cause Tx Pkt Timer Expire Count");
6238
6239	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6240			CTLFLAG_RD, &stats->ictxatc,
6241			"Interrupt Cause Tx Abs Timer Expire Count");
6242
6243	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6244			CTLFLAG_RD, &stats->ictxqec,
6245			"Interrupt Cause Tx Queue Empty Count");
6246
6247	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6248			CTLFLAG_RD, &stats->ictxqmtc,
6249			"Interrupt Cause Tx Queue Min Thresh Count");
6250
6251	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6252			CTLFLAG_RD, &stats->icrxdmtc,
6253			"Interrupt Cause Rx Desc Min Thresh Count");
6254
6255	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6256			CTLFLAG_RD, &stats->icrxoc,
6257			"Interrupt Cause Receiver Overrun Count");
6258
6259	/* Host to Card Stats */
6260
6261	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6262				    CTLFLAG_RD, NULL,
6263				    "Host to Card Statistics");
6264
6265	host_list = SYSCTL_CHILDREN(host_node);
6266
6267	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6268			CTLFLAG_RD, &stats->cbtmpc,
6269			"Circuit Breaker Tx Packet Count");
6270
6271	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6272			CTLFLAG_RD, &stats->htdpmc,
6273			"Host Transmit Discarded Packets");
6274
6275	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6276			CTLFLAG_RD, &stats->rpthc,
6277			"Rx Packets To Host");
6278
6279	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6280			CTLFLAG_RD, &stats->cbrmpc,
6281			"Circuit Breaker Rx Packet Count");
6282
6283	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6284			CTLFLAG_RD, &stats->cbrdpc,
6285			"Circuit Breaker Rx Dropped Count");
6286
6287	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6288			CTLFLAG_RD, &stats->hgptc,
6289			"Host Good Packets Tx Count");
6290
6291	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6292			CTLFLAG_RD, &stats->htcbdpc,
6293			"Host Tx Circuit Breaker Dropped Count");
6294
6295	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6296			CTLFLAG_RD, &stats->hgorc,
6297			"Host Good Octets Received Count");
6298
6299	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6300			CTLFLAG_RD, &stats->hgotc,
6301			"Host Good Octets Transmit Count");
6302
6303	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6304			CTLFLAG_RD, &stats->lenerrs,
6305			"Length Errors");
6306
6307	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6308			CTLFLAG_RD, &stats->scvpc,
6309			"SerDes/SGMII Code Violation Pkt Count");
6310
6311	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6312			CTLFLAG_RD, &stats->hrmpc,
6313			"Header Redirection Missed Packet Count");
6314}
6315
6316
6317/**********************************************************************
6318 *
 *  This routine provides a way to dump out the adapter EEPROM,
 *  often a useful debug/service tool. It only dumps the first
 *  32 words; the content that matters lives within that extent.
6322 *
6323 **********************************************************************/
6324static int
6325igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6326{
6327	struct adapter *adapter;
6328	int error;
6329	int result;
6330
6331	result = -1;
6332	error = sysctl_handle_int(oidp, &result, 0, req);
6333
6334	if (error || !req->newptr)
6335		return (error);
6336
6337	/*
6338	 * This value will cause a hex dump of the
6339	 * first 32 16-bit words of the EEPROM to
6340	 * the screen.
6341	 */
6342	if (result == 1) {
6343		adapter = (struct adapter *)arg1;
6344		igb_print_nvm_info(adapter);
	}
6346
6347	return (error);
6348}
6349
6350static void
6351igb_print_nvm_info(struct adapter *adapter)
6352{
6353	u16	eeprom_data;
6354	int	i, j, row = 0;
6355
	/* It's a bit crude, but it gets the job done */
6357	printf("\nInterface EEPROM Dump:\n");
6358	printf("Offset\n0x0000  ");
6359	for (i = 0, j = 0; i < 32; i++, j++) {
6360		if (j == 8) { /* Make the offset block */
6361			j = 0; ++row;
6362			printf("\n0x00%x0  ",row);
6363		}
6364		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6365		printf("%04x ", eeprom_data);
6366	}
6367	printf("\n");
6368}
6369
6370static void
6371igb_set_sysctl_value(struct adapter *adapter, const char *name,
6372	const char *description, int *limit, int value)
6373{
6374	*limit = value;
6375	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6376	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6377	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6378}
6379
6380/*
6381** Set flow control using sysctl:
6382** Flow control values:
6383** 	0 - off
6384**	1 - rx pause
6385**	2 - tx pause
6386**	3 - full
6387*/
6388static int
6389igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6390{
6391	int		error;
6392	static int	input = 3; /* default is full */
6393	struct adapter	*adapter = (struct adapter *) arg1;
6394
6395	error = sysctl_handle_int(oidp, &input, 0, req);
6396
6397	if ((error) || (req->newptr == NULL))
6398		return (error);
6399
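	/*
	** The accepted values correspond to the e1000_fc_mode enum:
	** none = 0, rx_pause = 1, tx_pause = 2, full = 3.
	*/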
6400	switch (input) {
6401		case e1000_fc_rx_pause:
6402		case e1000_fc_tx_pause:
6403		case e1000_fc_full:
6404		case e1000_fc_none:
6405			adapter->hw.fc.requested_mode = input;
6406			adapter->fc = input;
6407			break;
6408		default:
6409			/* Do nothing */
6410			return (error);
6411	}
6412
6413	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6414	e1000_force_mac_fc(&adapter->hw);
6415	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
6416	return (error);
6417}
6418
6419/*
** Manage DMA Coalescing:
** Control values:
** 	0/1 - off/on
**	Legal timer values are:
**	250, 500, and 1000-10000 in steps of 1000
6425*/
6426static int
6427igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6428{
6429	struct adapter *adapter = (struct adapter *) arg1;
6430	int		error;
6431
6432	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6433
6434	if ((error) || (req->newptr == NULL))
6435		return (error);
6436
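	/* Validate the requested value; 1 is shorthand for the 1000 default */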
6437	switch (adapter->dmac) {
6438		case 0:
6439			/* Disabling */
6440			break;
6441		case 1: /* Just enable and use default */
6442			adapter->dmac = 1000;
6443			break;
6444		case 250:
6445		case 500:
6446		case 1000:
6447		case 2000:
6448		case 3000:
6449		case 4000:
6450		case 5000:
6451		case 6000:
6452		case 7000:
6453		case 8000:
6454		case 9000:
6455		case 10000:
6456			/* Legal values - allow */
6457			break;
6458		default:
			/* Illegal value, reset and return an error */
6460			adapter->dmac = 0;
6461			return (EINVAL);
6462	}
6463	/* Reinit the interface */
6464	igb_init(adapter);
6465	return (error);
6466}
6467
6468/*
6469** Manage Energy Efficient Ethernet:
6470** Control values:
6471**     0/1 - enabled/disabled
6472*/
6473static int
6474igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6475{
6476	struct adapter	*adapter = (struct adapter *) arg1;
6477	int		error, value;
6478
6479	value = adapter->hw.dev_spec._82575.eee_disable;
6480	error = sysctl_handle_int(oidp, &value, 0, req);
6481	if (error || req->newptr == NULL)
6482		return (error);
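	/* The new EEE setting takes effect on the reinit below */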
6483	IGB_CORE_LOCK(adapter);
6484	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6485	igb_init_locked(adapter);
6486	IGB_CORE_UNLOCK(adapter);
6487	return (0);
6488}
6489