if_igb.c revision 241885
1/******************************************************************************
2
3  Copyright (c) 2001-2012, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 241885 2012-10-22 13:06:09Z eadler $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.5";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select which devices to attach to.
110 *  The last field stores an index into igb_strings.
111 *  The last entry must be all 0s.
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#if __FreeBSD_version >= 800000
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *,
185		    struct tx_ring *, struct mbuf *);
186static void	igb_qflush(struct ifnet *);
187static void	igb_deferred_mq_start(void *, int);
188#else
189static void	igb_start(struct ifnet *);
190static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
191#endif
192static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
193static void	igb_init(void *);
194static void	igb_init_locked(struct adapter *);
195static void	igb_stop(void *);
196static void	igb_media_status(struct ifnet *, struct ifmediareq *);
197static int	igb_media_change(struct ifnet *);
198static void	igb_identify_hardware(struct adapter *);
199static int	igb_allocate_pci_resources(struct adapter *);
200static int	igb_allocate_msix(struct adapter *);
201static int	igb_allocate_legacy(struct adapter *);
202static int	igb_setup_msix(struct adapter *);
203static void	igb_free_pci_resources(struct adapter *);
204static void	igb_local_timer(void *);
205static void	igb_reset(struct adapter *);
206static int	igb_setup_interface(device_t, struct adapter *);
207static int	igb_allocate_queues(struct adapter *);
208static void	igb_configure_queues(struct adapter *);
209
210static int	igb_allocate_transmit_buffers(struct tx_ring *);
211static void	igb_setup_transmit_structures(struct adapter *);
212static void	igb_setup_transmit_ring(struct tx_ring *);
213static void	igb_initialize_transmit_units(struct adapter *);
214static void	igb_free_transmit_structures(struct adapter *);
215static void	igb_free_transmit_buffers(struct tx_ring *);
216
217static int	igb_allocate_receive_buffers(struct rx_ring *);
218static int	igb_setup_receive_structures(struct adapter *);
219static int	igb_setup_receive_ring(struct rx_ring *);
220static void	igb_initialize_receive_units(struct adapter *);
221static void	igb_free_receive_structures(struct adapter *);
222static void	igb_free_receive_buffers(struct rx_ring *);
223static void	igb_free_receive_ring(struct rx_ring *);
224
225static void	igb_enable_intr(struct adapter *);
226static void	igb_disable_intr(struct adapter *);
227static void	igb_update_stats_counters(struct adapter *);
228static bool	igb_txeof(struct tx_ring *);
229
230static __inline	void igb_rx_discard(struct rx_ring *, int);
231static __inline void igb_rx_input(struct rx_ring *,
232		    struct ifnet *, struct mbuf *, u32);
233
234static bool	igb_rxeof(struct igb_queue *, int, int *);
235static void	igb_rx_checksum(u32, struct mbuf *, u32);
236static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
237static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
238		    struct ip *, struct tcphdr *);
239static void	igb_set_promisc(struct adapter *);
240static void	igb_disable_promisc(struct adapter *);
241static void	igb_set_multi(struct adapter *);
242static void	igb_update_link_status(struct adapter *);
243static void	igb_refresh_mbufs(struct rx_ring *, int);
244
245static void	igb_register_vlan(void *, struct ifnet *, u16);
246static void	igb_unregister_vlan(void *, struct ifnet *, u16);
247static void	igb_setup_vlan_hw_support(struct adapter *);
248
249static int	igb_xmit(struct tx_ring *, struct mbuf **);
250static int	igb_dma_malloc(struct adapter *, bus_size_t,
251		    struct igb_dma_alloc *, int);
252static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
253static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
254static void	igb_print_nvm_info(struct adapter *);
255static int 	igb_is_valid_ether_addr(u8 *);
256static void     igb_add_hw_stats(struct adapter *);
257
258static void	igb_vf_init_stats(struct adapter *);
259static void	igb_update_vf_stats_counters(struct adapter *);
260
261/* Management and WOL Support */
262static void	igb_init_manageability(struct adapter *);
263static void	igb_release_manageability(struct adapter *);
264static void     igb_get_hw_control(struct adapter *);
265static void     igb_release_hw_control(struct adapter *);
266static void     igb_enable_wakeup(device_t);
267static void     igb_led_func(void *, int);
268
269static int	igb_irq_fast(void *);
270static void	igb_msix_que(void *);
271static void	igb_msix_link(void *);
272static void	igb_handle_que(void *context, int pending);
273static void	igb_handle_link(void *context, int pending);
274static void	igb_handle_link_locked(struct adapter *);
275
276static void	igb_set_sysctl_value(struct adapter *, const char *,
277		    const char *, int *, int);
278static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
280static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
281
282#ifdef DEVICE_POLLING
283static poll_handler_t igb_poll;
284#endif /* DEVICE_POLLING */
285
286/*********************************************************************
287 *  FreeBSD Device Interface Entry Points
288 *********************************************************************/
289
290static device_method_t igb_methods[] = {
291	/* Device interface */
292	DEVMETHOD(device_probe, igb_probe),
293	DEVMETHOD(device_attach, igb_attach),
294	DEVMETHOD(device_detach, igb_detach),
295	DEVMETHOD(device_shutdown, igb_shutdown),
296	DEVMETHOD(device_suspend, igb_suspend),
297	DEVMETHOD(device_resume, igb_resume),
298	{0, 0}
299};
300
301static driver_t igb_driver = {
302	"igb", igb_methods, sizeof(struct adapter),
303};
304
305static devclass_t igb_devclass;
306DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
307MODULE_DEPEND(igb, pci, 1, 1, 1);
308MODULE_DEPEND(igb, ether, 1, 1, 1);
309
310/*********************************************************************
311 *  Tunable default values.
312 *********************************************************************/
313
314static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
315
316/* Descriptor defaults */
317static int igb_rxd = IGB_DEFAULT_RXD;
318static int igb_txd = IGB_DEFAULT_TXD;
319TUNABLE_INT("hw.igb.rxd", &igb_rxd);
320TUNABLE_INT("hw.igb.txd", &igb_txd);
321SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
322    "Number of receive descriptors per queue");
323SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
324    "Number of transmit descriptors per queue");
325
326/*
327** AIM: Adaptive Interrupt Moderation.
328** When enabled, the interrupt rate is
329** varied over time based on the traffic
330** seen on each interrupt vector.
331*/
332static int igb_enable_aim = TRUE;
333TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
335    "Enable adaptive interrupt moderation");
336
337/*
338 * MSIX should be the default for best performance,
339 * but this allows it to be forced off for testing.
340 */
341static int igb_enable_msix = 1;
342TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
343SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
344    "Enable MSI-X interrupts");
345
346/*
347** Tunable interrupt rate
348*/
349static int igb_max_interrupt_rate = 8000;
350TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
351SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
352    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
353
354/*
355** Header split causes the packet header to
356** be DMA'd to a separate mbuf from the payload.
357** This can have memory alignment benefits, and
358** another plus is that small packets often fit
359** entirely in the header mbuf and thus use no
360** cluster.  It is a very workload-dependent feature.
361*/
362static int igb_header_split = FALSE;
363TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
364SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
365    "Enable receive mbuf header split");
366
367/*
368** This will autoconfigure based on
369** the number of CPUs if left at 0.
370*/
371static int igb_num_queues = 0;
372TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
373SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
374    "Number of queues to configure, 0 indicates autoconfigure");
375
376/*
377** Global variable to store last used CPU when binding queues
378** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
379** queue is bound to a cpu.
380*/
381static int igb_last_bind_cpu = -1;
382
383/* How many packets rxeof tries to clean at a time */
384static int igb_rx_process_limit = 100;
385TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
386SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
387    &igb_rx_process_limit, 0,
388    "Maximum number of received packets to process at a time, -1 means unlimited");
389
390#ifdef DEV_NETMAP	/* see ixgbe.c for details */
391#include <dev/netmap/if_igb_netmap.h>
392#endif /* DEV_NETMAP */
393/*********************************************************************
394 *  Device identification routine
395 *
396 *  igb_probe determines if the driver should be loaded on an
397 *  adapter, based on the adapter's PCI vendor/device ID.
398 *
399 *  return BUS_PROBE_DEFAULT on success, positive on failure
400 *********************************************************************/
401
402static int
403igb_probe(device_t dev)
404{
405	char		adapter_name[60];
406	uint16_t	pci_vendor_id = 0;
407	uint16_t	pci_device_id = 0;
408	uint16_t	pci_subvendor_id = 0;
409	uint16_t	pci_subdevice_id = 0;
410	igb_vendor_info_t *ent;
411
412	INIT_DEBUGOUT("igb_probe: begin");
413
414	pci_vendor_id = pci_get_vendor(dev);
415	if (pci_vendor_id != IGB_VENDOR_ID)
416		return (ENXIO);
417
418	pci_device_id = pci_get_device(dev);
419	pci_subvendor_id = pci_get_subvendor(dev);
420	pci_subdevice_id = pci_get_subdevice(dev);
421
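	/*
	** Scan the ID table; PCI_ANY_ID in the subvendor/subdevice
	** fields acts as a wildcard match.
	*/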
422	ent = igb_vendor_info_array;
423	while (ent->vendor_id != 0) {
424		if ((pci_vendor_id == ent->vendor_id) &&
425		    (pci_device_id == ent->device_id) &&
426
427		    ((pci_subvendor_id == ent->subvendor_id) ||
428		    (ent->subvendor_id == PCI_ANY_ID)) &&
429
430		    ((pci_subdevice_id == ent->subdevice_id) ||
431		    (ent->subdevice_id == PCI_ANY_ID))) {
432			sprintf(adapter_name, "%s %s",
433				igb_strings[ent->index],
434				igb_driver_version);
435			device_set_desc_copy(dev, adapter_name);
436			return (BUS_PROBE_DEFAULT);
437		}
438		ent++;
439	}
440
441	return (ENXIO);
442}
443
444/*********************************************************************
445 *  Device initialization routine
446 *
447 *  The attach entry point is called when the driver is being loaded.
448 *  This routine identifies the type of hardware, allocates all resources
449 *  and initializes the hardware.
450 *
451 *  return 0 on success, positive on failure
452 *********************************************************************/
453
454static int
455igb_attach(device_t dev)
456{
457	struct adapter	*adapter;
458	int		error = 0;
459	u16		eeprom_data;
460
461	INIT_DEBUGOUT("igb_attach: begin");
462
463	if (resource_disabled("igb", device_get_unit(dev))) {
464		device_printf(dev, "Disabled by device hint\n");
465		return (ENXIO);
466	}
467
468	adapter = device_get_softc(dev);
469	adapter->dev = adapter->osdep.dev = dev;
470	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
471
472	/* SYSCTL stuff */
473	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
474	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
475	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
476	    igb_sysctl_nvm_info, "I", "NVM Information");
477
478	igb_set_sysctl_value(adapter, "enable_aim",
479	    "Interrupt Moderation", &adapter->enable_aim,
480	    igb_enable_aim);
481
482	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
483	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
484	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
485	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
486
487	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
488
489	/* Determine hardware and mac info */
490	igb_identify_hardware(adapter);
491
492	/* Setup PCI resources */
493	if (igb_allocate_pci_resources(adapter)) {
494		device_printf(dev, "Allocation of PCI resources failed\n");
495		error = ENXIO;
496		goto err_pci;
497	}
498
499	/* Do Shared Code initialization */
500	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
501		device_printf(dev, "Setup of Shared code failed\n");
502		error = ENXIO;
503		goto err_pci;
504	}
505
506	e1000_get_bus_info(&adapter->hw);
507
508	/* Sysctl for limiting the amount of work done in the taskqueue */
509	igb_set_sysctl_value(adapter, "rx_processing_limit",
510	    "max number of rx packets to process",
511	    &adapter->rx_process_limit, igb_rx_process_limit);
512
513	/*
514	 * Validate the number of transmit and receive descriptors.  It
515	 * must not exceed the hardware maximum, and must be a multiple
516	 * of IGB_DBA_ALIGN.
517	 */
518	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
519	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
520		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
521		    IGB_DEFAULT_TXD, igb_txd);
522		adapter->num_tx_desc = IGB_DEFAULT_TXD;
523	} else
524		adapter->num_tx_desc = igb_txd;
525	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
526	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
527		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
528		    IGB_DEFAULT_RXD, igb_rxd);
529		adapter->num_rx_desc = IGB_DEFAULT_RXD;
530	} else
531		adapter->num_rx_desc = igb_rxd;
532
533	adapter->hw.mac.autoneg = DO_AUTO_NEG;
534	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
535	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
536
537	/* Copper options */
538	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
539		adapter->hw.phy.mdix = AUTO_ALL_MODES;
540		adapter->hw.phy.disable_polarity_correction = FALSE;
541		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
542	}
543
544	/*
545	 * Set the frame limits assuming
546	 * standard Ethernet-sized frames.
547	 */
548	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
549	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
550
551	/*
552	** Allocate and Setup Queues
553	*/
554	if (igb_allocate_queues(adapter)) {
555		error = ENOMEM;
556		goto err_pci;
557	}
558
559	/* Allocate the appropriate stats memory */
560	if (adapter->vf_ifp) {
561		adapter->stats =
562		    (struct e1000_vf_stats *)malloc(sizeof \
563		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
564		igb_vf_init_stats(adapter);
565	} else
566		adapter->stats =
567		    (struct e1000_hw_stats *)malloc(sizeof \
568		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
569	if (adapter->stats == NULL) {
570		device_printf(dev, "Can not allocate stats memory\n");
571		error = ENOMEM;
572		goto err_late;
573	}
574
575	/* Allocate multicast array memory. */
576	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
577	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
578	if (adapter->mta == NULL) {
579		device_printf(dev, "Can not allocate multicast setup array\n");
580		error = ENOMEM;
581		goto err_late;
582	}
583
584	/* Some adapter-specific advanced features */
585	if (adapter->hw.mac.type >= e1000_i350) {
586		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
587		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
588		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
589		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
590		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
591		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
592		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
593		    adapter, 0, igb_sysctl_eee, "I",
594		    "Disable Energy Efficient Ethernet");
595		if (adapter->hw.phy.media_type == e1000_media_type_copper)
596			e1000_set_eee_i350(&adapter->hw);
597	}
598
599	/*
600	** Start from a known state; this is
601	** important for reading the NVM and
602	** the MAC address from it.
603	*/
604	e1000_reset_hw(&adapter->hw);
605
606	/* Make sure we have a good EEPROM before we read from it */
607	if (((adapter->hw.mac.type != e1000_i210) &&
608	    (adapter->hw.mac.type != e1000_i211)) &&
609	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
610		/*
611		** Some PCI-E parts fail the first check due to
612		** the link being in a sleep state; call it again.
613		** If it fails a second time, it is a real issue.
614		*/
615		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
616			device_printf(dev,
617			    "The EEPROM Checksum Is Not Valid\n");
618			error = EIO;
619			goto err_late;
620		}
621	}
622
623	/*
624	** Copy the permanent MAC address out of the EEPROM
625	*/
626	if (e1000_read_mac_addr(&adapter->hw) < 0) {
627		device_printf(dev, "EEPROM read error while reading MAC"
628		    " address\n");
629		error = EIO;
630		goto err_late;
631	}
632	/* Check its sanity */
633	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
634		device_printf(dev, "Invalid MAC address\n");
635		error = EIO;
636		goto err_late;
637	}
638
639	/* Setup OS specific network interface */
640	if (igb_setup_interface(dev, adapter) != 0)
641		goto err_late;
642
643	/* Now get a good starting state */
644	igb_reset(adapter);
645
646	/* Initialize statistics */
647	igb_update_stats_counters(adapter);
648
649	adapter->hw.mac.get_link_status = 1;
650	igb_update_link_status(adapter);
651
652	/* Indicate SOL/IDER usage */
653	if (e1000_check_reset_block(&adapter->hw))
654		device_printf(dev,
655		    "PHY reset is blocked due to SOL/IDER session.\n");
656
657	/* Determine if we have to control management hardware */
658	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
659
660	/*
661	 * Setup Wake-on-Lan
662	 */
663	/* APME bit in EEPROM is mapped to WUC.APME */
664	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
665	if (eeprom_data)
666		adapter->wol = E1000_WUFC_MAG;
667
668	/* Register for VLAN events */
669	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
670	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
671	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
672	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
673
674	igb_add_hw_stats(adapter);
675
676	/* Tell the stack that the interface is not active */
677	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
678	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
679
680	adapter->led_dev = led_create(igb_led_func, adapter,
681	    device_get_nameunit(dev));
682
683	/*
684	** Configure Interrupts
685	*/
686	if ((adapter->msix > 1) && (igb_enable_msix))
687		error = igb_allocate_msix(adapter);
688	else /* MSI or Legacy */
689		error = igb_allocate_legacy(adapter);
690	if (error)
691		goto err_late;
692
693#ifdef DEV_NETMAP
694	igb_netmap_attach(adapter);
695#endif /* DEV_NETMAP */
696	INIT_DEBUGOUT("igb_attach: end");
697
698	return (0);
699
700err_late:
701	igb_detach(dev);
702	igb_free_transmit_structures(adapter);
703	igb_free_receive_structures(adapter);
704	igb_release_hw_control(adapter);
705err_pci:
706	igb_free_pci_resources(adapter);
707	if (adapter->ifp != NULL)
708		if_free(adapter->ifp);
709	free(adapter->mta, M_DEVBUF);
710	IGB_CORE_LOCK_DESTROY(adapter);
711
712	return (error);
713}
714
715/*********************************************************************
716 *  Device removal routine
717 *
718 *  The detach entry point is called when the driver is being removed.
719 *  This routine stops the adapter and deallocates all the resources
720 *  that were allocated for driver operation.
721 *
722 *  return 0 on success, positive on failure
723 *********************************************************************/
724
725static int
726igb_detach(device_t dev)
727{
728	struct adapter	*adapter = device_get_softc(dev);
729	struct ifnet	*ifp = adapter->ifp;
730
731	INIT_DEBUGOUT("igb_detach: begin");
732
733	/* Make sure VLANS are not using driver */
734	if (adapter->ifp->if_vlantrunk != NULL) {
735		device_printf(dev,"Vlan in use, detach first\n");
736		return (EBUSY);
737	}
738
739	ether_ifdetach(adapter->ifp);
740
741	if (adapter->led_dev != NULL)
742		led_destroy(adapter->led_dev);
743
744#ifdef DEVICE_POLLING
745	if (ifp->if_capenable & IFCAP_POLLING)
746		ether_poll_deregister(ifp);
747#endif
748
749	IGB_CORE_LOCK(adapter);
750	adapter->in_detach = 1;
751	igb_stop(adapter);
752	IGB_CORE_UNLOCK(adapter);
753
754	e1000_phy_hw_reset(&adapter->hw);
755
756	/* Give control back to firmware */
757	igb_release_manageability(adapter);
758	igb_release_hw_control(adapter);
759
760	if (adapter->wol) {
761		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
762		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
763		igb_enable_wakeup(dev);
764	}
765
766	/* Unregister VLAN events */
767	if (adapter->vlan_attach != NULL)
768		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
769	if (adapter->vlan_detach != NULL)
770		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
771
772	callout_drain(&adapter->timer);
773
774#ifdef DEV_NETMAP
775	netmap_detach(adapter->ifp);
776#endif /* DEV_NETMAP */
777	igb_free_pci_resources(adapter);
778	bus_generic_detach(dev);
779	if_free(ifp);
780
781	igb_free_transmit_structures(adapter);
782	igb_free_receive_structures(adapter);
783	if (adapter->mta != NULL)
784		free(adapter->mta, M_DEVBUF);
785
786	IGB_CORE_LOCK_DESTROY(adapter);
787
788	return (0);
789}
790
791/*********************************************************************
792 *
793 *  Shutdown entry point
794 *
795 **********************************************************************/
796
797static int
798igb_shutdown(device_t dev)
799{
800	return igb_suspend(dev);
801}
802
803/*
804 * Suspend/resume device methods.
805 */
806static int
807igb_suspend(device_t dev)
808{
809	struct adapter *adapter = device_get_softc(dev);
810
811	IGB_CORE_LOCK(adapter);
812
813	igb_stop(adapter);
814
815        igb_release_manageability(adapter);
816	igb_release_hw_control(adapter);
817
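        /*
        ** If Wake-on-LAN was configured at attach time, enable PME
        ** and program the wake-up filter before suspending.
        */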
818        if (adapter->wol) {
819                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
820                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
821                igb_enable_wakeup(dev);
822        }
823
824	IGB_CORE_UNLOCK(adapter);
825
826	return bus_generic_suspend(dev);
827}
828
829static int
830igb_resume(device_t dev)
831{
832	struct adapter *adapter = device_get_softc(dev);
833	struct tx_ring	*txr = adapter->tx_rings;
834	struct ifnet *ifp = adapter->ifp;
835
836	IGB_CORE_LOCK(adapter);
837	igb_init_locked(adapter);
838	igb_init_manageability(adapter);
839
840	if ((ifp->if_flags & IFF_UP) &&
841	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
842		for (int i = 0; i < adapter->num_queues; i++, txr++) {
843			IGB_TX_LOCK(txr);
844#if __FreeBSD_version >= 800000
845			/* Process the stack queue only if not depleted */
846			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
847			    !drbr_empty(ifp, txr->br))
848				igb_mq_start_locked(ifp, txr, NULL);
849#else
850			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
851				igb_start_locked(txr, ifp);
852#endif
853			IGB_TX_UNLOCK(txr);
854		}
855	}
856	IGB_CORE_UNLOCK(adapter);
857
858	return bus_generic_resume(dev);
859}
860
861
862#if __FreeBSD_version < 800000
863
864/*********************************************************************
865 *  Transmit entry point
866 *
867 *  igb_start is called by the stack to initiate a transmit.
868 *  The driver will remain in this routine as long as there are
869 *  packets to transmit and transmit resources are available.
870 *  In case resources are not available, the stack is notified and
871 *  the packet is requeued.
872 **********************************************************************/
873
874static void
875igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
876{
877	struct adapter	*adapter = ifp->if_softc;
878	struct mbuf	*m_head;
879
880	IGB_TX_LOCK_ASSERT(txr);
881
882	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
883	    IFF_DRV_RUNNING)
884		return;
885	if (!adapter->link_active)
886		return;
887
888	/* Call cleanup if number of TX descriptors low */
889	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
890		igb_txeof(txr);
891
892	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
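		/*
		** Stop once the ring can no longer hold a worst-case
		** scattered packet; mark the queue depleted.
		*/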
893		if (txr->tx_avail <= IGB_MAX_SCATTER) {
894			txr->queue_status |= IGB_QUEUE_DEPLETED;
895			break;
896		}
897		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
898		if (m_head == NULL)
899			break;
900		/*
901		 *  Encapsulation can modify our pointer, and/or make it
902		 *  NULL on failure.  In that event, we can't requeue.
903		 */
904		if (igb_xmit(txr, &m_head)) {
905			if (m_head != NULL)
906				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
907			if (txr->tx_avail <= IGB_MAX_SCATTER)
908				txr->queue_status |= IGB_QUEUE_DEPLETED;
909			break;
910		}
911
912		/* Send a copy of the frame to the BPF listener */
913		ETHER_BPF_MTAP(ifp, m_head);
914
915		/* Set watchdog on */
916		txr->watchdog_time = ticks;
917		txr->queue_status |= IGB_QUEUE_WORKING;
918	}
919}
920
921/*
922 * Legacy TX driver routine, called from the
923 * stack, always uses tx[0], and spins for it.
924 * Should not be used with multiqueue tx
925 */
926static void
927igb_start(struct ifnet *ifp)
928{
929	struct adapter	*adapter = ifp->if_softc;
930	struct tx_ring	*txr = adapter->tx_rings;
931
932	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
933		IGB_TX_LOCK(txr);
934		igb_start_locked(txr, ifp);
935		IGB_TX_UNLOCK(txr);
936	}
937	return;
938}
939
940#else /* __FreeBSD_version >= 800000 */
941
942/*
943** Multiqueue Transmit driver
944**
945*/
946static int
947igb_mq_start(struct ifnet *ifp, struct mbuf *m)
948{
949	struct adapter		*adapter = ifp->if_softc;
950	struct igb_queue	*que;
951	struct tx_ring		*txr;
952	int 			i, err = 0;
953
954	/* Which queue to use: the mbuf's flowid if valid, else pick by current CPU */
955	if ((m->m_flags & M_FLOWID) != 0)
956		i = m->m_pkthdr.flowid % adapter->num_queues;
957	else
958		i = curcpu % adapter->num_queues;
959
960	txr = &adapter->tx_rings[i];
961	que = &adapter->queues[i];
962	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
963	    IGB_TX_TRYLOCK(txr)) {
964		struct mbuf *pm = NULL;
965		/*
966		** Try to enqueue first so frames stay in
967		** order, but settle for possible reordering
968		** if the enqueue fails
969		*/
970		if (m && drbr_enqueue(ifp, txr->br, m))
971			pm = m;
972		err = igb_mq_start_locked(ifp, txr, pm);
973		IGB_TX_UNLOCK(txr);
974	} else {
975		err = drbr_enqueue(ifp, txr->br, m);
976		taskqueue_enqueue(que->tq, &txr->txq_task);
977	}
978
979	return (err);
980}
981
982static int
983igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
984{
985	struct adapter  *adapter = txr->adapter;
986        struct mbuf     *next;
987        int             err = 0, enq;
988
989	IGB_TX_LOCK_ASSERT(txr);
990
991	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
992	    (txr->queue_status & IGB_QUEUE_DEPLETED) ||
993	    adapter->link_active == 0) {
994		if (m != NULL)
995			err = drbr_enqueue(ifp, txr->br, m);
996		return (err);
997	}
998
999	enq = 0;
1000	if (m == NULL) {
1001		next = drbr_dequeue(ifp, txr->br);
1002	} else if (drbr_needs_enqueue(ifp, txr->br)) {
1003		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
1004			return (err);
1005		next = drbr_dequeue(ifp, txr->br);
1006	} else
1007		next = m;
1008
1009	/* Process the queue */
1010	while (next != NULL) {
1011		if ((err = igb_xmit(txr, &next)) != 0) {
1012			if (next != NULL)
1013				err = drbr_enqueue(ifp, txr->br, next);
1014			break;
1015		}
1016		enq++;
1017		ifp->if_obytes += next->m_pkthdr.len;
1018		if (next->m_flags & M_MCAST)
1019			ifp->if_omcasts++;
1020		ETHER_BPF_MTAP(ifp, next);
1021		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1022			break;
1023		next = drbr_dequeue(ifp, txr->br);
1024	}
1025	if (enq > 0) {
1026		/* Set the watchdog */
1027		txr->queue_status |= IGB_QUEUE_WORKING;
1028		txr->watchdog_time = ticks;
1029	}
1030	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1031		igb_txeof(txr);
1032	if (txr->tx_avail <= IGB_MAX_SCATTER)
1033		txr->queue_status |= IGB_QUEUE_DEPLETED;
1034	return (err);
1035}
1036
1037/*
1038 * Called from a taskqueue to drain queued transmit packets.
1039 */
1040static void
1041igb_deferred_mq_start(void *arg, int pending)
1042{
1043	struct tx_ring *txr = arg;
1044	struct adapter *adapter = txr->adapter;
1045	struct ifnet *ifp = adapter->ifp;
1046
1047	IGB_TX_LOCK(txr);
1048	if (!drbr_empty(ifp, txr->br))
1049		igb_mq_start_locked(ifp, txr, NULL);
1050	IGB_TX_UNLOCK(txr);
1051}
1052
1053/*
1054** Flush all ring buffers
1055*/
1056static void
1057igb_qflush(struct ifnet *ifp)
1058{
1059	struct adapter	*adapter = ifp->if_softc;
1060	struct tx_ring	*txr = adapter->tx_rings;
1061	struct mbuf	*m;
1062
1063	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1064		IGB_TX_LOCK(txr);
1065		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1066			m_freem(m);
1067		IGB_TX_UNLOCK(txr);
1068	}
1069	if_qflush(ifp);
1070}
1071#endif /* __FreeBSD_version >= 800000 */
1072
1073/*********************************************************************
1074 *  Ioctl entry point
1075 *
1076 *  igb_ioctl is called when the user wants to configure the
1077 *  interface.
1078 *
1079 *  return 0 on success, positive on failure
1080 **********************************************************************/
1081
1082static int
1083igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1084{
1085	struct adapter	*adapter = ifp->if_softc;
1086	struct ifreq	*ifr = (struct ifreq *)data;
1087#if defined(INET) || defined(INET6)
1088	struct ifaddr	*ifa = (struct ifaddr *)data;
1089#endif
1090	bool		avoid_reset = FALSE;
1091	int		error = 0;
1092
1093	if (adapter->in_detach)
1094		return (error);
1095
1096	switch (command) {
1097	case SIOCSIFADDR:
1098#ifdef INET
1099		if (ifa->ifa_addr->sa_family == AF_INET)
1100			avoid_reset = TRUE;
1101#endif
1102#ifdef INET6
1103		if (ifa->ifa_addr->sa_family == AF_INET6)
1104			avoid_reset = TRUE;
1105#endif
1106		/*
1107		** Calling init results in link renegotiation,
1108		** so we avoid doing it when possible.
1109		*/
1110		if (avoid_reset) {
1111			ifp->if_flags |= IFF_UP;
1112			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1113				igb_init(adapter);
1114#ifdef INET
1115			if (!(ifp->if_flags & IFF_NOARP))
1116				arp_ifinit(ifp, ifa);
1117#endif
1118		} else
1119			error = ether_ioctl(ifp, command, data);
1120		break;
1121	case SIOCSIFMTU:
1122	    {
1123		int max_frame_size;
1124
1125		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1126
1127		IGB_CORE_LOCK(adapter);
1128		max_frame_size = 9234;
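		/* 9234 = MJUM9BYTES (9216) + Ethernet header (14) + CRC (4) */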
1129		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1130		    ETHER_CRC_LEN) {
1131			IGB_CORE_UNLOCK(adapter);
1132			error = EINVAL;
1133			break;
1134		}
1135
1136		ifp->if_mtu = ifr->ifr_mtu;
1137		adapter->max_frame_size =
1138		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1139		igb_init_locked(adapter);
1140		IGB_CORE_UNLOCK(adapter);
1141		break;
1142	    }
1143	case SIOCSIFFLAGS:
1144		IOCTL_DEBUGOUT("ioctl rcv'd:\
1145		    SIOCSIFFLAGS (Set Interface Flags)");
1146		IGB_CORE_LOCK(adapter);
1147		if (ifp->if_flags & IFF_UP) {
1148			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1149				if ((ifp->if_flags ^ adapter->if_flags) &
1150				    (IFF_PROMISC | IFF_ALLMULTI)) {
1151					igb_disable_promisc(adapter);
1152					igb_set_promisc(adapter);
1153				}
1154			} else
1155				igb_init_locked(adapter);
1156		} else
1157			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1158				igb_stop(adapter);
1159		adapter->if_flags = ifp->if_flags;
1160		IGB_CORE_UNLOCK(adapter);
1161		break;
1162	case SIOCADDMULTI:
1163	case SIOCDELMULTI:
1164		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1165		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1166			IGB_CORE_LOCK(adapter);
1167			igb_disable_intr(adapter);
1168			igb_set_multi(adapter);
1169#ifdef DEVICE_POLLING
1170			if (!(ifp->if_capenable & IFCAP_POLLING))
1171#endif
1172				igb_enable_intr(adapter);
1173			IGB_CORE_UNLOCK(adapter);
1174		}
1175		break;
1176	case SIOCSIFMEDIA:
1177		/* Check SOL/IDER usage */
1178		IGB_CORE_LOCK(adapter);
1179		if (e1000_check_reset_block(&adapter->hw)) {
1180			IGB_CORE_UNLOCK(adapter);
1181			device_printf(adapter->dev, "Media change is"
1182			    " blocked due to SOL/IDER session.\n");
1183			break;
1184		}
1185		IGB_CORE_UNLOCK(adapter);
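		/* FALLTHROUGH: both requests are handled by ifmedia_ioctl() */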
1186	case SIOCGIFMEDIA:
1187		IOCTL_DEBUGOUT("ioctl rcv'd: \
1188		    SIOCxIFMEDIA (Get/Set Interface Media)");
1189		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1190		break;
1191	case SIOCSIFCAP:
1192	    {
1193		int mask, reinit;
1194
1195		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1196		reinit = 0;
1197		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1198#ifdef DEVICE_POLLING
1199		if (mask & IFCAP_POLLING) {
1200			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1201				error = ether_poll_register(igb_poll, ifp);
1202				if (error)
1203					return (error);
1204				IGB_CORE_LOCK(adapter);
1205				igb_disable_intr(adapter);
1206				ifp->if_capenable |= IFCAP_POLLING;
1207				IGB_CORE_UNLOCK(adapter);
1208			} else {
1209				error = ether_poll_deregister(ifp);
1210				/* Enable interrupt even in error case */
1211				IGB_CORE_LOCK(adapter);
1212				igb_enable_intr(adapter);
1213				ifp->if_capenable &= ~IFCAP_POLLING;
1214				IGB_CORE_UNLOCK(adapter);
1215			}
1216		}
1217#endif
1218		if (mask & IFCAP_HWCSUM) {
1219			ifp->if_capenable ^= IFCAP_HWCSUM;
1220			reinit = 1;
1221		}
1222		if (mask & IFCAP_TSO4) {
1223			ifp->if_capenable ^= IFCAP_TSO4;
1224			reinit = 1;
1225		}
1226		if (mask & IFCAP_VLAN_HWTAGGING) {
1227			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1228			reinit = 1;
1229		}
1230		if (mask & IFCAP_VLAN_HWFILTER) {
1231			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1232			reinit = 1;
1233		}
1234		if (mask & IFCAP_VLAN_HWTSO) {
1235			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1236			reinit = 1;
1237		}
1238		if (mask & IFCAP_LRO) {
1239			ifp->if_capenable ^= IFCAP_LRO;
1240			reinit = 1;
1241		}
1242		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1243			igb_init(adapter);
1244		VLAN_CAPABILITIES(ifp);
1245		break;
1246	    }
1247
1248	default:
1249		error = ether_ioctl(ifp, command, data);
1250		break;
1251	}
1252
1253	return (error);
1254}
1255
1256
1257/*********************************************************************
1258 *  Init entry point
1259 *
1260 *  This routine is used in two ways.  It is used by the stack as
1261 *  the init entry point in the network interface structure.  It is
1262 *  also used by the driver as a hw/sw initialization routine to get
1263 *  to a consistent state.
1264 *
1265 *  This routine has no return value.
1266 **********************************************************************/
1267
1268static void
1269igb_init_locked(struct adapter *adapter)
1270{
1271	struct ifnet	*ifp = adapter->ifp;
1272	device_t	dev = adapter->dev;
1273
1274	INIT_DEBUGOUT("igb_init: begin");
1275
1276	IGB_CORE_LOCK_ASSERT(adapter);
1277
1278	igb_disable_intr(adapter);
1279	callout_stop(&adapter->timer);
1280
1281	/* Get the latest mac address, User can use a LAA */
1282        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1283              ETHER_ADDR_LEN);
1284
1285	/* Put the address into the Receive Address Array */
1286	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1287
1288	igb_reset(adapter);
1289	igb_update_link_status(adapter);
1290
1291	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1292
1293	/* Set hardware offload abilities */
1294	ifp->if_hwassist = 0;
1295	if (ifp->if_capenable & IFCAP_TXCSUM) {
1296		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1297#if __FreeBSD_version >= 800000
1298		if (adapter->hw.mac.type == e1000_82576)
1299			ifp->if_hwassist |= CSUM_SCTP;
1300#endif
1301	}
1302
1303	if (ifp->if_capenable & IFCAP_TSO4)
1304		ifp->if_hwassist |= CSUM_TSO;
1305
1306	/* Configure for OS presence */
1307	igb_init_manageability(adapter);
1308
1309	/* Prepare transmit descriptors and buffers */
1310	igb_setup_transmit_structures(adapter);
1311	igb_initialize_transmit_units(adapter);
1312
1313	/* Setup Multicast table */
1314	igb_set_multi(adapter);
1315
1316	/*
1317	** Figure out the desired mbuf pool
1318	** for doing jumbo/packetsplit
1319	*/
1320	if (adapter->max_frame_size <= 2048)
1321		adapter->rx_mbuf_sz = MCLBYTES;
1322	else if (adapter->max_frame_size <= 4096)
1323		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1324	else
1325		adapter->rx_mbuf_sz = MJUM9BYTES;
1326
1327	/* Prepare receive descriptors and buffers */
1328	if (igb_setup_receive_structures(adapter)) {
1329		device_printf(dev, "Could not setup receive structures\n");
1330		return;
1331	}
1332	igb_initialize_receive_units(adapter);
1333
1334        /* Enable VLAN support */
1335	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1336		igb_setup_vlan_hw_support(adapter);
1337
1338	/* Don't lose promiscuous settings */
1339	igb_set_promisc(adapter);
1340
1341	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1342	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1343
1344	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1345	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1346
1347	if (adapter->msix > 1) /* Set up queue routing */
1348		igb_configure_queues(adapter);
1349
1350	/* this clears any pending interrupts */
1351	E1000_READ_REG(&adapter->hw, E1000_ICR);
1352#ifdef DEVICE_POLLING
1353	/*
1354	 * Only enable interrupts if we are not polling; make sure
1355	 * they are off otherwise.
1356	 */
1357	if (ifp->if_capenable & IFCAP_POLLING)
1358		igb_disable_intr(adapter);
1359	else
1360#endif /* DEVICE_POLLING */
1361	{
1362		igb_enable_intr(adapter);
1363		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1364	}
1365
1366	/* Set Energy Efficient Ethernet */
1367	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1368		e1000_set_eee_i350(&adapter->hw);
1369}
1370
1371static void
1372igb_init(void *arg)
1373{
1374	struct adapter *adapter = arg;
1375
1376	IGB_CORE_LOCK(adapter);
1377	igb_init_locked(adapter);
1378	IGB_CORE_UNLOCK(adapter);
1379}
1380
1381
1382static void
1383igb_handle_que(void *context, int pending)
1384{
1385	struct igb_queue *que = context;
1386	struct adapter *adapter = que->adapter;
1387	struct tx_ring *txr = que->txr;
1388	struct ifnet	*ifp = adapter->ifp;
1389
1390	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1391		bool	more;
1392
1393		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1394
1395		IGB_TX_LOCK(txr);
1396		igb_txeof(txr);
1397#if __FreeBSD_version >= 800000
1398		/* Process the stack queue only if not depleted */
1399		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1400		    !drbr_empty(ifp, txr->br))
1401			igb_mq_start_locked(ifp, txr, NULL);
1402#else
1403		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1404			igb_start_locked(txr, ifp);
1405#endif
1406		IGB_TX_UNLOCK(txr);
1407		/* Do we need another? */
1408		if (more) {
1409			taskqueue_enqueue(que->tq, &que->que_task);
1410			return;
1411		}
1412	}
1413
1414#ifdef DEVICE_POLLING
1415	if (ifp->if_capenable & IFCAP_POLLING)
1416		return;
1417#endif
1418	/* Reenable this interrupt */
1419	if (que->eims)
1420		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1421	else
1422		igb_enable_intr(adapter);
1423}
1424
1425/* Deal with link in a sleepable context */
1426static void
1427igb_handle_link(void *context, int pending)
1428{
1429	struct adapter *adapter = context;
1430
1431	IGB_CORE_LOCK(adapter);
1432	igb_handle_link_locked(adapter);
1433	IGB_CORE_UNLOCK(adapter);
1434}
1435
1436static void
1437igb_handle_link_locked(struct adapter *adapter)
1438{
1439	struct tx_ring	*txr = adapter->tx_rings;
1440	struct ifnet *ifp = adapter->ifp;
1441
1442	IGB_CORE_LOCK_ASSERT(adapter);
1443	adapter->hw.mac.get_link_status = 1;
1444	igb_update_link_status(adapter);
1445	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1446		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1447			IGB_TX_LOCK(txr);
1448#if __FreeBSD_version >= 800000
1449			/* Process the stack queue only if not depleted */
1450			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1451			    !drbr_empty(ifp, txr->br))
1452				igb_mq_start_locked(ifp, txr, NULL);
1453#else
1454			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1455				igb_start_locked(txr, ifp);
1456#endif
1457			IGB_TX_UNLOCK(txr);
1458		}
1459	}
1460}
1461
1462/*********************************************************************
1463 *
1464 *  MSI/Legacy Deferred
1465 *  Interrupt Service routine
1466 *
1467 *********************************************************************/
1468static int
1469igb_irq_fast(void *arg)
1470{
1471	struct adapter		*adapter = arg;
1472	struct igb_queue	*que = adapter->queues;
1473	u32			reg_icr;
1474
1475
1476	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1477
1478	/* Hot eject?  */
1479	if (reg_icr == 0xffffffff)
1480		return FILTER_STRAY;
1481
1482	/* Definitely not our interrupt.  */
1483	if (reg_icr == 0x0)
1484		return FILTER_STRAY;
1485
1486	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1487		return FILTER_STRAY;
1488
1489	/*
1490	 * Mask interrupts until the taskqueue is finished running.  This is
1491	 * cheap, just assume that it is needed.  This also works around the
1492	 * MSI message reordering errata on certain systems.
1493	 */
1494	igb_disable_intr(adapter);
1495	taskqueue_enqueue(que->tq, &que->que_task);
1496
1497	/* Link status change */
1498	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1499		taskqueue_enqueue(que->tq, &adapter->link_task);
1500
1501	if (reg_icr & E1000_ICR_RXO)
1502		adapter->rx_overruns++;
1503	return FILTER_HANDLED;
1504}
1505
1506#ifdef DEVICE_POLLING
1507#if __FreeBSD_version >= 800000
1508#define POLL_RETURN_COUNT(a) (a)
1509static int
1510#else
1511#define POLL_RETURN_COUNT(a)
1512static void
1513#endif
1514igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1515{
1516	struct adapter		*adapter = ifp->if_softc;
1517	struct igb_queue	*que;
1518	struct tx_ring		*txr;
1519	u32			reg_icr, rx_done = 0;
1520	u32			loop = IGB_MAX_LOOP;
1521	bool			more;
1522
1523	IGB_CORE_LOCK(adapter);
1524	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1525		IGB_CORE_UNLOCK(adapter);
1526		return POLL_RETURN_COUNT(rx_done);
1527	}
1528
1529	if (cmd == POLL_AND_CHECK_STATUS) {
1530		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1531		/* Link status change */
1532		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1533			igb_handle_link_locked(adapter);
1534
1535		if (reg_icr & E1000_ICR_RXO)
1536			adapter->rx_overruns++;
1537	}
1538	IGB_CORE_UNLOCK(adapter);
1539
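	/*
	** Service every queue: clean received packets up to the
	** poll budget, then reap completed transmit descriptors.
	*/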
1540	for (int i = 0; i < adapter->num_queues; i++) {
1541		que = &adapter->queues[i];
1542		txr = que->txr;
1543
1544		igb_rxeof(que, count, &rx_done);
1545
1546		IGB_TX_LOCK(txr);
1547		do {
1548			more = igb_txeof(txr);
1549		} while (loop-- && more);
1550#if __FreeBSD_version >= 800000
1551		if (!drbr_empty(ifp, txr->br))
1552			igb_mq_start_locked(ifp, txr, NULL);
1553#else
1554		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1555			igb_start_locked(txr, ifp);
1556#endif
1557		IGB_TX_UNLOCK(txr);
1558	}
1559
1560	return POLL_RETURN_COUNT(rx_done);
1561}
1562#endif /* DEVICE_POLLING */
1563
1564/*********************************************************************
1565 *
1566 *  MSIX Que Interrupt Service routine
1567 *
1568 **********************************************************************/
1569static void
1570igb_msix_que(void *arg)
1571{
1572	struct igb_queue *que = arg;
1573	struct adapter *adapter = que->adapter;
1574	struct ifnet   *ifp = adapter->ifp;
1575	struct tx_ring *txr = que->txr;
1576	struct rx_ring *rxr = que->rxr;
1577	u32		newitr = 0;
1578	bool		more_rx;
1579
1580	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1581	++que->irqs;
1582
1583	IGB_TX_LOCK(txr);
1584	igb_txeof(txr);
1585#if __FreeBSD_version >= 800000
1586	/* Process the stack queue only if not depleted */
1587	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1588	    !drbr_empty(ifp, txr->br))
1589		igb_mq_start_locked(ifp, txr, NULL);
1590#else
1591	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1592		igb_start_locked(txr, ifp);
1593#endif
1594	IGB_TX_UNLOCK(txr);
1595
1596	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1597
1598	if (adapter->enable_aim == FALSE)
1599		goto no_calc;
1600	/*
1601	** Do Adaptive Interrupt Moderation:
1602	**  - Write out the last calculated setting
1603	**  - Calculate a new setting based on the average
1604	**    packet size over the last interval.
1605	*/
1606        if (que->eitr_setting)
1607                E1000_WRITE_REG(&adapter->hw,
1608                    E1000_EITR(que->msix), que->eitr_setting);
1609
1610        que->eitr_setting = 0;
1611
1612        /* Idle, do nothing */
1613        if ((txr->bytes == 0) && (rxr->bytes == 0))
1614                goto no_calc;
1615
1616        /* Use half the default if the link is below gigabit speed */
1617        if (adapter->link_speed != 1000)
1618                newitr = IGB_DEFAULT_ITR / 2;
1619        else {
1620		if ((txr->bytes) && (txr->packets))
1621                	newitr = txr->bytes/txr->packets;
1622		if ((rxr->bytes) && (rxr->packets))
1623			newitr = max(newitr,
1624			    (rxr->bytes / rxr->packets));
1625                newitr += 24; /* account for hardware frame, crc */
1626		/* set an upper boundary */
1627		newitr = min(newitr, 3000);
1628		/* Be nice to the mid range */
1629                if ((newitr > 300) && (newitr < 1200))
1630                        newitr = (newitr / 3);
1631                else
1632                        newitr = (newitr / 2);
1633        }
1634        newitr &= 0x7FFC;  /* Mask invalid bits */
1635        if (adapter->hw.mac.type == e1000_82575)
1636                newitr |= newitr << 16;
1637        else
1638                newitr |= E1000_EITR_CNT_IGNR;
1639
1640        /* save for next interrupt */
1641        que->eitr_setting = newitr;
1642
1643        /* Reset state */
1644        txr->bytes = 0;
1645        txr->packets = 0;
1646        rxr->bytes = 0;
1647        rxr->packets = 0;
1648
1649no_calc:
1650	/* Schedule a clean task if needed */
1651	if (more_rx)
1652		taskqueue_enqueue(que->tq, &que->que_task);
1653	else
1654		/* Reenable this interrupt */
1655		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1656	return;
1657}
1658
1659
1660/*********************************************************************
1661 *
1662 *  MSIX Link Interrupt Service routine
1663 *
1664 **********************************************************************/
1665
1666static void
1667igb_msix_link(void *arg)
1668{
1669	struct adapter	*adapter = arg;
1670	u32       	icr;
1671
1672	++adapter->link_irq;
1673	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1674	if (!(icr & E1000_ICR_LSC))
1675		goto spurious;
1676	igb_handle_link(adapter, 0);
1677
1678spurious:
1679	/* Rearm */
1680	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1681	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1682	return;
1683}
1684
1685
1686/*********************************************************************
1687 *
1688 *  Media Ioctl callback
1689 *
1690 *  This routine is called whenever the user queries the status of
1691 *  the interface using ifconfig.
1692 *
1693 **********************************************************************/
1694static void
1695igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1696{
1697	struct adapter *adapter = ifp->if_softc;
1698	u_char fiber_type = IFM_1000_SX;
1699
1700	INIT_DEBUGOUT("igb_media_status: begin");
1701
1702	IGB_CORE_LOCK(adapter);
1703	igb_update_link_status(adapter);
1704
1705	ifmr->ifm_status = IFM_AVALID;
1706	ifmr->ifm_active = IFM_ETHER;
1707
1708	if (!adapter->link_active) {
1709		IGB_CORE_UNLOCK(adapter);
1710		return;
1711	}
1712
1713	ifmr->ifm_status |= IFM_ACTIVE;
1714
1715	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1716	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1717		ifmr->ifm_active |= fiber_type | IFM_FDX;
1718	else {
1719		switch (adapter->link_speed) {
1720		case 10:
1721			ifmr->ifm_active |= IFM_10_T;
1722			break;
1723		case 100:
1724			ifmr->ifm_active |= IFM_100_TX;
1725			break;
1726		case 1000:
1727			ifmr->ifm_active |= IFM_1000_T;
1728			break;
1729		}
1730		if (adapter->link_duplex == FULL_DUPLEX)
1731			ifmr->ifm_active |= IFM_FDX;
1732		else
1733			ifmr->ifm_active |= IFM_HDX;
1734	}
1735	IGB_CORE_UNLOCK(adapter);
1736}
1737
1738/*********************************************************************
1739 *
1740 *  Media Ioctl callback
1741 *
1742 *  This routine is called when the user changes speed/duplex using
1743 *  the media/mediaopt options with ifconfig.
1744 *
1745 **********************************************************************/
1746static int
1747igb_media_change(struct ifnet *ifp)
1748{
1749	struct adapter *adapter = ifp->if_softc;
1750	struct ifmedia  *ifm = &adapter->media;
1751
1752	INIT_DEBUGOUT("igb_media_change: begin");
1753
1754	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1755		return (EINVAL);
1756
1757	IGB_CORE_LOCK(adapter);
1758	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1759	case IFM_AUTO:
1760		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1761		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1762		break;
1763	case IFM_1000_LX:
1764	case IFM_1000_SX:
1765	case IFM_1000_T:
1766		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1767		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1768		break;
1769	case IFM_100_TX:
1770		adapter->hw.mac.autoneg = FALSE;
1771		adapter->hw.phy.autoneg_advertised = 0;
1772		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1773			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1774		else
1775			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1776		break;
1777	case IFM_10_T:
1778		adapter->hw.mac.autoneg = FALSE;
1779		adapter->hw.phy.autoneg_advertised = 0;
1780		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1781			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1782		else
1783			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1784		break;
1785	default:
1786		device_printf(adapter->dev, "Unsupported media type\n");
1787	}
1788
1789	igb_init_locked(adapter);
1790	IGB_CORE_UNLOCK(adapter);
1791
1792	return (0);
1793}
1794
1795
1796/*********************************************************************
1797 *
1798 *  This routine maps the mbufs to Advanced TX descriptors.
1799 *
1800 **********************************************************************/
1801static int
1802igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1803{
1804	struct adapter		*adapter = txr->adapter;
1805	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1806	bus_dmamap_t		map;
1807	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1808	union e1000_adv_tx_desc	*txd = NULL;
1809	struct mbuf		*m_head = *m_headp;
1810	struct ether_vlan_header *eh = NULL;
1811	struct ip		*ip = NULL;
1812	struct tcphdr		*th = NULL;
1813	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1814	int			ehdrlen, poff;
1815	int			nsegs, i, first, last = 0;
1816	int			error, do_tso, remap = 1;
1817
1818	/* Set basic descriptor constants */
1819	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1820	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1821	if (m_head->m_flags & M_VLANTAG)
1822		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1823
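	/*
	 * On an EFBIG error from bus_dmamap_load_mbuf_sg() the chain is
	 * defragmented and we jump back here, at most once (see "remap").
	 */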
1824retry:
1825	m_head = *m_headp;
1826	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1827	hdrlen = ehdrlen = poff = 0;
1828
1829	/*
1830	 * Intel recommends entire IP/TCP header length reside in a single
1831	 * buffer. If multiple descriptors are used to describe the IP and
1832	 * TCP header, each descriptor should describe one or more
1833	 * complete headers; descriptors referencing only parts of headers
1834	 * are not supported. If all layer headers are not coalesced into
1835	 * a single buffer, each buffer should not cross a 4KB boundary,
1836	 * or be larger than the maximum read request size.
1837	 * The controller also requires modifying the IP/TCP header to make
1838	 * TSO work, so we first get a writable mbuf chain and then coalesce
1839	 * the ethernet/IP/TCP header into a single buffer to meet the
1840	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1841	 * offloading, which has similar restrictions.
1842	 */
1843	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1844		if (do_tso || (m_head->m_next != NULL &&
1845		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1846			if (M_WRITABLE(*m_headp) == 0) {
1847				m_head = m_dup(*m_headp, M_DONTWAIT);
1848				m_freem(*m_headp);
1849				if (m_head == NULL) {
1850					*m_headp = NULL;
1851					return (ENOBUFS);
1852				}
1853				*m_headp = m_head;
1854			}
1855		}
1856		/*
1857		 * Assume IPv4, we don't have TSO/checksum offload support
1858		 * for IPv6 yet.
1859		 */
1860		ehdrlen = sizeof(struct ether_header);
1861		m_head = m_pullup(m_head, ehdrlen);
1862		if (m_head == NULL) {
1863			*m_headp = NULL;
1864			return (ENOBUFS);
1865		}
1866		eh = mtod(m_head, struct ether_vlan_header *);
1867		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1868			ehdrlen = sizeof(struct ether_vlan_header);
1869			m_head = m_pullup(m_head, ehdrlen);
1870			if (m_head == NULL) {
1871				*m_headp = NULL;
1872				return (ENOBUFS);
1873			}
1874		}
1875		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1876		if (m_head == NULL) {
1877			*m_headp = NULL;
1878			return (ENOBUFS);
1879		}
1880		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1881		poff = ehdrlen + (ip->ip_hl << 2);
1882		if (do_tso) {
1883			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1884			if (m_head == NULL) {
1885				*m_headp = NULL;
1886				return (ENOBUFS);
1887			}
1888			/*
1889			 * The pseudo TCP checksum does not include the TCP
1890			 * payload length, so the driver must recompute it here
1891			 * to match what the hardware expects to see, per
1892			 * Microsoft's Large Send specification.
1893			 */
1894			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1895			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1896			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1897			/* Keep track of the full header length */
1898			hdrlen = poff + (th->th_off << 2);
1899		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1900			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1901			if (m_head == NULL) {
1902				*m_headp = NULL;
1903				return (ENOBUFS);
1904			}
1905			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1906			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1907			if (m_head == NULL) {
1908				*m_headp = NULL;
1909				return (ENOBUFS);
1910			}
1911			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1912			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1913		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1914			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1915			if (m_head == NULL) {
1916				*m_headp = NULL;
1917				return (ENOBUFS);
1918			}
1919			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1920		}
1921		*m_headp = m_head;
1922	}
1923
1924	/*
1925	 * Map the packet for DMA
1926	 *
1927	 * Capture the first descriptor index,
1928	 * this descriptor will have the index
1929	 * of the EOP which is the only one that
1930	 * now gets a DONE bit writeback.
1931	 */
1932	first = txr->next_avail_desc;
1933	tx_buffer = &txr->tx_buffers[first];
1934	tx_buffer_mapped = tx_buffer;
1935	map = tx_buffer->map;
1936
1937	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1938	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1939
1940	/*
1941	 * There are two types of errors we can (try) to handle:
1942	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1943	 *   out of segments.  Defragment the mbuf chain and try again.
1944	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1945	 *   at this point in time.  Defer sending and try again later.
1946	 * All other errors, in particular EINVAL, are fatal and prevent the
1947	 * mbuf chain from ever going through.  Drop it and report error.
1948	 */
1949	if (error == EFBIG && remap) {
1950		struct mbuf *m;
1951
1952		m = m_defrag(*m_headp, M_DONTWAIT);
1953		if (m == NULL) {
1954			adapter->mbuf_defrag_failed++;
1955			m_freem(*m_headp);
1956			*m_headp = NULL;
1957			return (ENOBUFS);
1958		}
1959		*m_headp = m;
1960
1961		/* Try it again, but only once */
1962		remap = 0;
1963		goto retry;
1964	} else if (error == ENOMEM) {
1965		adapter->no_tx_dma_setup++;
1966		return (error);
1967	} else if (error != 0) {
1968		adapter->no_tx_dma_setup++;
1969		m_freem(*m_headp);
1970		*m_headp = NULL;
1971		return (error);
1972	}
1973
1974	/*
1975	** Make sure we don't overrun the ring,
1976	** we need nsegs descriptors and one for
1977	** the context descriptor used for the
1978	** offloads.
1979	*/
1980        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1981                txr->no_desc_avail++;
1982		bus_dmamap_unload(txr->txtag, map);
1983		return (ENOBUFS);
1984        }
1985	m_head = *m_headp;
1986
1987	/* Do hardware assists:
1988         * Set up the context descriptor, used
1989         * when any hardware offload is done.
1990         * This includes CSUM, VLAN, and TSO.
1991         * It will use the first descriptor.
1992         */
1993
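	/*
	 * POPTS bits in olinfo_status: IXSM requests IP checksum insertion,
	 * TXSM requests TCP/UDP checksum insertion; TSO needs both.
	 */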
1994	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1995		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1996			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1997			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1998			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1999		} else
2000			return (ENXIO);
2001	} else if (igb_tx_ctx_setup(txr, m_head))
2002			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2003
2004	/* Calculate payload length */
2005	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2006	    << E1000_ADVTXD_PAYLEN_SHIFT);
2007
2008	/* 82575 needs the queue index added */
2009	if (adapter->hw.mac.type == e1000_82575)
2010		olinfo_status |= txr->me << 4;
2011
2012	/* Set up our transmit descriptors */
2013	i = txr->next_avail_desc;
2014	for (int j = 0; j < nsegs; j++) {
2015		bus_size_t seg_len;
2016		bus_addr_t seg_addr;
2017
2018		tx_buffer = &txr->tx_buffers[i];
2019		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2020		seg_addr = segs[j].ds_addr;
2021		seg_len  = segs[j].ds_len;
2022
2023		txd->read.buffer_addr = htole64(seg_addr);
2024		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2025		txd->read.olinfo_status = htole32(olinfo_status);
2026		last = i;
2027		if (++i == adapter->num_tx_desc)
2028			i = 0;
2029		tx_buffer->m_head = NULL;
2030		tx_buffer->next_eop = -1;
2031	}
2032
2033	txr->next_avail_desc = i;
2034	txr->tx_avail -= nsegs;
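	/* The mbuf chain is stowed with the last (EOP) descriptor's buffer. */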
2035        tx_buffer->m_head = m_head;
2036
2037	/*
2038	** Here we swap the map so the last descriptor,
2039	** which gets the completion interrupt has the
2040	** real map, and the first descriptor gets the
2041	** unused map from this descriptor.
2042	*/
2043	tx_buffer_mapped->map = tx_buffer->map;
2044	tx_buffer->map = map;
2045        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2046
2047        /*
2048         * Last Descriptor of Packet
2049	 * needs End Of Packet (EOP)
2050	 * and Report Status (RS)
2051         */
2052        txd->read.cmd_type_len |=
2053	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2054	/*
2055	 * Keep track in the first buffer which
2056	 * descriptor will be written back
2057	 */
2058	tx_buffer = &txr->tx_buffers[first];
2059	tx_buffer->next_eop = last;
2060	/* Update the watchdog time early and often */
2061	txr->watchdog_time = ticks;
2062
2063	/*
2064	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2065	 * that this frame is available to transmit.
2066	 */
2067	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2068	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2069	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2070	++txr->tx_packets;
2071
2072	return (0);
2073}
2074static void
2075igb_set_promisc(struct adapter *adapter)
2076{
2077	struct ifnet	*ifp = adapter->ifp;
2078	struct e1000_hw *hw = &adapter->hw;
2079	u32		reg;
2080
2081	if (adapter->vf_ifp) {
2082		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2083		return;
2084	}
2085
2086	reg = E1000_READ_REG(hw, E1000_RCTL);
2087	if (ifp->if_flags & IFF_PROMISC) {
2088		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2089		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2090	} else if (ifp->if_flags & IFF_ALLMULTI) {
2091		reg |= E1000_RCTL_MPE;
2092		reg &= ~E1000_RCTL_UPE;
2093		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2094	}
2095}
2096
2097static void
2098igb_disable_promisc(struct adapter *adapter)
2099{
2100	struct e1000_hw *hw = &adapter->hw;
2101	u32		reg;
2102
2103	if (adapter->vf_ifp) {
2104		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2105		return;
2106	}
2107	reg = E1000_READ_REG(hw, E1000_RCTL);
2108	reg &=  (~E1000_RCTL_UPE);
2109	reg &=  (~E1000_RCTL_MPE);
2110	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2111}
2112
2113
2114/*********************************************************************
2115 *  Multicast Update
2116 *
2117 *  This routine is called whenever the multicast address list is updated.
2118 *
2119 **********************************************************************/
2120
2121static void
2122igb_set_multi(struct adapter *adapter)
2123{
2124	struct ifnet	*ifp = adapter->ifp;
2125	struct ifmultiaddr *ifma;
2126	u32 reg_rctl = 0;
2127	u8  *mta;
2128
2129	int mcnt = 0;
2130
2131	IOCTL_DEBUGOUT("igb_set_multi: begin");
2132
2133	mta = adapter->mta;
2134	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2135	    MAX_NUM_MULTICAST_ADDRESSES);
2136
2137#if __FreeBSD_version < 800000
2138	IF_ADDR_LOCK(ifp);
2139#else
2140	if_maddr_rlock(ifp);
2141#endif
2142	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2143		if (ifma->ifma_addr->sa_family != AF_LINK)
2144			continue;
2145
2146		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2147			break;
2148
2149		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2150		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2151		mcnt++;
2152	}
2153#if __FreeBSD_version < 800000
2154	IF_ADDR_UNLOCK(ifp);
2155#else
2156	if_maddr_runlock(ifp);
2157#endif
2158
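	/*
	 * If there are more addresses than the hardware filter can hold,
	 * fall back to multicast promiscuous mode.
	 */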
2159	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2160		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2161		reg_rctl |= E1000_RCTL_MPE;
2162		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2163	} else
2164		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2165}
2166
2167
2168/*********************************************************************
2169 *  Timer routine:
2170 *  	This routine checks for link status,
2171 *	updates statistics, and does the watchdog.
2172 *
2173 **********************************************************************/
2174
2175static void
2176igb_local_timer(void *arg)
2177{
2178	struct adapter		*adapter = arg;
2179	device_t		dev = adapter->dev;
2180	struct ifnet		*ifp = adapter->ifp;
2181	struct tx_ring		*txr = adapter->tx_rings;
2182	struct igb_queue	*que = adapter->queues;
2183	int			hung = 0, busy = 0;
2184
2185
2186	IGB_CORE_LOCK_ASSERT(adapter);
2187
2188	igb_update_link_status(adapter);
2189	igb_update_stats_counters(adapter);
2190
2191        /*
2192        ** Check the TX queues status
2193	**	- central locked handling of OACTIVE
2194	**	- watchdog only if all queues show hung
2195        */
2196	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2197		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2198		    (adapter->pause_frames == 0))
2199			++hung;
2200		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2201			++busy;
2202		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2203			taskqueue_enqueue(que->tq, &que->que_task);
2204	}
2205	if (hung == adapter->num_queues)
2206		goto timeout;
2207	if (busy == adapter->num_queues)
2208		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2209	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2210	    (busy < adapter->num_queues))
2211		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2212
2213	adapter->pause_frames = 0;
2214	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2215#ifndef DEVICE_POLLING
2216	/* Schedule all queue interrupts - deadlock protection */
2217	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2218#endif
2219	return;
2220
2221timeout:
2222	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2223	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2224            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2225            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2226	device_printf(dev,"TX(%d) desc avail = %d, "
2227            "Next TX to Clean = %d\n",
2228            txr->me, txr->tx_avail, txr->next_to_clean);
2229	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2230	adapter->watchdog_events++;
2231	igb_init_locked(adapter);
2232}
2233
2234static void
2235igb_update_link_status(struct adapter *adapter)
2236{
2237	struct e1000_hw *hw = &adapter->hw;
2238	struct ifnet *ifp = adapter->ifp;
2239	device_t dev = adapter->dev;
2240	struct tx_ring *txr = adapter->tx_rings;
2241	u32 link_check, thstat, ctrl;
2242
2243	link_check = thstat = ctrl = 0;
2244
2245	/* Get the cached link value or read for real */
2246        switch (hw->phy.media_type) {
2247        case e1000_media_type_copper:
2248                if (hw->mac.get_link_status) {
2249			/* Do the work to read phy */
2250                        e1000_check_for_link(hw);
2251                        link_check = !hw->mac.get_link_status;
2252                } else
2253                        link_check = TRUE;
2254                break;
2255        case e1000_media_type_fiber:
2256                e1000_check_for_link(hw);
2257                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2258                                 E1000_STATUS_LU);
2259                break;
2260        case e1000_media_type_internal_serdes:
2261                e1000_check_for_link(hw);
2262                link_check = adapter->hw.mac.serdes_has_link;
2263                break;
2264	/* VF device is type_unknown */
2265        case e1000_media_type_unknown:
2266                e1000_check_for_link(hw);
2267		link_check = !hw->mac.get_link_status;
2268		/* Fall thru */
2269        default:
2270                break;
2271        }
2272
2273	/* Check for thermal downshift or shutdown */
2274	if (hw->mac.type == e1000_i350) {
2275		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2276		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2277	}
2278
2279	/* Now we check if a transition has happened */
2280	if (link_check && (adapter->link_active == 0)) {
2281		e1000_get_speed_and_duplex(&adapter->hw,
2282		    &adapter->link_speed, &adapter->link_duplex);
2283		if (bootverbose)
2284			device_printf(dev, "Link is up %d Mbps %s\n",
2285			    adapter->link_speed,
2286			    ((adapter->link_duplex == FULL_DUPLEX) ?
2287			    "Full Duplex" : "Half Duplex"));
2288		adapter->link_active = 1;
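		/* link_speed is in Mbps; if_baudrate is in bits per second. */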
2289		ifp->if_baudrate = adapter->link_speed * 1000000;
2290		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2291		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2292			device_printf(dev, "Link: thermal downshift\n");
2293		/* This can sleep */
2294		if_link_state_change(ifp, LINK_STATE_UP);
2295	} else if (!link_check && (adapter->link_active == 1)) {
2296		ifp->if_baudrate = adapter->link_speed = 0;
2297		adapter->link_duplex = 0;
2298		if (bootverbose)
2299			device_printf(dev, "Link is Down\n");
2300		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2301		    (thstat & E1000_THSTAT_PWR_DOWN))
2302			device_printf(dev, "Link: thermal shutdown\n");
2303		adapter->link_active = 0;
2304		/* This can sleep */
2305		if_link_state_change(ifp, LINK_STATE_DOWN);
2306		/* Reset queue state */
2307		for (int i = 0; i < adapter->num_queues; i++, txr++)
2308			txr->queue_status = IGB_QUEUE_IDLE;
2309	}
2310}
2311
2312/*********************************************************************
2313 *
2314 *  This routine disables all traffic on the adapter by issuing a
2315 *  global reset on the MAC and deallocates TX/RX buffers.
2316 *
2317 **********************************************************************/
2318
2319static void
2320igb_stop(void *arg)
2321{
2322	struct adapter	*adapter = arg;
2323	struct ifnet	*ifp = adapter->ifp;
2324	struct tx_ring *txr = adapter->tx_rings;
2325
2326	IGB_CORE_LOCK_ASSERT(adapter);
2327
2328	INIT_DEBUGOUT("igb_stop: begin");
2329
2330	igb_disable_intr(adapter);
2331
2332	callout_stop(&adapter->timer);
2333
2334	/* Tell the stack that the interface is no longer active */
2335	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2336	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2337
2338	/* Disarm watchdog timer. */
2339	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2340		IGB_TX_LOCK(txr);
2341		txr->queue_status = IGB_QUEUE_IDLE;
2342		IGB_TX_UNLOCK(txr);
2343	}
2344
2345	e1000_reset_hw(&adapter->hw);
2346	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2347
2348	e1000_led_off(&adapter->hw);
2349	e1000_cleanup_led(&adapter->hw);
2350}
2351
2352
2353/*********************************************************************
2354 *
2355 *  Determine hardware revision.
2356 *
2357 **********************************************************************/
2358static void
2359igb_identify_hardware(struct adapter *adapter)
2360{
2361	device_t dev = adapter->dev;
2362
2363	/* Make sure our PCI config space has the necessary stuff set */
2364	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2365	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2366	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2367		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2368		    "bits were not set!\n");
2369		adapter->hw.bus.pci_cmd_word |=
2370		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2371		pci_write_config(dev, PCIR_COMMAND,
2372		    adapter->hw.bus.pci_cmd_word, 2);
2373	}
2374
2375	/* Save off the information about this board */
2376	adapter->hw.vendor_id = pci_get_vendor(dev);
2377	adapter->hw.device_id = pci_get_device(dev);
2378	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2379	adapter->hw.subsystem_vendor_id =
2380	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2381	adapter->hw.subsystem_device_id =
2382	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2383
2384	/* Set MAC type early for PCI setup */
2385	e1000_set_mac_type(&adapter->hw);
2386
2387	/* Are we a VF device? */
2388	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2389	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2390		adapter->vf_ifp = 1;
2391	else
2392		adapter->vf_ifp = 0;
2393}
2394
2395static int
2396igb_allocate_pci_resources(struct adapter *adapter)
2397{
2398	device_t	dev = adapter->dev;
2399	int		rid;
2400
2401	rid = PCIR_BAR(0);
2402	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2403	    &rid, RF_ACTIVE);
2404	if (adapter->pci_mem == NULL) {
2405		device_printf(dev, "Unable to allocate bus resource: memory\n");
2406		return (ENXIO);
2407	}
2408	adapter->osdep.mem_bus_space_tag =
2409	    rman_get_bustag(adapter->pci_mem);
2410	adapter->osdep.mem_bus_space_handle =
2411	    rman_get_bushandle(adapter->pci_mem);
2412	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2413
2414	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2415
2416	/* This will setup either MSI/X or MSI */
2417	adapter->msix = igb_setup_msix(adapter);
2418	adapter->hw.back = &adapter->osdep;
2419
2420	return (0);
2421}
2422
2423/*********************************************************************
2424 *
2425 *  Setup the Legacy or MSI Interrupt handler
2426 *
2427 **********************************************************************/
2428static int
2429igb_allocate_legacy(struct adapter *adapter)
2430{
2431	device_t		dev = adapter->dev;
2432	struct igb_queue	*que = adapter->queues;
2433	struct tx_ring		*txr = adapter->tx_rings;
2434	int			error, rid = 0;
2435
2436	/* Turn off all interrupts */
2437	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2438
2439	/* MSI RID is 1 */
2440	if (adapter->msix == 1)
2441		rid = 1;
2442
2443	/* We allocate a single interrupt resource */
2444	adapter->res = bus_alloc_resource_any(dev,
2445	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2446	if (adapter->res == NULL) {
2447		device_printf(dev, "Unable to allocate bus resource: "
2448		    "interrupt\n");
2449		return (ENXIO);
2450	}
2451
2452#if __FreeBSD_version >= 800000
2453	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2454#endif
2455
2456	/*
2457	 * Try allocating a fast interrupt and the associated deferred
2458	 * processing contexts.
2459	 */
2460	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2461	/* Make tasklet for deferred link handling */
2462	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2463	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2464	    taskqueue_thread_enqueue, &que->tq);
2465	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2466	    device_get_nameunit(adapter->dev));
2467	if ((error = bus_setup_intr(dev, adapter->res,
2468	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2469	    adapter, &adapter->tag)) != 0) {
2470		device_printf(dev, "Failed to register fast interrupt "
2471			    "handler: %d\n", error);
2472		taskqueue_free(que->tq);
2473		que->tq = NULL;
2474		return (error);
2475	}
2476
2477	return (0);
2478}
2479
2480
2481/*********************************************************************
2482 *
2483 *  Setup the MSIX Queue Interrupt handlers:
2484 *
2485 **********************************************************************/
2486static int
2487igb_allocate_msix(struct adapter *adapter)
2488{
2489	device_t		dev = adapter->dev;
2490	struct igb_queue	*que = adapter->queues;
2491	int			error, rid, vector = 0;
2492
2493	/* Be sure to start with all interrupts disabled */
2494	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2495	E1000_WRITE_FLUSH(&adapter->hw);
2496
2497	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
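		/* MSI-X message rids start at 1, so vector 0 gets rid 1. */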
2498		rid = vector + 1;
2499		que->res = bus_alloc_resource_any(dev,
2500		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2501		if (que->res == NULL) {
2502			device_printf(dev,
2503			    "Unable to allocate bus resource: "
2504			    "MSIX Queue Interrupt\n");
2505			return (ENXIO);
2506		}
2507		error = bus_setup_intr(dev, que->res,
2508	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2509		    igb_msix_que, que, &que->tag);
2510		if (error) {
2511			que->res = NULL;
2512			device_printf(dev, "Failed to register Queue handler");
2513			return (error);
2514		}
2515#if __FreeBSD_version >= 800504
2516		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2517#endif
2518		que->msix = vector;
2519		if (adapter->hw.mac.type == e1000_82575)
2520			que->eims = E1000_EICR_TX_QUEUE0 << i;
2521		else
2522			que->eims = 1 << vector;
2523		/*
2524		** Bind the msix vector, and thus the
2525		** rings to the corresponding cpu.
2526		*/
2527		if (adapter->num_queues > 1) {
2528			if (igb_last_bind_cpu < 0)
2529				igb_last_bind_cpu = CPU_FIRST();
2530			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2531			device_printf(dev,
2532				"Bound queue %d to cpu %d\n",
2533				i,igb_last_bind_cpu);
2534			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2535		}
2536#if __FreeBSD_version >= 800000
2537		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2538		    que->txr);
2539#endif
2540		/* Make tasklet for deferred handling */
2541		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2542		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2543		    taskqueue_thread_enqueue, &que->tq);
2544		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2545		    device_get_nameunit(adapter->dev));
2546	}
2547
2548	/* And Link */
2549	rid = vector + 1;
2550	adapter->res = bus_alloc_resource_any(dev,
2551	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2552	if (adapter->res == NULL) {
2553		device_printf(dev,
2554		    "Unable to allocate bus resource: "
2555		    "MSIX Link Interrupt\n");
2556		return (ENXIO);
2557	}
2558	if ((error = bus_setup_intr(dev, adapter->res,
2559	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2560	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2561		device_printf(dev, "Failed to register Link handler");
2562		return (error);
2563	}
2564#if __FreeBSD_version >= 800504
2565	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2566#endif
2567	adapter->linkvec = vector;
2568
2569	return (0);
2570}
2571
2572
2573static void
2574igb_configure_queues(struct adapter *adapter)
2575{
2576	struct	e1000_hw	*hw = &adapter->hw;
2577	struct	igb_queue	*que;
2578	u32			tmp, ivar = 0, newitr = 0;
2579
2580	/* First turn on RSS capability */
2581	if (adapter->hw.mac.type != e1000_82575)
2582		E1000_WRITE_REG(hw, E1000_GPIE,
2583		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2584		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2585
2586	/* Turn on MSIX */
2587	switch (adapter->hw.mac.type) {
2588	case e1000_82580:
2589	case e1000_i350:
2590	case e1000_i210:
2591	case e1000_i211:
2592	case e1000_vfadapt:
2593	case e1000_vfadapt_i350:
2594		/* RX entries */
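		/*
		 * Each IVAR register holds entries for two queues: the even
		 * queue uses the low 16 bits, the odd queue the high 16 bits,
		 * hence index = i >> 1.
		 */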
2595		for (int i = 0; i < adapter->num_queues; i++) {
2596			u32 index = i >> 1;
2597			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2598			que = &adapter->queues[i];
2599			if (i & 1) {
2600				ivar &= 0xFF00FFFF;
2601				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2602			} else {
2603				ivar &= 0xFFFFFF00;
2604				ivar |= que->msix | E1000_IVAR_VALID;
2605			}
2606			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2607		}
2608		/* TX entries */
2609		for (int i = 0; i < adapter->num_queues; i++) {
2610			u32 index = i >> 1;
2611			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2612			que = &adapter->queues[i];
2613			if (i & 1) {
2614				ivar &= 0x00FFFFFF;
2615				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2616			} else {
2617				ivar &= 0xFFFF00FF;
2618				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2619			}
2620			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2621			adapter->que_mask |= que->eims;
2622		}
2623
2624		/* And for the link interrupt */
2625		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2626		adapter->link_mask = 1 << adapter->linkvec;
2627		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2628		break;
2629	case e1000_82576:
2630		/* RX entries */
2631		for (int i = 0; i < adapter->num_queues; i++) {
2632			u32 index = i & 0x7; /* Each IVAR has two entries */
2633			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2634			que = &adapter->queues[i];
2635			if (i < 8) {
2636				ivar &= 0xFFFFFF00;
2637				ivar |= que->msix | E1000_IVAR_VALID;
2638			} else {
2639				ivar &= 0xFF00FFFF;
2640				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2641			}
2642			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2643			adapter->que_mask |= que->eims;
2644		}
2645		/* TX entries */
2646		for (int i = 0; i < adapter->num_queues; i++) {
2647			u32 index = i & 0x7; /* Each IVAR has two entries */
2648			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2649			que = &adapter->queues[i];
2650			if (i < 8) {
2651				ivar &= 0xFFFF00FF;
2652				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2653			} else {
2654				ivar &= 0x00FFFFFF;
2655				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2656			}
2657			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2658			adapter->que_mask |= que->eims;
2659		}
2660
2661		/* And for the link interrupt */
2662		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2663		adapter->link_mask = 1 << adapter->linkvec;
2664		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2665		break;
2666
2667	case e1000_82575:
2668                /* enable MSI-X support */
2669		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2670                tmp |= E1000_CTRL_EXT_PBA_CLR;
2671                /* Auto-Mask interrupts upon ICR read. */
2672                tmp |= E1000_CTRL_EXT_EIAME;
2673                tmp |= E1000_CTRL_EXT_IRCA;
2674                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2675
2676		/* Queues */
2677		for (int i = 0; i < adapter->num_queues; i++) {
2678			que = &adapter->queues[i];
2679			tmp = E1000_EICR_RX_QUEUE0 << i;
2680			tmp |= E1000_EICR_TX_QUEUE0 << i;
2681			que->eims = tmp;
2682			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2683			    i, que->eims);
2684			adapter->que_mask |= que->eims;
2685		}
2686
2687		/* Link */
2688		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2689		    E1000_EIMS_OTHER);
2690		adapter->link_mask |= E1000_EIMS_OTHER;
2691	default:
2692		break;
2693	}
2694
2695	/* Set the starting interrupt rate */
2696	if (igb_max_interrupt_rate > 0)
2697		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2698
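	/*
	 * The 82575 keeps the interval in both halves of EITR; later MACs
	 * take it as-is with the counter-ignore bit set.
	 */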
2699        if (hw->mac.type == e1000_82575)
2700                newitr |= newitr << 16;
2701        else
2702                newitr |= E1000_EITR_CNT_IGNR;
2703
2704	for (int i = 0; i < adapter->num_queues; i++) {
2705		que = &adapter->queues[i];
2706		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2707	}
2708
2709	return;
2710}
2711
2712
2713static void
2714igb_free_pci_resources(struct adapter *adapter)
2715{
2716	struct		igb_queue *que = adapter->queues;
2717	device_t	dev = adapter->dev;
2718	int		rid;
2719
2720	/*
2721	** There is a slight possibility of a failure mode
2722	** in attach that will result in entering this function
2723	** before interrupt resources have been initialized, and
2724	** in that case we do not want to execute the loops below.
2725	** We can detect this reliably by the state of the adapter
2726	** res pointer.
2727	*/
2728	if (adapter->res == NULL)
2729		goto mem;
2730
2731	/*
2732	 * First release all the interrupt resources:
2733	 */
2734	for (int i = 0; i < adapter->num_queues; i++, que++) {
2735		rid = que->msix + 1;
2736		if (que->tag != NULL) {
2737			bus_teardown_intr(dev, que->res, que->tag);
2738			que->tag = NULL;
2739		}
2740		if (que->res != NULL)
2741			bus_release_resource(dev,
2742			    SYS_RES_IRQ, rid, que->res);
2743	}
2744
2745	/* Clean the Legacy or Link interrupt last */
2746	if (adapter->linkvec) /* we are doing MSIX */
2747		rid = adapter->linkvec + 1;
2748	else
2749		rid = (adapter->msix != 0) ? 1 : 0;
2750
2751	que = adapter->queues;
2752	if (adapter->tag != NULL) {
2753		taskqueue_drain(que->tq, &adapter->link_task);
2754		bus_teardown_intr(dev, adapter->res, adapter->tag);
2755		adapter->tag = NULL;
2756	}
2757	if (adapter->res != NULL)
2758		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2759
2760	for (int i = 0; i < adapter->num_queues; i++, que++) {
2761		if (que->tq != NULL) {
2762#if __FreeBSD_version >= 800000
2763			taskqueue_drain(que->tq, &que->txr->txq_task);
2764#endif
2765			taskqueue_drain(que->tq, &que->que_task);
2766			taskqueue_free(que->tq);
2767		}
2768	}
2769mem:
2770	if (adapter->msix)
2771		pci_release_msi(dev);
2772
2773	if (adapter->msix_mem != NULL)
2774		bus_release_resource(dev, SYS_RES_MEMORY,
2775		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2776
2777	if (adapter->pci_mem != NULL)
2778		bus_release_resource(dev, SYS_RES_MEMORY,
2779		    PCIR_BAR(0), adapter->pci_mem);
2780
2781}
2782
2783/*
2784 * Setup Either MSI/X or MSI
2785 */
2786static int
2787igb_setup_msix(struct adapter *adapter)
2788{
2789	device_t dev = adapter->dev;
2790	int rid, want, queues, msgs, maxqueues;
2791
2792	/* tuneable override */
2793	if (igb_enable_msix == 0)
2794		goto msi;
2795
2796	/* First try MSI/X */
2797	rid = PCIR_BAR(IGB_MSIX_BAR);
2798	adapter->msix_mem = bus_alloc_resource_any(dev,
2799	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2800       	if (!adapter->msix_mem) {
2801		/* May not be enabled */
2802		device_printf(adapter->dev,
2803		    "Unable to map MSIX table \n");
2804		goto msi;
2805	}
2806
2807	msgs = pci_msix_count(dev);
2808	if (msgs == 0) { /* system has msix disabled */
2809		bus_release_resource(dev, SYS_RES_MEMORY,
2810		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2811		adapter->msix_mem = NULL;
2812		goto msi;
2813	}
2814
2815	/* Figure out a reasonable auto config value */
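	/* One message is reserved for the link interrupt, hence msgs - 1. */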
2816	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2817
2818	/* Manual override */
2819	if (igb_num_queues != 0)
2820		queues = igb_num_queues;
2821
2822	/* Sanity check based on HW */
2823	switch (adapter->hw.mac.type) {
2824		case e1000_82575:
2825			maxqueues = 4;
2826			break;
2827		case e1000_82576:
2828		case e1000_82580:
2829		case e1000_i350:
2830			maxqueues = 8;
2831			break;
2832		case e1000_i210:
2833			maxqueues = 4;
2834			break;
2835		case e1000_i211:
2836			maxqueues = 2;
2837			break;
2838		default:  /* VF interfaces */
2839			maxqueues = 1;
2840			break;
2841	}
2842	if (queues > maxqueues)
2843		queues = maxqueues;
2844
2845	/*
2846	** One vector (RX/TX pair) per queue
2847	** plus an additional one for the link interrupt
2848	*/
2849	want = queues + 1;
2850	if (msgs >= want)
2851		msgs = want;
2852	else {
2853               	device_printf(adapter->dev,
2854		    "MSIX Configuration Problem, "
2855		    "%d vectors configured, but %d queues wanted!\n",
2856		    msgs, want);
2857		return (0);
2858	}
2859	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2860               	device_printf(adapter->dev,
2861		    "Using MSIX interrupts with %d vectors\n", msgs);
2862		adapter->num_queues = queues;
2863		return (msgs);
2864	}
2865msi:
2866       	msgs = pci_msi_count(dev);
2867	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2868		device_printf(adapter->dev, "Using MSI interrupt\n");
2869		return (msgs);
2870	}
2871	return (0);
2872}
2873
2874/*********************************************************************
2875 *
2876 *  Set up a fresh starting state
2877 *
2878 **********************************************************************/
2879static void
2880igb_reset(struct adapter *adapter)
2881{
2882	device_t	dev = adapter->dev;
2883	struct e1000_hw *hw = &adapter->hw;
2884	struct e1000_fc_info *fc = &hw->fc;
2885	struct ifnet	*ifp = adapter->ifp;
2886	u32		pba = 0;
2887	u16		hwm;
2888
2889	INIT_DEBUGOUT("igb_reset: begin");
2890
2891	/* Let the firmware know the OS is in control */
2892	igb_get_hw_control(adapter);
2893
2894	/*
2895	 * Packet Buffer Allocation (PBA)
2896	 * Writing PBA sets the receive portion of the buffer;
2897	 * the remainder is used for the transmit buffer.
2898	 */
2899	switch (hw->mac.type) {
2900	case e1000_82575:
2901		pba = E1000_PBA_32K;
2902		break;
2903	case e1000_82576:
2904	case e1000_vfadapt:
2905		pba = E1000_READ_REG(hw, E1000_RXPBS);
2906		pba &= E1000_RXPBS_SIZE_MASK_82576;
2907		break;
2908	case e1000_82580:
2909	case e1000_i350:
2910	case e1000_vfadapt_i350:
2911		pba = E1000_READ_REG(hw, E1000_RXPBS);
2912		pba = e1000_rxpbs_adjust_82580(pba);
2913		break;
2914	case e1000_i210:
2915	case e1000_i211:
2916		pba = E1000_PBA_34K;
2917	default:
2918		break;
2919	}
2920
2921	/* Special needs in case of Jumbo frames */
2922	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2923		u32 tx_space, min_tx, min_rx;
2924		pba = E1000_READ_REG(hw, E1000_PBA);
2925		tx_space = pba >> 16;
2926		pba &= 0xffff;
2927		min_tx = (adapter->max_frame_size +
2928		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2929		min_tx = roundup2(min_tx, 1024);
2930		min_tx >>= 10;
2931                min_rx = adapter->max_frame_size;
2932                min_rx = roundup2(min_rx, 1024);
2933                min_rx >>= 10;
2934		if (tx_space < min_tx &&
2935		    ((min_tx - tx_space) < pba)) {
2936			pba = pba - (min_tx - tx_space);
2937			/*
2938                         * if short on rx space, rx wins
2939                         * and must trump tx adjustment
2940			 */
2941                        if (pba < min_rx)
2942                                pba = min_rx;
2943		}
2944		E1000_WRITE_REG(hw, E1000_PBA, pba);
2945	}
2946
2947	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2948
2949	/*
2950	 * These parameters control the automatic generation (Tx) and
2951	 * response (Rx) to Ethernet PAUSE frames.
2952	 * - High water mark should allow for at least two frames to be
2953	 *   received after sending an XOFF.
2954	 * - Low water mark works best when it is very near the high water mark.
2955	 *   This allows the receiver to restart by sending XON when it has
2956	 *   drained a bit.
2957	 */
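	/* pba is in KB; shifting left by 10 converts it to bytes. */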
2958	hwm = min(((pba << 10) * 9 / 10),
2959	    ((pba << 10) - 2 * adapter->max_frame_size));
2960
2961	if (hw->mac.type < e1000_82576) {
2962		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2963		fc->low_water = fc->high_water - 8;
2964	} else {
2965		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2966		fc->low_water = fc->high_water - 16;
2967	}
2968
2969	fc->pause_time = IGB_FC_PAUSE_TIME;
2970	fc->send_xon = TRUE;
2971	if (adapter->fc)
2972		fc->requested_mode = adapter->fc;
2973	else
2974		fc->requested_mode = e1000_fc_default;
2975
2976	/* Issue a global reset */
2977	e1000_reset_hw(hw);
2978	E1000_WRITE_REG(hw, E1000_WUC, 0);
2979
2980	if (e1000_init_hw(hw) < 0)
2981		device_printf(dev, "Hardware Initialization Failed\n");
2982
2983	/* Setup DMA Coalescing */
2984	if ((hw->mac.type > e1000_82580) &&
2985	    (hw->mac.type != e1000_i211)) {
2986		u32 dmac;
2987		u32 reg = ~E1000_DMACR_DMAC_EN;
2988
2989		if (adapter->dmac == 0) { /* Disabling it */
2990			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2991			goto reset_out;
2992		}
2993
2994		/* Set starting thresholds */
2995		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2996		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2997
2998		hwm = 64 * pba - adapter->max_frame_size / 16;
2999		if (hwm < 64 * (pba - 6))
3000			hwm = 64 * (pba - 6);
3001		reg = E1000_READ_REG(hw, E1000_FCRTC);
3002		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3003		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3004		    & E1000_FCRTC_RTH_COAL_MASK);
3005		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3006
3007
3008		dmac = pba - adapter->max_frame_size / 512;
3009		if (dmac < pba - 10)
3010			dmac = pba - 10;
3011		reg = E1000_READ_REG(hw, E1000_DMACR);
3012		reg &= ~E1000_DMACR_DMACTHR_MASK;
3013		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3014		    & E1000_DMACR_DMACTHR_MASK);
3015		/* transition to L0s or L1 if available */
3016		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3017		/* timer = value in adapter->dmac in 32usec intervals */
3018		reg |= (adapter->dmac >> 5);
3019		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3020
3021		/* Set the interval before transition */
3022		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3023		reg |= 0x80000004;
3024		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3025
3026		/* free space in tx packet buffer to wake from DMA coal */
3027		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3028		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3029
3030		/* make low power state decision controlled by DMA coal */
3031		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3032		reg &= ~E1000_PCIEMISC_LX_DECISION;
3033		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3034		device_printf(dev, "DMA Coalescing enabled\n");
3035
3036	} else if (hw->mac.type == e1000_82580) {
3037		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3038		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3039		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3040		    reg & ~E1000_PCIEMISC_LX_DECISION);
3041	}
3042
3043reset_out:
3044	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3045	e1000_get_phy_info(hw);
3046	e1000_check_for_link(hw);
3047	return;
3048}
3049
3050/*********************************************************************
3051 *
3052 *  Setup networking device structure and register an interface.
3053 *
3054 **********************************************************************/
3055static int
3056igb_setup_interface(device_t dev, struct adapter *adapter)
3057{
3058	struct ifnet   *ifp;
3059
3060	INIT_DEBUGOUT("igb_setup_interface: begin");
3061
3062	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3063	if (ifp == NULL) {
3064		device_printf(dev, "can not allocate ifnet structure\n");
3065		return (-1);
3066	}
3067	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3068	ifp->if_init =  igb_init;
3069	ifp->if_softc = adapter;
3070	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3071	ifp->if_ioctl = igb_ioctl;
3072#if __FreeBSD_version >= 800000
3073	ifp->if_transmit = igb_mq_start;
3074	ifp->if_qflush = igb_qflush;
3075#else
3076	ifp->if_start = igb_start;
3077	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3078	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3079	IFQ_SET_READY(&ifp->if_snd);
3080#endif
3081
3082	ether_ifattach(ifp, adapter->hw.mac.addr);
3083
3084	ifp->if_capabilities = ifp->if_capenable = 0;
3085
3086	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3087	ifp->if_capabilities |= IFCAP_TSO4;
3088	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3089	ifp->if_capenable = ifp->if_capabilities;
3090
3091	/* Advertise LRO capability, but leave it disabled by default */
3092	ifp->if_capabilities |= IFCAP_LRO;
3093
3094#ifdef DEVICE_POLLING
3095	ifp->if_capabilities |= IFCAP_POLLING;
3096#endif
3097
3098	/*
3099	 * Tell the upper layer(s) we
3100	 * support full VLAN capability.
3101	 */
3102	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3103	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3104			     |  IFCAP_VLAN_HWTSO
3105			     |  IFCAP_VLAN_MTU;
3106	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3107			  |  IFCAP_VLAN_HWTSO
3108			  |  IFCAP_VLAN_MTU;
3109
3110	/*
3111	** Don't turn this on by default: if vlans are
3112	** created on another pseudo device (e.g. lagg),
3113	** vlan events are not passed through, breaking
3114	** operation, but with HW FILTER off it works. If
3115	** using vlans directly on the igb driver you can
3116	** enable this and get full hardware tag filtering.
3117	*/
3118	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3119
3120	/*
3121	 * Specify the media types supported by this adapter and register
3122	 * callbacks to update media and link information
3123	 */
3124	ifmedia_init(&adapter->media, IFM_IMASK,
3125	    igb_media_change, igb_media_status);
3126	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3127	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3128		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3129			    0, NULL);
3130		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3131	} else {
3132		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3133		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3134			    0, NULL);
3135		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3136			    0, NULL);
3137		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3138			    0, NULL);
3139		if (adapter->hw.phy.type != e1000_phy_ife) {
3140			ifmedia_add(&adapter->media,
3141				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3142			ifmedia_add(&adapter->media,
3143				IFM_ETHER | IFM_1000_T, 0, NULL);
3144		}
3145	}
3146	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3147	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3148	return (0);
3149}
3150
3151
3152/*
3153 * Manage DMA'able memory.
3154 */
3155static void
3156igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3157{
3158	if (error)
3159		return;
3160	*(bus_addr_t *) arg = segs[0].ds_addr;
3161}
3162
3163static int
3164igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3165        struct igb_dma_alloc *dma, int mapflags)
3166{
3167	int error;
3168
3169	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3170				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3171				BUS_SPACE_MAXADDR,	/* lowaddr */
3172				BUS_SPACE_MAXADDR,	/* highaddr */
3173				NULL, NULL,		/* filter, filterarg */
3174				size,			/* maxsize */
3175				1,			/* nsegments */
3176				size,			/* maxsegsize */
3177				0,			/* flags */
3178				NULL,			/* lockfunc */
3179				NULL,			/* lockarg */
3180				&dma->dma_tag);
3181	if (error) {
3182		device_printf(adapter->dev,
3183		    "%s: bus_dma_tag_create failed: %d\n",
3184		    __func__, error);
3185		goto fail_0;
3186	}
3187
3188	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3189	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3190	if (error) {
3191		device_printf(adapter->dev,
3192		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3193		    __func__, (uintmax_t)size, error);
3194		goto fail_2;
3195	}
3196
3197	dma->dma_paddr = 0;
3198	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3199	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3200	if (error || dma->dma_paddr == 0) {
3201		device_printf(adapter->dev,
3202		    "%s: bus_dmamap_load failed: %d\n",
3203		    __func__, error);
3204		goto fail_3;
3205	}
3206
3207	return (0);
3208
3209fail_3:
3210	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3211fail_2:
3212	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3213	bus_dma_tag_destroy(dma->dma_tag);
3214fail_0:
3215	dma->dma_map = NULL;
3216	dma->dma_tag = NULL;
3217
3218	return (error);
3219}
3220
3221static void
3222igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3223{
3224	if (dma->dma_tag == NULL)
3225		return;
3226	if (dma->dma_map != NULL) {
3227		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3228		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3229		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3230		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3231		dma->dma_map = NULL;
3232	}
3233	bus_dma_tag_destroy(dma->dma_tag);
3234	dma->dma_tag = NULL;
3235}
3236
3237
3238/*********************************************************************
3239 *
3240 *  Allocate memory for the transmit and receive rings, and then
3241 *  the descriptors associated with each, called only once at attach.
3242 *
3243 **********************************************************************/
3244static int
3245igb_allocate_queues(struct adapter *adapter)
3246{
3247	device_t dev = adapter->dev;
3248	struct igb_queue	*que = NULL;
3249	struct tx_ring		*txr = NULL;
3250	struct rx_ring		*rxr = NULL;
3251	int rsize, tsize, error = E1000_SUCCESS;
3252	int txconf = 0, rxconf = 0;
3253
3254	/* First allocate the top level queue structs */
3255	if (!(adapter->queues =
3256	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3257	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3258		device_printf(dev, "Unable to allocate queue memory\n");
3259		error = ENOMEM;
3260		goto fail;
3261	}
3262
3263	/* Next allocate the TX ring struct memory */
3264	if (!(adapter->tx_rings =
3265	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3266	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3267		device_printf(dev, "Unable to allocate TX ring memory\n");
3268		error = ENOMEM;
3269		goto tx_fail;
3270	}
3271
3272	/* Now allocate the RX */
3273	if (!(adapter->rx_rings =
3274	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3275	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3276		device_printf(dev, "Unable to allocate RX ring memory\n");
3277		error = ENOMEM;
3278		goto rx_fail;
3279	}
3280
3281	tsize = roundup2(adapter->num_tx_desc *
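	/*
	 * Descriptor ring sizes are rounded up to a multiple of
	 * IGB_DBA_ALIGN so each ring's DMA area is suitably aligned.
	 */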
3282	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3283	/*
3284	 * Now set up the TX queues, txconf is needed to handle the
3285	 * possibility that things fail midcourse and we need to
3286	 * undo memory gracefully
3287	 */
3288	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3289		/* Set up some basics */
3290		txr = &adapter->tx_rings[i];
3291		txr->adapter = adapter;
3292		txr->me = i;
3293
3294		/* Initialize the TX lock */
3295		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3296		    device_get_nameunit(dev), txr->me);
3297		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3298
3299		if (igb_dma_malloc(adapter, tsize,
3300			&txr->txdma, BUS_DMA_NOWAIT)) {
3301			device_printf(dev,
3302			    "Unable to allocate TX Descriptor memory\n");
3303			error = ENOMEM;
3304			goto err_tx_desc;
3305		}
3306		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3307		bzero((void *)txr->tx_base, tsize);
3308
3309        	/* Now allocate transmit buffers for the ring */
3310        	if (igb_allocate_transmit_buffers(txr)) {
3311			device_printf(dev,
3312			    "Critical Failure setting up transmit buffers\n");
3313			error = ENOMEM;
3314			goto err_tx_desc;
3315        	}
3316#if __FreeBSD_version >= 800000
3317		/* Allocate a buf ring */
3318		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3319		    M_WAITOK, &txr->tx_mtx);
3320#endif
3321	}
3322
3323	/*
3324	 * Next the RX queues...
3325	 */
3326	rsize = roundup2(adapter->num_rx_desc *
3327	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3328	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3329		rxr = &adapter->rx_rings[i];
3330		rxr->adapter = adapter;
3331		rxr->me = i;
3332
3333		/* Initialize the RX lock */
3334		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3335		    device_get_nameunit(dev), rxr->me);
3336		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3337
3338		if (igb_dma_malloc(adapter, rsize,
3339			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3340			device_printf(dev,
3341			    "Unable to allocate RxDescriptor memory\n");
3342			error = ENOMEM;
3343			goto err_rx_desc;
3344		}
3345		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3346		bzero((void *)rxr->rx_base, rsize);
3347
3348        	/* Allocate receive buffers for the ring */
3349		if (igb_allocate_receive_buffers(rxr)) {
3350			device_printf(dev,
3351			    "Critical Failure setting up receive buffers\n");
3352			error = ENOMEM;
3353			goto err_rx_desc;
3354		}
3355	}
3356
3357	/*
3358	** Finally set up the queue holding structs
3359	*/
3360	for (int i = 0; i < adapter->num_queues; i++) {
3361		que = &adapter->queues[i];
3362		que->adapter = adapter;
3363		que->txr = &adapter->tx_rings[i];
3364		que->rxr = &adapter->rx_rings[i];
3365	}
3366
3367	return (0);
3368
3369err_rx_desc:
3370	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3371		igb_dma_free(adapter, &rxr->rxdma);
3372err_tx_desc:
3373	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3374		igb_dma_free(adapter, &txr->txdma);
3375	free(adapter->rx_rings, M_DEVBUF);
3376rx_fail:
3377#if __FreeBSD_version >= 800000
3378	buf_ring_free(txr->br, M_DEVBUF);
3379#endif
3380	free(adapter->tx_rings, M_DEVBUF);
3381tx_fail:
3382	free(adapter->queues, M_DEVBUF);
3383fail:
3384	return (error);
3385}
3386
3387/*********************************************************************
3388 *
3389 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3390 *  the information needed to transmit a packet on the wire. This is
3391 *  called only once at attach, setup is done every reset.
3392 *
3393 **********************************************************************/
3394static int
3395igb_allocate_transmit_buffers(struct tx_ring *txr)
3396{
3397	struct adapter *adapter = txr->adapter;
3398	device_t dev = adapter->dev;
3399	struct igb_tx_buffer *txbuf;
3400	int error, i;
3401
3402	/*
3403	 * Setup DMA descriptor areas.
3404	 */
3405	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3406			       1, 0,			/* alignment, bounds */
3407			       BUS_SPACE_MAXADDR,	/* lowaddr */
3408			       BUS_SPACE_MAXADDR,	/* highaddr */
3409			       NULL, NULL,		/* filter, filterarg */
3410			       IGB_TSO_SIZE,		/* maxsize */
3411			       IGB_MAX_SCATTER,		/* nsegments */
3412			       PAGE_SIZE,		/* maxsegsize */
3413			       0,			/* flags */
3414			       NULL,			/* lockfunc */
3415			       NULL,			/* lockfuncarg */
3416			       &txr->txtag))) {
3417		device_printf(dev,"Unable to allocate TX DMA tag\n");
3418		goto fail;
3419	}
3420
3421	if (!(txr->tx_buffers =
3422	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3423	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3424		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3425		error = ENOMEM;
3426		goto fail;
3427	}
3428
3429        /* Create the descriptor buffer dma maps */
3430	txbuf = txr->tx_buffers;
3431	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3432		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3433		if (error != 0) {
3434			device_printf(dev, "Unable to create TX DMA map\n");
3435			goto fail;
3436		}
3437	}
3438
3439	return 0;
3440fail:
3441	/* We free all, it handles case where we are in the middle */
3442	igb_free_transmit_structures(adapter);
3443	return (error);
3444}
3445
3446/*********************************************************************
3447 *
3448 *  Initialize a transmit ring.
3449 *
3450 **********************************************************************/
3451static void
3452igb_setup_transmit_ring(struct tx_ring *txr)
3453{
3454	struct adapter *adapter = txr->adapter;
3455	struct igb_tx_buffer *txbuf;
3456	int i;
3457#ifdef DEV_NETMAP
3458	struct netmap_adapter *na = NA(adapter->ifp);
3459	struct netmap_slot *slot;
3460#endif /* DEV_NETMAP */
3461
3462	/* Clear the old descriptor contents */
3463	IGB_TX_LOCK(txr);
3464#ifdef DEV_NETMAP
3465	slot = netmap_reset(na, NR_TX, txr->me, 0);
3466#endif /* DEV_NETMAP */
3467	bzero((void *)txr->tx_base,
3468	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3469	/* Reset indices */
3470	txr->next_avail_desc = 0;
3471	txr->next_to_clean = 0;
3472
3473	/* Free any existing tx buffers. */
3474        txbuf = txr->tx_buffers;
3475	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3476		if (txbuf->m_head != NULL) {
3477			bus_dmamap_sync(txr->txtag, txbuf->map,
3478			    BUS_DMASYNC_POSTWRITE);
3479			bus_dmamap_unload(txr->txtag, txbuf->map);
3480			m_freem(txbuf->m_head);
3481			txbuf->m_head = NULL;
3482		}
3483#ifdef DEV_NETMAP
3484		if (slot) {
3485			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3486			/* no need to set the address */
3487			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3488		}
3489#endif /* DEV_NETMAP */
3490		/* clear the watch index */
3491		txbuf->next_eop = -1;
3492        }
3493
3494	/* Set number of descriptors available */
3495	txr->tx_avail = adapter->num_tx_desc;
3496
3497	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3498	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3499	IGB_TX_UNLOCK(txr);
3500}
3501
3502/*********************************************************************
3503 *
3504 *  Initialize all transmit rings.
3505 *
3506 **********************************************************************/
3507static void
3508igb_setup_transmit_structures(struct adapter *adapter)
3509{
3510	struct tx_ring *txr = adapter->tx_rings;
3511
3512	for (int i = 0; i < adapter->num_queues; i++, txr++)
3513		igb_setup_transmit_ring(txr);
3514
3515	return;
3516}
3517
3518/*********************************************************************
3519 *
3520 *  Enable transmit unit.
3521 *
3522 **********************************************************************/
3523static void
3524igb_initialize_transmit_units(struct adapter *adapter)
3525{
3526	struct tx_ring	*txr = adapter->tx_rings;
3527	struct e1000_hw *hw = &adapter->hw;
3528	u32		tctl, txdctl;
3529
3530	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3531	tctl = txdctl = 0;
3532
3533	/* Setup the Tx Descriptor Rings */
3534	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3535		u64 bus_addr = txr->txdma.dma_paddr;
3536
3537		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3538		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3539		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3540		    (uint32_t)(bus_addr >> 32));
3541		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3542		    (uint32_t)bus_addr);
3543
3544		/* Setup the HW Tx Head and Tail descriptor pointers */
3545		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3546		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3547
3548		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3549		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3550		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3551
3552		txr->queue_status = IGB_QUEUE_IDLE;
3553
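		/*
		** Pack the prefetch (PTHRESH), host (HTHRESH) and
		** write-back (WTHRESH) thresholds into their TXDCTL
		** fields, then set the queue enable bit.
		*/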
3554		txdctl |= IGB_TX_PTHRESH;
3555		txdctl |= IGB_TX_HTHRESH << 8;
3556		txdctl |= IGB_TX_WTHRESH << 16;
3557		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3558		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3559	}
3560
3561	if (adapter->vf_ifp)
3562		return;
3563
3564	e1000_config_collision_dist(hw);
3565
3566	/* Program the Transmit Control Register */
3567	tctl = E1000_READ_REG(hw, E1000_TCTL);
3568	tctl &= ~E1000_TCTL_CT;
3569	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3570		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3571
3572	/* This write will effectively turn on the transmit unit. */
3573	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3574}
3575
3576/*********************************************************************
3577 *
3578 *  Free all transmit rings.
3579 *
3580 **********************************************************************/
3581static void
3582igb_free_transmit_structures(struct adapter *adapter)
3583{
3584	struct tx_ring *txr = adapter->tx_rings;
3585
3586	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3587		IGB_TX_LOCK(txr);
3588		igb_free_transmit_buffers(txr);
3589		igb_dma_free(adapter, &txr->txdma);
3590		IGB_TX_UNLOCK(txr);
3591		IGB_TX_LOCK_DESTROY(txr);
3592	}
3593	free(adapter->tx_rings, M_DEVBUF);
3594}
3595
3596/*********************************************************************
3597 *
3598 *  Free transmit ring related data structures.
3599 *
3600 **********************************************************************/
3601static void
3602igb_free_transmit_buffers(struct tx_ring *txr)
3603{
3604	struct adapter *adapter = txr->adapter;
3605	struct igb_tx_buffer *tx_buffer;
3606	int             i;
3607
3608	INIT_DEBUGOUT("free_transmit_ring: begin");
3609
3610	if (txr->tx_buffers == NULL)
3611		return;
3612
3613	tx_buffer = txr->tx_buffers;
3614	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3615		if (tx_buffer->m_head != NULL) {
3616			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3617			    BUS_DMASYNC_POSTWRITE);
3618			bus_dmamap_unload(txr->txtag,
3619			    tx_buffer->map);
3620			m_freem(tx_buffer->m_head);
3621			tx_buffer->m_head = NULL;
3622			if (tx_buffer->map != NULL) {
3623				bus_dmamap_destroy(txr->txtag,
3624				    tx_buffer->map);
3625				tx_buffer->map = NULL;
3626			}
3627		} else if (tx_buffer->map != NULL) {
3628			bus_dmamap_unload(txr->txtag,
3629			    tx_buffer->map);
3630			bus_dmamap_destroy(txr->txtag,
3631			    tx_buffer->map);
3632			tx_buffer->map = NULL;
3633		}
3634	}
3635#if __FreeBSD_version >= 800000
3636	if (txr->br != NULL)
3637		buf_ring_free(txr->br, M_DEVBUF);
3638#endif
3639	if (txr->tx_buffers != NULL) {
3640		free(txr->tx_buffers, M_DEVBUF);
3641		txr->tx_buffers = NULL;
3642	}
3643	if (txr->txtag != NULL) {
3644		bus_dma_tag_destroy(txr->txtag);
3645		txr->txtag = NULL;
3646	}
3647	return;
3648}
3649
3650/**********************************************************************
3651 *
3652 *  Setup work for hardware segmentation offload (TSO)
3653 *
3654 **********************************************************************/
3655static bool
3656igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3657	struct ip *ip, struct tcphdr *th)
3658{
3659	struct adapter *adapter = txr->adapter;
3660	struct e1000_adv_tx_context_desc *TXD;
3661	struct igb_tx_buffer        *tx_buffer;
3662	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3663	u32 mss_l4len_idx = 0;
3664	u16 vtag = 0;
3665	int ctxd, ip_hlen, tcp_hlen;
3666
3667	ctxd = txr->next_avail_desc;
3668	tx_buffer = &txr->tx_buffers[ctxd];
3669	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3670
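	/*
	** Zero the IP checksum; with TSO the hardware computes it
	** for each generated segment. The header lengths below are
	** in bytes (the header fields count 32-bit words).
	*/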
3671	ip->ip_sum = 0;
3672	ip_hlen = ip->ip_hl << 2;
3673	tcp_hlen = th->th_off << 2;
3674
3675	/* VLAN MACLEN IPLEN */
3676	if (mp->m_flags & M_VLANTAG) {
3677		vtag = htole16(mp->m_pkthdr.ether_vtag);
3678		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3679	}
3680
3681	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3682	vlan_macip_lens |= ip_hlen;
3683	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3684
3685	/* ADV DTYPE TUCMD */
3686	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3687	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3688	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3689	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3690
3691	/* MSS L4LEN IDX */
3692	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3693	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3694	/* 82575 needs the queue index added */
3695	if (adapter->hw.mac.type == e1000_82575)
3696		mss_l4len_idx |= txr->me << 4;
3697	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3698
3699	TXD->seqnum_seed = htole32(0);
3700	tx_buffer->m_head = NULL;
3701	tx_buffer->next_eop = -1;
3702
3703	if (++ctxd == adapter->num_tx_desc)
3704		ctxd = 0;
3705
3706	txr->tx_avail--;
3707	txr->next_avail_desc = ctxd;
3708	return TRUE;
3709}
3710
3711
3712/*********************************************************************
3713 *
3714 *  Context Descriptor setup for VLAN or CSUM
3715 *
3716 **********************************************************************/
3717
3718static bool
3719igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3720{
3721	struct adapter *adapter = txr->adapter;
3722	struct e1000_adv_tx_context_desc *TXD;
3723	struct igb_tx_buffer        *tx_buffer;
3724	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3725	struct ether_vlan_header *eh;
3726	struct ip *ip = NULL;
3727	struct ip6_hdr *ip6;
3728	int  ehdrlen, ctxd, ip_hlen = 0;
3729	u16	etype, vtag = 0;
3730	u8	ipproto = 0;
3731	bool	offload = TRUE;
3732
3733	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3734		offload = FALSE;
3735
3736	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3737	ctxd = txr->next_avail_desc;
3738	tx_buffer = &txr->tx_buffers[ctxd];
3739	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3740
3741	/*
3742	** In advanced descriptors the vlan tag must
3743	** be placed into the context descriptor, thus
3744	** we need to be here just for that setup.
3745	*/
3746	if (mp->m_flags & M_VLANTAG) {
3747		vtag = htole16(mp->m_pkthdr.ether_vtag);
3748		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3749	} else if (offload == FALSE)
3750		return FALSE;
3751
3752	/*
3753	 * Determine where frame payload starts.
3754	 * Jump over vlan headers if already present,
3755	 * helpful for QinQ too.
3756	 */
3757	eh = mtod(mp, struct ether_vlan_header *);
3758	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3759		etype = ntohs(eh->evl_proto);
3760		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3761	} else {
3762		etype = ntohs(eh->evl_encap_proto);
3763		ehdrlen = ETHER_HDR_LEN;
3764	}
3765
3766	/* Set the ether header length */
3767	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3768
3769	switch (etype) {
3770		case ETHERTYPE_IP:
3771			ip = (struct ip *)(mp->m_data + ehdrlen);
3772			ip_hlen = ip->ip_hl << 2;
3773			if (mp->m_len < ehdrlen + ip_hlen) {
3774				offload = FALSE;
3775				break;
3776			}
3777			ipproto = ip->ip_p;
3778			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3779			break;
3780		case ETHERTYPE_IPV6:
3781			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3782			ip_hlen = sizeof(struct ip6_hdr);
3783			ipproto = ip6->ip6_nxt;
3784			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3785			break;
3786		default:
3787			offload = FALSE;
3788			break;
3789	}
3790
3791	vlan_macip_lens |= ip_hlen;
3792	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3793
3794	switch (ipproto) {
3795		case IPPROTO_TCP:
3796			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3797				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3798			break;
3799		case IPPROTO_UDP:
3800			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3801				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3802			break;
3803#if __FreeBSD_version >= 800000
3804		case IPPROTO_SCTP:
3805			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3806				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3807			break;
3808#endif
3809		default:
3810			offload = FALSE;
3811			break;
3812	}
3813
3814	/* 82575 needs the queue index added */
3815	if (adapter->hw.mac.type == e1000_82575)
3816		mss_l4len_idx = txr->me << 4;
3817
3818	/* Now copy bits into descriptor */
3819	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3820	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3821	TXD->seqnum_seed = htole32(0);
3822	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3823
3824	tx_buffer->m_head = NULL;
3825	tx_buffer->next_eop = -1;
3826
3827	/* We've consumed the first desc, adjust counters */
3828	if (++ctxd == adapter->num_tx_desc)
3829		ctxd = 0;
3830	txr->next_avail_desc = ctxd;
3831	--txr->tx_avail;
3832
3833        return (offload);
3834}
3835
3836
3837/**********************************************************************
3838 *
3839 *  Examine each tx_buffer in the used queue. If the hardware is done
3840 *  processing the packet then free associated resources. The
3841 *  tx_buffer is put back on the free queue.
3842 *
3843 *  A TRUE return means there's work in the ring to clean, FALSE means it's empty.
3844 **********************************************************************/
3845static bool
3846igb_txeof(struct tx_ring *txr)
3847{
3848	struct adapter	*adapter = txr->adapter;
3849        int first, last, done, processed;
3850        struct igb_tx_buffer *tx_buffer;
3851        struct e1000_tx_desc   *tx_desc, *eop_desc;
3852	struct ifnet   *ifp = adapter->ifp;
3853
3854	IGB_TX_LOCK_ASSERT(txr);
3855
3856#ifdef DEV_NETMAP
3857	if (ifp->if_capenable & IFCAP_NETMAP) {
3858		struct netmap_adapter *na = NA(ifp);
3859
3860		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3861		IGB_TX_UNLOCK(txr);
3862		IGB_CORE_LOCK(adapter);
3863		selwakeuppri(&na->tx_si, PI_NET);
3864		IGB_CORE_UNLOCK(adapter);
3865		IGB_TX_LOCK(txr);
3866		return FALSE;
3867	}
3868#endif /* DEV_NETMAP */
3869        if (txr->tx_avail == adapter->num_tx_desc) {
3870		txr->queue_status = IGB_QUEUE_IDLE;
3871                return FALSE;
3872	}
3873
3874	processed = 0;
3875        first = txr->next_to_clean;
3876        tx_desc = &txr->tx_base[first];
3877        tx_buffer = &txr->tx_buffers[first];
3878	last = tx_buffer->next_eop;
3879        eop_desc = &txr->tx_base[last];
3880
3881	/*
3882	 * What this does is get the index of the
3883	 * first descriptor AFTER the EOP of the
3884	 * first packet, that way we can do the
3885	 * simple comparison on the inner while loop.
3886	 */
3887	if (++last == adapter->num_tx_desc)
3888 		last = 0;
3889	done = last;
3890
3891        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3892            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3893
3894        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3895		/* We clean the range of the packet */
3896		while (first != done) {
3897                	tx_desc->upper.data = 0;
3898                	tx_desc->lower.data = 0;
3899                	tx_desc->buffer_addr = 0;
3900                	++txr->tx_avail;
3901			++processed;
3902
3903			if (tx_buffer->m_head) {
3904				txr->bytes +=
3905				    tx_buffer->m_head->m_pkthdr.len;
3906				bus_dmamap_sync(txr->txtag,
3907				    tx_buffer->map,
3908				    BUS_DMASYNC_POSTWRITE);
3909				bus_dmamap_unload(txr->txtag,
3910				    tx_buffer->map);
3911
3912                        	m_freem(tx_buffer->m_head);
3913                        	tx_buffer->m_head = NULL;
3914                	}
3915			tx_buffer->next_eop = -1;
3916			txr->watchdog_time = ticks;
3917
3918	                if (++first == adapter->num_tx_desc)
3919				first = 0;
3920
3921	                tx_buffer = &txr->tx_buffers[first];
3922			tx_desc = &txr->tx_base[first];
3923		}
3924		++txr->packets;
3925		++ifp->if_opackets;
3926		/* See if we can continue to the next packet */
3927		last = tx_buffer->next_eop;
3928		if (last != -1) {
3929        		eop_desc = &txr->tx_base[last];
3930			/* Get new done point */
3931			if (++last == adapter->num_tx_desc) last = 0;
3932			done = last;
3933		} else
3934			break;
3935        }
3936        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3937            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3938
3939        txr->next_to_clean = first;
3940
3941	/*
3942	** Watchdog calculation: we know there's
3943	** work outstanding or the first return
3944	** would have been taken, so nothing processed
3945	** for too long indicates a hang.
3946	*/
3947	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3948		txr->queue_status |= IGB_QUEUE_HUNG;
3949        /*
3950         * If we have a minimum free,
3951         * clear depleted state bit
3952         */
3953        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3954                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3955
3956	/* All clean, turn off the watchdog */
3957	if (txr->tx_avail == adapter->num_tx_desc) {
3958		txr->queue_status = IGB_QUEUE_IDLE;
3959		return (FALSE);
3960        }
3961
3962	return (TRUE);
3963}
3964
3965/*********************************************************************
3966 *
3967 *  Refresh mbuf buffers for RX descriptor rings
3968 *   - now keeps its own state so discards due to resource
3969 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3970 *     it just returns, keeping its placeholder, so it can simply
3971 *     be called again to retry.
3972 *
3973 **********************************************************************/
3974static void
3975igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3976{
3977	struct adapter		*adapter = rxr->adapter;
3978	bus_dma_segment_t	hseg[1];
3979	bus_dma_segment_t	pseg[1];
3980	struct igb_rx_buf	*rxbuf;
3981	struct mbuf		*mh, *mp;
3982	int			i, j, nsegs, error;
3983	bool			refreshed = FALSE;
3984
3985	i = j = rxr->next_to_refresh;
3986	/*
3987	** Get one descriptor beyond
3988	** our work mark to control
3989	** the loop.
3990        */
3991	if (++j == adapter->num_rx_desc)
3992		j = 0;
3993
3994	while (j != limit) {
3995		rxbuf = &rxr->rx_buffers[i];
3996		/* No hdr mbuf used with header split off */
3997		if (rxr->hdr_split == FALSE)
3998			goto no_split;
3999		if (rxbuf->m_head == NULL) {
4000			mh = m_gethdr(M_DONTWAIT, MT_DATA);
4001			if (mh == NULL)
4002				goto update;
4003		} else
4004			mh = rxbuf->m_head;
4005
4006		mh->m_pkthdr.len = mh->m_len = MHLEN;
4008		mh->m_flags |= M_PKTHDR;
4009		/* Get the memory mapping */
4010		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4011		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4012		if (error != 0) {
4013			printf("Refresh mbufs: hdr dmamap load"
4014			    " failure - %d\n", error);
4015			m_free(mh);
4016			rxbuf->m_head = NULL;
4017			goto update;
4018		}
4019		rxbuf->m_head = mh;
4020		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4021		    BUS_DMASYNC_PREREAD);
4022		rxr->rx_base[i].read.hdr_addr =
4023		    htole64(hseg[0].ds_addr);
4024no_split:
4025		if (rxbuf->m_pack == NULL) {
4026			mp = m_getjcl(M_DONTWAIT, MT_DATA,
4027			    M_PKTHDR, adapter->rx_mbuf_sz);
4028			if (mp == NULL)
4029				goto update;
4030		} else
4031			mp = rxbuf->m_pack;
4032
4033		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4034		/* Get the memory mapping */
4035		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4036		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4037		if (error != 0) {
4038			printf("Refresh mbufs: payload dmamap load"
4039			    " failure - %d\n", error);
4040			m_free(mp);
4041			rxbuf->m_pack = NULL;
4042			goto update;
4043		}
4044		rxbuf->m_pack = mp;
4045		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4046		    BUS_DMASYNC_PREREAD);
4047		rxr->rx_base[i].read.pkt_addr =
4048		    htole64(pseg[0].ds_addr);
4049		refreshed = TRUE; /* I feel wefreshed :) */
4050
4051		i = j; /* our next is precalculated */
4052		rxr->next_to_refresh = i;
4053		if (++j == adapter->num_rx_desc)
4054			j = 0;
4055	}
4056update:
4057	if (refreshed) /* update tail */
4058		E1000_WRITE_REG(&adapter->hw,
4059		    E1000_RDT(rxr->me), rxr->next_to_refresh);
4060	return;
4061}
4062
4063
4064/*********************************************************************
4065 *
4066 *  Allocate memory for rx_buffer structures. Since we use one
4067 *  rx_buffer per received packet, the maximum number of rx_buffers
4068 *  that we'll need is equal to the number of receive descriptors
4069 *  that we've allocated.
4070 *
4071 **********************************************************************/
4072static int
4073igb_allocate_receive_buffers(struct rx_ring *rxr)
4074{
4075	struct	adapter 	*adapter = rxr->adapter;
4076	device_t 		dev = adapter->dev;
4077	struct igb_rx_buf	*rxbuf;
4078	int             	i, bsize, error;
4079
4080	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4081	if (!(rxr->rx_buffers =
4082	    (struct igb_rx_buf *) malloc(bsize,
4083	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4084		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4085		error = ENOMEM;
4086		goto fail;
4087	}
4088
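	/*
	** Two tags are used: a small (MSIZE) tag for header-split
	** header mbufs and a jumbo (MJUM9BYTES) tag for payload
	** clusters.
	*/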
4089	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4090				   1, 0,		/* alignment, bounds */
4091				   BUS_SPACE_MAXADDR,	/* lowaddr */
4092				   BUS_SPACE_MAXADDR,	/* highaddr */
4093				   NULL, NULL,		/* filter, filterarg */
4094				   MSIZE,		/* maxsize */
4095				   1,			/* nsegments */
4096				   MSIZE,		/* maxsegsize */
4097				   0,			/* flags */
4098				   NULL,		/* lockfunc */
4099				   NULL,		/* lockfuncarg */
4100				   &rxr->htag))) {
4101		device_printf(dev, "Unable to create RX DMA tag\n");
4102		goto fail;
4103	}
4104
4105	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4106				   1, 0,		/* alignment, bounds */
4107				   BUS_SPACE_MAXADDR,	/* lowaddr */
4108				   BUS_SPACE_MAXADDR,	/* highaddr */
4109				   NULL, NULL,		/* filter, filterarg */
4110				   MJUM9BYTES,		/* maxsize */
4111				   1,			/* nsegments */
4112				   MJUM9BYTES,		/* maxsegsize */
4113				   0,			/* flags */
4114				   NULL,		/* lockfunc */
4115				   NULL,		/* lockfuncarg */
4116				   &rxr->ptag))) {
4117		device_printf(dev, "Unable to create RX payload DMA tag\n");
4118		goto fail;
4119	}
4120
4121	for (i = 0; i < adapter->num_rx_desc; i++) {
4122		rxbuf = &rxr->rx_buffers[i];
4123		error = bus_dmamap_create(rxr->htag,
4124		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4125		if (error) {
4126			device_printf(dev,
4127			    "Unable to create RX head DMA maps\n");
4128			goto fail;
4129		}
4130		error = bus_dmamap_create(rxr->ptag,
4131		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4132		if (error) {
4133			device_printf(dev,
4134			    "Unable to create RX packet DMA maps\n");
4135			goto fail;
4136		}
4137	}
4138
4139	return (0);
4140
4141fail:
4142	/* Frees all, but can handle partial completion */
4143	igb_free_receive_structures(adapter);
4144	return (error);
4145}
4146
4147
4148static void
4149igb_free_receive_ring(struct rx_ring *rxr)
4150{
4151	struct	adapter		*adapter = rxr->adapter;
4152	struct igb_rx_buf	*rxbuf;
4153
4154
4155	for (int i = 0; i < adapter->num_rx_desc; i++) {
4156		rxbuf = &rxr->rx_buffers[i];
4157		if (rxbuf->m_head != NULL) {
4158			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4159			    BUS_DMASYNC_POSTREAD);
4160			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4161			rxbuf->m_head->m_flags |= M_PKTHDR;
4162			m_freem(rxbuf->m_head);
4163		}
4164		if (rxbuf->m_pack != NULL) {
4165			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4166			    BUS_DMASYNC_POSTREAD);
4167			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4168			rxbuf->m_pack->m_flags |= M_PKTHDR;
4169			m_freem(rxbuf->m_pack);
4170		}
4171		rxbuf->m_head = NULL;
4172		rxbuf->m_pack = NULL;
4173	}
4174}
4175
4176
4177/*********************************************************************
4178 *
4179 *  Initialize a receive ring and its buffers.
4180 *
4181 **********************************************************************/
4182static int
4183igb_setup_receive_ring(struct rx_ring *rxr)
4184{
4185	struct	adapter		*adapter;
4186	struct  ifnet		*ifp;
4187	device_t		dev;
4188	struct igb_rx_buf	*rxbuf;
4189	bus_dma_segment_t	pseg[1], hseg[1];
4190	struct lro_ctrl		*lro = &rxr->lro;
4191	int			rsize, nsegs, error = 0;
4192#ifdef DEV_NETMAP
4193	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4194	struct netmap_slot *slot;
4195#endif /* DEV_NETMAP */
4196
4197	adapter = rxr->adapter;
4198	dev = adapter->dev;
4199	ifp = adapter->ifp;
4200
4201	/* Clear the ring contents */
4202	IGB_RX_LOCK(rxr);
4203#ifdef DEV_NETMAP
4204	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4205#endif /* DEV_NETMAP */
4206	rsize = roundup2(adapter->num_rx_desc *
4207	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4208	bzero((void *)rxr->rx_base, rsize);
4209
4210	/*
4211	** Free current RX buffer structures and their mbufs
4212	*/
4213	igb_free_receive_ring(rxr);
4214
4215	/* Configure for header split? */
4216	if (igb_header_split)
4217		rxr->hdr_split = TRUE;
4218
4219        /* Now replenish the ring mbufs */
4220	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4221		struct mbuf	*mh, *mp;
4222
4223		rxbuf = &rxr->rx_buffers[j];
4224#ifdef DEV_NETMAP
4225		if (slot) {
4226			/* slot sj is mapped to the i-th NIC-ring entry */
4227			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4228			uint64_t paddr;
4229			void *addr;
4230
4231			addr = PNMB(slot + sj, &paddr);
4232			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4233			/* Update descriptor */
4234			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4235			continue;
4236		}
4237#endif /* DEV_NETMAP */
4238		if (rxr->hdr_split == FALSE)
4239			goto skip_head;
4240
4241		/* First the header */
4242		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4243		if (rxbuf->m_head == NULL) {
4244			error = ENOBUFS;
4245                        goto fail;
4246		}
4247		m_adj(rxbuf->m_head, ETHER_ALIGN);
4248		mh = rxbuf->m_head;
4249		mh->m_len = mh->m_pkthdr.len = MHLEN;
4250		mh->m_flags |= M_PKTHDR;
4251		/* Get the memory mapping */
4252		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4253		    rxbuf->hmap, rxbuf->m_head, hseg,
4254		    &nsegs, BUS_DMA_NOWAIT);
4255		if (error != 0) /* Nothing elegant to do here */
4256                        goto fail;
4257		bus_dmamap_sync(rxr->htag,
4258		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4259		/* Update descriptor */
4260		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4261
4262skip_head:
4263		/* Now the payload cluster */
4264		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4265		    M_PKTHDR, adapter->rx_mbuf_sz);
4266		if (rxbuf->m_pack == NULL) {
4267			error = ENOBUFS;
4268                        goto fail;
4269		}
4270		mp = rxbuf->m_pack;
4271		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4272		/* Get the memory mapping */
4273		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4274		    rxbuf->pmap, mp, pseg,
4275		    &nsegs, BUS_DMA_NOWAIT);
4276		if (error != 0)
4277                        goto fail;
4278		bus_dmamap_sync(rxr->ptag,
4279		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4280		/* Update descriptor */
4281		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4282        }
4283
4284	/* Setup our descriptor indices */
4285	rxr->next_to_check = 0;
4286	rxr->next_to_refresh = adapter->num_rx_desc - 1;
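	/*
	** The refresh mark starts one slot behind next_to_check
	** (ring-wise); igb_refresh_mbufs() advances it as buffers
	** are replenished.
	*/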
4287	rxr->lro_enabled = FALSE;
4288	rxr->rx_split_packets = 0;
4289	rxr->rx_bytes = 0;
4290
4291	rxr->fmp = NULL;
4292	rxr->lmp = NULL;
4293	rxr->discard = FALSE;
4294
4295	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4296	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4297
4298	/*
4299	** Now set up the LRO interface; we
4300	** also only do header split when LRO
4301	** is enabled, since it is often
4302	** undesirable in other setups.
4303	*/
4304	if (ifp->if_capenable & IFCAP_LRO) {
4305		error = tcp_lro_init(lro);
4306		if (error) {
4307			device_printf(dev, "LRO Initialization failed!\n");
4308			goto fail;
4309		}
4310		INIT_DEBUGOUT("RX LRO Initialized\n");
4311		rxr->lro_enabled = TRUE;
4312		lro->ifp = adapter->ifp;
4313	}
4314
4315	IGB_RX_UNLOCK(rxr);
4316	return (0);
4317
4318fail:
4319	igb_free_receive_ring(rxr);
4320	IGB_RX_UNLOCK(rxr);
4321	return (error);
4322}
4323
4324
4325/*********************************************************************
4326 *
4327 *  Initialize all receive rings.
4328 *
4329 **********************************************************************/
4330static int
4331igb_setup_receive_structures(struct adapter *adapter)
4332{
4333	struct rx_ring *rxr = adapter->rx_rings;
4334	int i;
4335
4336	for (i = 0; i < adapter->num_queues; i++, rxr++)
4337		if (igb_setup_receive_ring(rxr))
4338			goto fail;
4339
4340	return (0);
4341fail:
4342	/*
4343	 * Free RX buffers allocated so far; we will only handle
4344	 * the rings that completed, since the failing case will have
4345	 * cleaned up after itself. 'i' is the endpoint.
4346	 */
4347	for (int j = 0; j < i; ++j) {
4348		rxr = &adapter->rx_rings[j];
4349		IGB_RX_LOCK(rxr);
4350		igb_free_receive_ring(rxr);
4351		IGB_RX_UNLOCK(rxr);
4352	}
4353
4354	return (ENOBUFS);
4355}
4356
4357/*********************************************************************
4358 *
4359 *  Enable receive unit.
4360 *
4361 **********************************************************************/
4362static void
4363igb_initialize_receive_units(struct adapter *adapter)
4364{
4365	struct rx_ring	*rxr = adapter->rx_rings;
4366	struct ifnet	*ifp = adapter->ifp;
4367	struct e1000_hw *hw = &adapter->hw;
4368	u32		rctl, rxcsum, psize, srrctl = 0;
4369
4370	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4371
4372	/*
4373	 * Make sure receives are disabled while setting
4374	 * up the descriptor ring
4375	 */
4376	rctl = E1000_READ_REG(hw, E1000_RCTL);
4377	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4378
4379	/*
4380	** Set up for header split
4381	*/
4382	if (igb_header_split) {
4383		/* Use a standard mbuf for the header */
4384		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4385		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4386	} else
4387		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4388
4389	/*
4390	** Set up for jumbo frames
4391	*/
4392	if (ifp->if_mtu > ETHERMTU) {
4393		rctl |= E1000_RCTL_LPE;
4394		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4395			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4396			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4397		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4398			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4399			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4400		}
4401		/* Set maximum packet len */
4402		psize = adapter->max_frame_size;
4403		/* are we on a vlan? */
4404		if (adapter->ifp->if_vlantrunk != NULL)
4405			psize += VLAN_TAG_SIZE;
4406		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4407	} else {
4408		rctl &= ~E1000_RCTL_LPE;
4409		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4410		rctl |= E1000_RCTL_SZ_2048;
4411	}
4412
4413	/* Setup the Base and Length of the Rx Descriptor Rings */
4414	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4415		u64 bus_addr = rxr->rxdma.dma_paddr;
4416		u32 rxdctl;
4417
4418		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4419		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4420		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4421		    (uint32_t)(bus_addr >> 32));
4422		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4423		    (uint32_t)bus_addr);
4424		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4425		/* Enable this Queue */
4426		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4427		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4428		rxdctl &= 0xFFF00000;
4429		rxdctl |= IGB_RX_PTHRESH;
4430		rxdctl |= IGB_RX_HTHRESH << 8;
4431		rxdctl |= IGB_RX_WTHRESH << 16;
4432		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4433	}
4434
4435	/*
4436	** Setup for RX MultiQueue
4437	*/
4438	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4439	if (adapter->num_queues >1) {
4440		u32 random[10], mrqc, shift = 0;
4441		union igb_reta {
4442			u32 dword;
4443			u8  bytes[4];
4444		} reta;
4445
4446		arc4rand(&random, sizeof(random), 0);
4447		if (adapter->hw.mac.type == e1000_82575)
4448			shift = 6;
4449		/* Warning FM follows */
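		/*
		** Fill the 128-entry redirection table (RETA), four
		** byte-wide entries per register, spreading the queues
		** round-robin; the 82575 wants the queue index shifted.
		*/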
4450		for (int i = 0; i < 128; i++) {
4451			reta.bytes[i & 3] =
4452			    (i % adapter->num_queues) << shift;
4453			if ((i & 3) == 3)
4454				E1000_WRITE_REG(hw,
4455				    E1000_RETA(i >> 2), reta.dword);
4456		}
4457		/* Now fill in hash table */
4458		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4459		for (int i = 0; i < 10; i++)
4460			E1000_WRITE_REG_ARRAY(hw,
4461			    E1000_RSSRK(0), i, random[i]);
4462
4463		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4464		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4465		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4466		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4467		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4468		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4469		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4470		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4471
4472		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4473
4474		/*
4475		** NOTE: Receive Full-Packet Checksum Offload
4476		** is mutually exclusive with Multiqueue. However
4477		** this is not the same as TCP/IP checksums which
4478		** still work.
4479		*/
4480		rxcsum |= E1000_RXCSUM_PCSD;
4481#if __FreeBSD_version >= 800000
4482		/* For SCTP Offload */
4483		if ((hw->mac.type == e1000_82576)
4484		    && (ifp->if_capenable & IFCAP_RXCSUM))
4485			rxcsum |= E1000_RXCSUM_CRCOFL;
4486#endif
4487	} else {
4488		/* Non RSS setup */
4489		if (ifp->if_capenable & IFCAP_RXCSUM) {
4490			rxcsum |= E1000_RXCSUM_IPPCSE;
4491#if __FreeBSD_version >= 800000
4492			if (adapter->hw.mac.type == e1000_82576)
4493				rxcsum |= E1000_RXCSUM_CRCOFL;
4494#endif
4495		} else
4496			rxcsum &= ~E1000_RXCSUM_TUOFL;
4497	}
4498	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4499
4500	/* Setup the Receive Control Register */
4501	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4502	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4503		   E1000_RCTL_RDMTS_HALF |
4504		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4505	/* Strip CRC bytes. */
4506	rctl |= E1000_RCTL_SECRC;
4507	/* Make sure VLAN Filters are off */
4508	rctl &= ~E1000_RCTL_VFE;
4509	/* Don't store bad packets */
4510	rctl &= ~E1000_RCTL_SBP;
4511
4512	/* Enable Receives */
4513	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4514
4515	/*
4516	 * Setup the HW Rx Head and Tail Descriptor Pointers
4517	 *   - needs to be after enable
4518	 */
4519	for (int i = 0; i < adapter->num_queues; i++) {
4520		rxr = &adapter->rx_rings[i];
4521		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4522#ifdef DEV_NETMAP
4523		/*
4524		 * an init() while a netmap client is active must
4525		 * preserve the rx buffers passed to userspace.
4526		 * In this driver it means we adjust RDT to
4527		 * something different from next_to_refresh
4528		 * (which is not used in netmap mode).
4529		 */
4530		if (ifp->if_capenable & IFCAP_NETMAP) {
4531			struct netmap_adapter *na = NA(adapter->ifp);
4532			struct netmap_kring *kring = &na->rx_rings[i];
4533			int t = rxr->next_to_refresh - kring->nr_hwavail;
4534
4535			if (t >= adapter->num_rx_desc)
4536				t -= adapter->num_rx_desc;
4537			else if (t < 0)
4538				t += adapter->num_rx_desc;
4539			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4540		} else
4541#endif /* DEV_NETMAP */
4542		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4543	}
4544	return;
4545}
4546
4547/*********************************************************************
4548 *
4549 *  Free receive rings.
4550 *
4551 **********************************************************************/
4552static void
4553igb_free_receive_structures(struct adapter *adapter)
4554{
4555	struct rx_ring *rxr = adapter->rx_rings;
4556
4557	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4558		struct lro_ctrl	*lro = &rxr->lro;
4559		igb_free_receive_buffers(rxr);
4560		tcp_lro_free(lro);
4561		igb_dma_free(adapter, &rxr->rxdma);
4562	}
4563
4564	free(adapter->rx_rings, M_DEVBUF);
4565}
4566
4567/*********************************************************************
4568 *
4569 *  Free receive ring data structures.
4570 *
4571 **********************************************************************/
4572static void
4573igb_free_receive_buffers(struct rx_ring *rxr)
4574{
4575	struct adapter		*adapter = rxr->adapter;
4576	struct igb_rx_buf	*rxbuf;
4577	int i;
4578
4579	INIT_DEBUGOUT("free_receive_structures: begin");
4580
4581	/* Cleanup any existing buffers */
4582	if (rxr->rx_buffers != NULL) {
4583		for (i = 0; i < adapter->num_rx_desc; i++) {
4584			rxbuf = &rxr->rx_buffers[i];
4585			if (rxbuf->m_head != NULL) {
4586				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4587				    BUS_DMASYNC_POSTREAD);
4588				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4589				rxbuf->m_head->m_flags |= M_PKTHDR;
4590				m_freem(rxbuf->m_head);
4591			}
4592			if (rxbuf->m_pack != NULL) {
4593				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4594				    BUS_DMASYNC_POSTREAD);
4595				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4596				rxbuf->m_pack->m_flags |= M_PKTHDR;
4597				m_freem(rxbuf->m_pack);
4598			}
4599			rxbuf->m_head = NULL;
4600			rxbuf->m_pack = NULL;
4601			if (rxbuf->hmap != NULL) {
4602				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4603				rxbuf->hmap = NULL;
4604			}
4605			if (rxbuf->pmap != NULL) {
4606				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4607				rxbuf->pmap = NULL;
4608			}
4609		}
4610		if (rxr->rx_buffers != NULL) {
4611			free(rxr->rx_buffers, M_DEVBUF);
4612			rxr->rx_buffers = NULL;
4613		}
4614	}
4615
4616	if (rxr->htag != NULL) {
4617		bus_dma_tag_destroy(rxr->htag);
4618		rxr->htag = NULL;
4619	}
4620	if (rxr->ptag != NULL) {
4621		bus_dma_tag_destroy(rxr->ptag);
4622		rxr->ptag = NULL;
4623	}
4624}
4625
4626static __inline void
4627igb_rx_discard(struct rx_ring *rxr, int i)
4628{
4629	struct igb_rx_buf	*rbuf;
4630
4631	rbuf = &rxr->rx_buffers[i];
4632
4633	/* Partially received? Free the chain */
4634	if (rxr->fmp != NULL) {
4635		rxr->fmp->m_flags |= M_PKTHDR;
4636		m_freem(rxr->fmp);
4637		rxr->fmp = NULL;
4638		rxr->lmp = NULL;
4639	}
4640
4641	/*
4642	** With advanced descriptors the writeback
4643	** clobbers the buffer addrs, so it's easier
4644	** to just free the existing mbufs and take
4645	** the normal refresh path to get new buffers
4646	** and mapping.
4647	*/
4648	if (rbuf->m_head) {
4649		m_free(rbuf->m_head);
4650		rbuf->m_head = NULL;
4651	}
4652
4653	if (rbuf->m_pack) {
4654		m_free(rbuf->m_pack);
4655		rbuf->m_pack = NULL;
4656	}
4657
4658	return;
4659}
4660
4661static __inline void
4662igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4663{
4664
4665	/*
4666	 * At the moment LRO is only for IPv4/TCP packets, and the TCP checksum
4667	 * of the packet should be computed by hardware. Also it should not have
4668	 * a VLAN tag in the ethernet header.
4669	 */
4670	if (rxr->lro_enabled &&
4671	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4672	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4673	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4674	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4675	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4676	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4677		/*
4678		 * Send to the stack if:
4679		 *  - LRO not enabled, or
4680		 *  - no LRO resources, or
4681		 *  - lro enqueue fails
4682		 */
4683		if (rxr->lro.lro_cnt != 0)
4684			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4685				return;
4686	}
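	/* Drop the RX lock while the packet is handed to the stack */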
4687	IGB_RX_UNLOCK(rxr);
4688	(*ifp->if_input)(ifp, m);
4689	IGB_RX_LOCK(rxr);
4690}
4691
4692/*********************************************************************
4693 *
4694 *  This routine executes in interrupt context. It replenishes
4695 *  the mbufs in the descriptor and sends data which has been
4696 *  dma'ed into host memory to upper layer.
4697 *
4698 *  We loop at most count times if count is > 0, or until done if
4699 *  count < 0.
4700 *
4701 *  Return TRUE if more to clean, FALSE otherwise
4702 *********************************************************************/
4703static bool
4704igb_rxeof(struct igb_queue *que, int count, int *done)
4705{
4706	struct adapter		*adapter = que->adapter;
4707	struct rx_ring		*rxr = que->rxr;
4708	struct ifnet		*ifp = adapter->ifp;
4709	struct lro_ctrl		*lro = &rxr->lro;
4710	struct lro_entry	*queued;
4711	int			i, processed = 0, rxdone = 0;
4712	u32			ptype, staterr = 0;
4713	union e1000_adv_rx_desc	*cur;
4714
4715	IGB_RX_LOCK(rxr);
4716	/* Sync the ring. */
4717	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4718	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4719
4720#ifdef DEV_NETMAP
4721	if (ifp->if_capenable & IFCAP_NETMAP) {
4722		struct netmap_adapter *na = NA(ifp);
4723
4724		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4725		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4726		IGB_RX_UNLOCK(rxr);
4727		IGB_CORE_LOCK(adapter);
4728		selwakeuppri(&na->rx_si, PI_NET);
4729		IGB_CORE_UNLOCK(adapter);
4730		return (FALSE);
4731	}
4732#endif /* DEV_NETMAP */
4733
4734	/* Main clean loop */
4735	for (i = rxr->next_to_check; count != 0;) {
4736		struct mbuf		*sendmp, *mh, *mp;
4737		struct igb_rx_buf	*rxbuf;
4738		u16			hlen, plen, hdr, vtag;
4739		bool			eop = FALSE;
4740
4741		cur = &rxr->rx_base[i];
4742		staterr = le32toh(cur->wb.upper.status_error);
4743		if ((staterr & E1000_RXD_STAT_DD) == 0)
4744			break;
4745		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4746			break;
4747		count--;
4748		sendmp = mh = mp = NULL;
4749		cur->wb.upper.status_error = 0;
4750		rxbuf = &rxr->rx_buffers[i];
4751		plen = le16toh(cur->wb.upper.length);
4752		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
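		/* i350 loopback packets carry the VLAN tag byte-swapped */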
4753		if ((adapter->hw.mac.type == e1000_i350) &&
4754		    (staterr & E1000_RXDEXT_STATERR_LB))
4755			vtag = be16toh(cur->wb.upper.vlan);
4756		else
4757			vtag = le16toh(cur->wb.upper.vlan);
4758		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4759		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4760
4761		/* Make sure all segments of a bad packet are discarded */
4762		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4763		    (rxr->discard)) {
4764			adapter->dropped_pkts++;
4765			++rxr->rx_discarded;
4766			if (!eop) /* Catch subsequent segs */
4767				rxr->discard = TRUE;
4768			else
4769				rxr->discard = FALSE;
4770			igb_rx_discard(rxr, i);
4771			goto next_desc;
4772		}
4773
4774		/*
4775		** The way the hardware is configured to
4776		** split, it will ONLY use the header buffer
4777		** when header split is enabled, otherwise we
4778		** get normal behavior, ie, both header and
4779		** payload are DMA'd into the payload buffer.
4780		**
4781		** The fmp test is to catch the case where a
4782		** packet spans multiple descriptors, in that
4783		** case only the first header is valid.
4784		*/
4785		if (rxr->hdr_split && rxr->fmp == NULL) {
4786			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4787			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4788			if (hlen > IGB_HDR_BUF)
4789				hlen = IGB_HDR_BUF;
4790			mh = rxr->rx_buffers[i].m_head;
4791			mh->m_len = hlen;
4792			/* clear buf pointer for refresh */
4793			rxbuf->m_head = NULL;
4794			/*
4795			** Get the payload length, this
4796			** could be zero if its a small
4797			** packet.
4798			*/
4799			if (plen > 0) {
4800				mp = rxr->rx_buffers[i].m_pack;
4801				mp->m_len = plen;
4802				mh->m_next = mp;
4803				/* clear buf pointer */
4804				rxbuf->m_pack = NULL;
4805				rxr->rx_split_packets++;
4806			}
4807		} else {
4808			/*
4809			** Either no header split, or a
4810			** secondary piece of a fragmented
4811			** split packet.
4812			*/
4813			mh = rxr->rx_buffers[i].m_pack;
4814			mh->m_len = plen;
4815			/* clear buf info for refresh */
4816			rxbuf->m_pack = NULL;
4817		}
4818
4819		++processed; /* So we know when to refresh */
4820
4821		/* Initial frame - setup */
4822		if (rxr->fmp == NULL) {
4823			mh->m_pkthdr.len = mh->m_len;
4824			/* Save the head of the chain */
4825			rxr->fmp = mh;
4826			rxr->lmp = mh;
4827			if (mp != NULL) {
4828				/* Add payload if split */
4829				mh->m_pkthdr.len += mp->m_len;
4830				rxr->lmp = mh->m_next;
4831			}
4832		} else {
4833			/* Chain mbuf's together */
4834			rxr->lmp->m_next = mh;
4835			rxr->lmp = rxr->lmp->m_next;
4836			rxr->fmp->m_pkthdr.len += mh->m_len;
4837		}
4838
4839		if (eop) {
4840			rxr->fmp->m_pkthdr.rcvif = ifp;
4841			ifp->if_ipackets++;
4842			rxr->rx_packets++;
4843			/* capture data for AIM */
4844			rxr->packets++;
4845			rxr->bytes += rxr->fmp->m_pkthdr.len;
4846			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4847
4848			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4849				igb_rx_checksum(staterr, rxr->fmp, ptype);
4850
4851			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4852			    (staterr & E1000_RXD_STAT_VP) != 0) {
4853				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4854				rxr->fmp->m_flags |= M_VLANTAG;
4855			}
4856#if __FreeBSD_version >= 800000
4857			rxr->fmp->m_pkthdr.flowid = que->msix;
4858			rxr->fmp->m_flags |= M_FLOWID;
4859#endif
4860			sendmp = rxr->fmp;
4861			/* Make sure to set M_PKTHDR. */
4862			sendmp->m_flags |= M_PKTHDR;
4863			rxr->fmp = NULL;
4864			rxr->lmp = NULL;
4865		}
4866
4867next_desc:
4868		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4869		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4870
4871		/* Advance our pointers to the next descriptor. */
4872		if (++i == adapter->num_rx_desc)
4873			i = 0;
4874		/*
4875		** Send to the stack or LRO
4876		*/
4877		if (sendmp != NULL) {
4878			rxr->next_to_check = i;
4879			igb_rx_input(rxr, ifp, sendmp, ptype);
4880			i = rxr->next_to_check;
4881			rxdone++;
4882		}
4883
4884		/* Every 8 descriptors we go to refresh mbufs */
4885		if (processed == 8) {
4886                        igb_refresh_mbufs(rxr, i);
4887                        processed = 0;
4888		}
4889	}
4890
4891	/* Catch any remainders */
4892	if (igb_rx_unrefreshed(rxr))
4893		igb_refresh_mbufs(rxr, i);
4894
4895	rxr->next_to_check = i;
4896
4897	/*
4898	 * Flush any outstanding LRO work
4899	 */
4900	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4901		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4902		tcp_lro_flush(lro, queued);
4903	}
4904
4905	if (done != NULL)
4906		*done += rxdone;
4907
4908	IGB_RX_UNLOCK(rxr);
4909	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4910}
4911
4912/*********************************************************************
4913 *
4914 *  Verify that the hardware indicated that the checksum is valid.
4915 *  Inform the stack about the status of checksum so that stack
4916 *  doesn't spend time verifying the checksum.
4917 *
4918 *********************************************************************/
4919static void
4920igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4921{
4922	u16 status = (u16)staterr;
4923	u8  errors = (u8) (staterr >> 24);
4924	int sctp;
4925
4926	/* Ignore Checksum bit is set */
4927	if (status & E1000_RXD_STAT_IXSM) {
4928		mp->m_pkthdr.csum_flags = 0;
4929		return;
4930	}
4931
4932	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4933	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4934		sctp = 1;
4935	else
4936		sctp = 0;
4937	if (status & E1000_RXD_STAT_IPCS) {
4938		/* Did it pass? */
4939		if (!(errors & E1000_RXD_ERR_IPE)) {
4940			/* IP Checksum Good */
4941			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4942			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4943		} else
4944			mp->m_pkthdr.csum_flags = 0;
4945	}
4946
4947	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4948		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4949#if __FreeBSD_version >= 800000
4950		if (sctp) /* reassign */
4951			type = CSUM_SCTP_VALID;
4952#endif
4953		/* Did it pass? */
4954		if (!(errors & E1000_RXD_ERR_TCPE)) {
4955			mp->m_pkthdr.csum_flags |= type;
4956			if (sctp == 0)
4957				mp->m_pkthdr.csum_data = htons(0xffff);
4958		}
4959	}
4960	return;
4961}
4962
4963/*
4964 * This routine is run via an vlan
4965 * This routine is run via a vlan
4966 */
4967static void
4968igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4969{
4970	struct adapter	*adapter = ifp->if_softc;
4971	u32		index, bit;
4972
4973	if (ifp->if_softc !=  arg)   /* Not our event */
4974		return;
4975
4976	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4977                return;
4978
4979	IGB_CORE_LOCK(adapter);
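	/* The VFTA is a 4096-bit table: word index = vtag / 32, bit = vtag % 32 */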
4980	index = (vtag >> 5) & 0x7F;
4981	bit = vtag & 0x1F;
4982	adapter->shadow_vfta[index] |= (1 << bit);
4983	++adapter->num_vlans;
4984	/* Change hw filter setting */
4985	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4986		igb_setup_vlan_hw_support(adapter);
4987	IGB_CORE_UNLOCK(adapter);
4988}
4989
4990/*
4991 * This routine is run via a vlan
4992 * unconfig EVENT
4993 */
4994static void
4995igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4996{
4997	struct adapter	*adapter = ifp->if_softc;
4998	u32		index, bit;
4999
5000	if (ifp->if_softc !=  arg)
5001		return;
5002
5003	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5004                return;
5005
5006	IGB_CORE_LOCK(adapter);
5007	index = (vtag >> 5) & 0x7F;
5008	bit = vtag & 0x1F;
5009	adapter->shadow_vfta[index] &= ~(1 << bit);
5010	--adapter->num_vlans;
5011	/* Change hw filter setting */
5012	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5013		igb_setup_vlan_hw_support(adapter);
5014	IGB_CORE_UNLOCK(adapter);
5015}
5016
5017static void
5018igb_setup_vlan_hw_support(struct adapter *adapter)
5019{
5020	struct e1000_hw *hw = &adapter->hw;
5021	struct ifnet	*ifp = adapter->ifp;
5022	u32             reg;
5023
5024	if (adapter->vf_ifp) {
5025		e1000_rlpml_set_vf(hw,
5026		    adapter->max_frame_size + VLAN_TAG_SIZE);
5027		return;
5028	}
5029
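	/* Enable hardware VLAN tag processing (VME) */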
5030	reg = E1000_READ_REG(hw, E1000_CTRL);
5031	reg |= E1000_CTRL_VME;
5032	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5033
5034	/* Enable the Filter Table */
5035	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5036		reg = E1000_READ_REG(hw, E1000_RCTL);
5037		reg &= ~E1000_RCTL_CFIEN;
5038		reg |= E1000_RCTL_VFE;
5039		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5040	}
5041
5042	/* Update the frame size */
5043	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5044	    adapter->max_frame_size + VLAN_TAG_SIZE);
5045
5046	/* Don't bother with table if no vlans */
5047	if ((adapter->num_vlans == 0) ||
5048	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5049                return;
5050	/*
5051	** A soft reset zeroes out the VFTA, so
5052	** we need to repopulate it now.
5053	*/
5054	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5055                if (adapter->shadow_vfta[i] != 0) {
5056			if (adapter->vf_ifp)
5057				e1000_vfta_set_vf(hw,
5058				    adapter->shadow_vfta[i], TRUE);
5059			else
5060				e1000_write_vfta(hw,
5061				    i, adapter->shadow_vfta[i]);
5062		}
5063}
5064
5065static void
5066igb_enable_intr(struct adapter *adapter)
5067{
5068	/* With RSS set up what to auto clear */
5069	if (adapter->msix_mem) {
5070		u32 mask = (adapter->que_mask | adapter->link_mask);
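		/* EIAC auto-clears, EIAM auto-masks and EIMS enables these; link changes stay on the legacy IMS */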
5071		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5072		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5073		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5074		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5075		    E1000_IMS_LSC);
5076	} else {
5077		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5078		    IMS_ENABLE_MASK);
5079	}
5080	E1000_WRITE_FLUSH(&adapter->hw);
5081
5082	return;
5083}
5084
5085static void
5086igb_disable_intr(struct adapter *adapter)
5087{
5088	if (adapter->msix_mem) {
5089		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5090		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5091	}
5092	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5093	E1000_WRITE_FLUSH(&adapter->hw);
5094	return;
5095}
5096
5097/*
5098 * Bit of a misnomer, what this really means is
5099 * to enable OS management of the system... aka
5100 * to disable special hardware management features
5101 */
5102static void
5103igb_init_manageability(struct adapter *adapter)
5104{
5105	if (adapter->has_manage) {
5106		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5107		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5108
5109		/* disable hardware interception of ARP */
5110		manc &= ~(E1000_MANC_ARP_EN);
5111
5112                /* enable receiving management packets to the host */
5113		manc |= E1000_MANC_EN_MNG2HOST;
5114		manc2h |= 1 << 5;  /* Mng Port 623 */
5115		manc2h |= 1 << 6;  /* Mng Port 664 */
5116		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5117		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5118	}
5119}
5120
5121/*
5122 * Give control back to hardware management
5123 * controller if there is one.
5124 */
5125static void
5126igb_release_manageability(struct adapter *adapter)
5127{
5128	if (adapter->has_manage) {
5129		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5130
5131		/* re-enable hardware interception of ARP */
5132		manc |= E1000_MANC_ARP_EN;
5133		manc &= ~E1000_MANC_EN_MNG2HOST;
5134
5135		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5136	}
5137}
5138
5139/*
5140 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5141 * For ASF and Pass Through versions of f/w this means that
5142 * the driver is loaded.
5143 *
5144 */
5145static void
5146igb_get_hw_control(struct adapter *adapter)
5147{
5148	u32 ctrl_ext;
5149
5150	if (adapter->vf_ifp)
5151		return;
5152
5153	/* Let firmware know the driver has taken over */
5154	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5155	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5156	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5157}
5158
5159/*
5160 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5161 * For ASF and Pass Through versions of f/w this means that the
5162 * driver is no longer loaded.
5163 *
5164 */
5165static void
5166igb_release_hw_control(struct adapter *adapter)
5167{
5168	u32 ctrl_ext;
5169
5170	if (adapter->vf_ifp)
5171		return;
5172
5173	/* Let firmware take over control of h/w */
5174	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5175	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5176	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5177}
5178
5179static int
5180igb_is_valid_ether_addr(uint8_t *addr)
5181{
5182	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5183
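	/* Reject multicast/broadcast (low bit of first octet) and all-zero addresses */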
5184	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5185		return (FALSE);
5186	}
5187
5188	return (TRUE);
5189}
5190
5191
5192/*
5193 * Enable PCI Wake On Lan capability
5194 */
5195static void
5196igb_enable_wakeup(device_t dev)
5197{
5198	u16     cap, status;
5199	u8      id;
5200
5201	/* First find the capabilities pointer */
5202	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5203	/* Read the PM Capabilities */
5204	id = pci_read_config(dev, cap, 1);
5205	if (id != PCIY_PMG)     /* Something wrong */
5206		return;
5207	/* OK, we have the power capabilities, so
5208	   now get the status register */
5209	cap += PCIR_POWER_STATUS;
5210	status = pci_read_config(dev, cap, 2);
5211	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5212	pci_write_config(dev, cap, status, 2);
5213	return;
5214}
5215
5216static void
5217igb_led_func(void *arg, int onoff)
5218{
5219	struct adapter	*adapter = arg;
5220
5221	IGB_CORE_LOCK(adapter);
5222	if (onoff) {
5223		e1000_setup_led(&adapter->hw);
5224		e1000_led_on(&adapter->hw);
5225	} else {
5226		e1000_led_off(&adapter->hw);
5227		e1000_cleanup_led(&adapter->hw);
5228	}
5229	IGB_CORE_UNLOCK(adapter);
5230}
5231
5232/**********************************************************************
5233 *
5234 *  Update the board statistics counters.
5235 *
5236 **********************************************************************/
5237static void
5238igb_update_stats_counters(struct adapter *adapter)
5239{
5240	struct ifnet		*ifp;
5241        struct e1000_hw		*hw = &adapter->hw;
5242	struct e1000_hw_stats	*stats;
5243
5244	/*
5245	** The virtual function adapter has only a
5246	** small controlled set of stats, do only
5247	** those and return.
5248	*/
5249	if (adapter->vf_ifp) {
5250		igb_update_vf_stats_counters(adapter);
5251		return;
5252	}
5253
5254	stats = (struct e1000_hw_stats	*)adapter->stats;
5255
5256	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5257	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5258		stats->symerrs +=
5259		    E1000_READ_REG(hw,E1000_SYMERRS);
5260		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5261	}
5262
5263	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5264	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5265	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5266	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5267
5268	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5269	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5270	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5271	stats->dc += E1000_READ_REG(hw, E1000_DC);
5272	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5273	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5274	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5275	/*
5276	** For watchdog management we need to know if we have been
5277	** paused during the last interval, so capture that here.
5278	*/
5279        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5280        stats->xoffrxc += adapter->pause_frames;
5281	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5282	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5283	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5284	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5285	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5286	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5287	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5288	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5289	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5290	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5291	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5292	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5293
5294	/* For the 64-bit byte counters the low dword must be read first. */
5295	/* Both registers clear on the read of the high dword */
5296
5297	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5298	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5299	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5300	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5301
5302	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5303	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5304	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5305	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5306	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5307
5308	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5309	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5310
5311	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5312	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5313	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5314	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5315	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5316	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5317	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5318	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5319	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5320	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5321
5322	/* Interrupt Counts */
5323
5324	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5325	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5326	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5327	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5328	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5329	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5330	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5331	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5332	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5333
5334	/* Host to Card Statistics */
5335
5336	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5337	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5338	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5339	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5340	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5341	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5342	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5343	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5344	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5345	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5346	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5347	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5348	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5349	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5350
5351	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5352	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5353	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5354	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5355	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5356	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5357
5358	ifp = adapter->ifp;
5359	ifp->if_collisions = stats->colc;
5360
5361	/* Rx Errors */
5362	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5363	    stats->crcerrs + stats->algnerrc +
5364	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5365
5366	/* Tx Errors */
5367	ifp->if_oerrors = stats->ecol +
5368	    stats->latecol + adapter->watchdog_events;
5369
5370	/* Driver specific counters */
5371	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5372	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5373	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5374	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5375	adapter->packet_buf_alloc_tx =
5376	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5377	adapter->packet_buf_alloc_rx =
5378	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5379}
5380
5381
5382/**********************************************************************
5383 *
5384 *  Initialize the VF board statistics counters.
5385 *
5386 **********************************************************************/
5387static void
5388igb_vf_init_stats(struct adapter *adapter)
5389{
5390	struct e1000_hw *hw = &adapter->hw;
5391	struct e1000_vf_stats *stats;
5392
5393	stats = (struct e1000_vf_stats *)adapter->stats;
5394	if (stats == NULL)
5395		return;
5396	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5397	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5398	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5399	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5400	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5401}
5402
5403/**********************************************************************
5404 *
5405 *  Update the VF board statistics counters.
5406 *
5407 **********************************************************************/
5408static void
5409igb_update_vf_stats_counters(struct adapter *adapter)
5410{
5411	struct e1000_hw *hw = &adapter->hw;
5412	struct e1000_vf_stats *stats;
5413
5414	if (adapter->link_speed == 0)
5415		return;
5416
5417	stats = (struct e1000_vf_stats *)adapter->stats;
5418
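	/*
	** UPDATE_VF_REG (see if_igb.h) folds the current 32-bit hardware
	** count into the 64-bit software counter, accounting for rollover
	** since the last sample.
	*/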
5419	UPDATE_VF_REG(E1000_VFGPRC,
5420	    stats->last_gprc, stats->gprc);
5421	UPDATE_VF_REG(E1000_VFGORC,
5422	    stats->last_gorc, stats->gorc);
5423	UPDATE_VF_REG(E1000_VFGPTC,
5424	    stats->last_gptc, stats->gptc);
5425	UPDATE_VF_REG(E1000_VFGOTC,
5426	    stats->last_gotc, stats->gotc);
5427	UPDATE_VF_REG(E1000_VFMPRC,
5428	    stats->last_mprc, stats->mprc);
5429}
5430
5431/* Export a single 32-bit register via a read-only sysctl. */
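/* arg1 is the adapter pointer and arg2 the register offset, as wired up */
/* by the SYSCTL_ADD_PROC() calls in igb_add_hw_stats() below. */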
5432static int
5433igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5434{
5435	struct adapter *adapter;
5436	u_int val;
5437
5438	adapter = oidp->oid_arg1;
5439	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5440	return (sysctl_handle_int(oidp, &val, 0, req));
5441}
5442
5443/*
5444**  Per-queue interrupt rate handler: reports the rate implied by EITR
5445*/
5446static int
5447igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5448{
5449	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5450	int			error;
5451	u32			reg, usec, rate;
5452
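	/*
	** The driver treats EITR bits 14:2 as the throttle interval in
	** microseconds, so the reported rate is 1000000 / interval.
	*/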
5453	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5454	usec = ((reg & 0x7FFC) >> 2);
5455	if (usec > 0)
5456		rate = 1000000 / usec;
5457	else
5458		rate = 0;
5459	error = sysctl_handle_int(oidp, &rate, 0, req);
5460	if (error || !req->newptr)
5461		return (error);
5462	return (0);
5463}
5464
5465/*
5466 * Add sysctl variables, one per statistic, to the system.
5467 */
5468static void
5469igb_add_hw_stats(struct adapter *adapter)
5470{
5471	device_t dev = adapter->dev;
5472
5473	struct tx_ring *txr = adapter->tx_rings;
5474	struct rx_ring *rxr = adapter->rx_rings;
5475
5476	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5477	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5478	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5479	struct e1000_hw_stats *stats = adapter->stats;
5480
5481	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5482	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5483
5484#define QUEUE_NAME_LEN 32
5485	char namebuf[QUEUE_NAME_LEN];
5486
5487	/* Driver Statistics */
5488	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5489			CTLFLAG_RD, &adapter->link_irq, 0,
5490			"Link MSIX IRQ Handled");
5491	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5492			CTLFLAG_RD, &adapter->dropped_pkts,
5493			"Driver dropped packets");
5494	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5495			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5496			"Driver tx dma failure in xmit");
5497	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5498			CTLFLAG_RD, &adapter->rx_overruns,
5499			"RX overruns");
5500	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5501			CTLFLAG_RD, &adapter->watchdog_events,
5502			"Watchdog timeouts");
5503
5504	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5505			CTLFLAG_RD, &adapter->device_control,
5506			"Device Control Register");
5507	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5508			CTLFLAG_RD, &adapter->rx_control,
5509			"Receiver Control Register");
5510	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5511			CTLFLAG_RD, &adapter->int_mask,
5512			"Interrupt Mask");
5513	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5514			CTLFLAG_RD, &adapter->eint_mask,
5515			"Extended Interrupt Mask");
5516	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5517			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5518			"Transmit Buffer Packet Allocation");
5519	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5520			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5521			"Receive Buffer Packet Allocation");
5522	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5523			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5524			"Flow Control High Watermark");
5525	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5526			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5527			"Flow Control Low Watermark");
5528
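	/*
	** Per-queue counters hang off a "queue<N>" sub-node of the device
	** sysctl tree (e.g. dev.igb.0.queue0.rx_packets), named via namebuf.
	*/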
5529	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5530		struct lro_ctrl *lro = &rxr->lro;
5531
5532		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5533		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5534					    CTLFLAG_RD, NULL, "Queue Name");
5535		queue_list = SYSCTL_CHILDREN(queue_node);
5536
5537		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5538				CTLFLAG_RD, &adapter->queues[i],
5539				sizeof(&adapter->queues[i]),
5540				igb_sysctl_interrupt_rate_handler,
5541				"IU", "Interrupt Rate");
5542
5543		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5544				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5545				igb_sysctl_reg_handler, "IU",
5546 				"Transmit Descriptor Head");
5547		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5548				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5549				igb_sysctl_reg_handler, "IU",
5550 				"Transmit Descriptor Tail");
5551		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5552				CTLFLAG_RD, &txr->no_desc_avail,
5553				"Queue No Descriptor Available");
5554		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5555				CTLFLAG_RD, &txr->tx_packets,
5556				"Queue Packets Transmitted");
5557
5558		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5559				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5560				igb_sysctl_reg_handler, "IU",
5561				"Receive Descriptor Head");
5562		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5563				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5564				igb_sysctl_reg_handler, "IU",
5565				"Receive Descriptor Tail");
5566		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5567				CTLFLAG_RD, &rxr->rx_packets,
5568				"Queue Packets Received");
5569		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5570				CTLFLAG_RD, &rxr->rx_bytes,
5571				"Queue Bytes Received");
5572		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5573				CTLFLAG_RD, &lro->lro_queued, 0,
5574				"LRO Queued");
5575		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5576				CTLFLAG_RD, &lro->lro_flushed, 0,
5577				"LRO Flushed");
5578	}
5579
5580	/* MAC stats get their own sub node */
5581
5582	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5583				    CTLFLAG_RD, NULL, "MAC Statistics");
5584	stat_list = SYSCTL_CHILDREN(stat_node);
5585
5586	/*
5587	** The VF adapter exposes a very limited set of stats
5588	** since it's not managing the physical hardware itself.
5589	*/
5590	if (adapter->vf_ifp) {
5591		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5592				CTLFLAG_RD, &stats->gprc,
5593				"Good Packets Received");
5594		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5595				CTLFLAG_RD, &stats->gptc,
5596				"Good Packets Transmitted");
5597		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5598				CTLFLAG_RD, &stats->gorc,
5599				"Good Octets Received");
5600		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5601				CTLFLAG_RD, &stats->gotc,
5602				"Good Octets Transmitted");
5603		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5604				CTLFLAG_RD, &stats->mprc,
5605				"Multicast Packets Received");
5606		return;
5607	}
5608
5609	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5610			CTLFLAG_RD, &stats->ecol,
5611			"Excessive collisions");
5612	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5613			CTLFLAG_RD, &stats->scc,
5614			"Single collisions");
5615	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5616			CTLFLAG_RD, &stats->mcc,
5617			"Multiple collisions");
5618	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5619			CTLFLAG_RD, &stats->latecol,
5620			"Late collisions");
5621	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5622			CTLFLAG_RD, &stats->colc,
5623			"Collision Count");
5624	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5625			CTLFLAG_RD, &stats->symerrs,
5626			"Symbol Errors");
5627	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5628			CTLFLAG_RD, &stats->sec,
5629			"Sequence Errors");
5630	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5631			CTLFLAG_RD, &stats->dc,
5632			"Defer Count");
5633	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5634			CTLFLAG_RD, &stats->mpc,
5635			"Missed Packets");
5636	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5637			CTLFLAG_RD, &stats->rnbc,
5638			"Receive No Buffers");
5639	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5640			CTLFLAG_RD, &stats->ruc,
5641			"Receive Undersize");
5642	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5643			CTLFLAG_RD, &stats->rfc,
5644			"Fragmented Packets Received ");
5645	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5646			CTLFLAG_RD, &stats->roc,
5647			"Oversized Packets Received");
5648	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5649			CTLFLAG_RD, &stats->rjc,
5650			"Recevied Jabber");
5651	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5652			CTLFLAG_RD, &stats->rxerrc,
5653			"Receive Errors");
5654	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5655			CTLFLAG_RD, &stats->crcerrs,
5656			"CRC errors");
5657	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5658			CTLFLAG_RD, &stats->algnerrc,
5659			"Alignment Errors");
5660	/* On 82575 these are collision counts */
5661	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5662			CTLFLAG_RD, &stats->cexterr,
5663			"Collision/Carrier extension errors");
5664	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5665			CTLFLAG_RD, &stats->xonrxc,
5666			"XON Received");
5667	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5668			CTLFLAG_RD, &stats->xontxc,
5669			"XON Transmitted");
5670	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5671			CTLFLAG_RD, &stats->xoffrxc,
5672			"XOFF Received");
5673	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5674			CTLFLAG_RD, &stats->xofftxc,
5675			"XOFF Transmitted");
5676	/* Packet Reception Stats */
5677	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5678			CTLFLAG_RD, &stats->tpr,
5679			"Total Packets Received ");
5680	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5681			CTLFLAG_RD, &stats->gprc,
5682			"Good Packets Received");
5683	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5684			CTLFLAG_RD, &stats->bprc,
5685			"Broadcast Packets Received");
5686	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5687			CTLFLAG_RD, &stats->mprc,
5688			"Multicast Packets Received");
5689	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5690			CTLFLAG_RD, &stats->prc64,
5691			"64 byte frames received ");
5692	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5693			CTLFLAG_RD, &stats->prc127,
5694			"65-127 byte frames received");
5695	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5696			CTLFLAG_RD, &stats->prc255,
5697			"128-255 byte frames received");
5698	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5699			CTLFLAG_RD, &stats->prc511,
5700			"256-511 byte frames received");
5701	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5702			CTLFLAG_RD, &stats->prc1023,
5703			"512-1023 byte frames received");
5704	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5705			CTLFLAG_RD, &stats->prc1522,
5706			"1023-1522 byte frames received");
5707 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5708 			CTLFLAG_RD, &stats->gorc,
5709 			"Good Octets Received");
5710
5711	/* Packet Transmission Stats */
5712 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5713 			CTLFLAG_RD, &stats->gotc,
5714 			"Good Octets Transmitted");
5715	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5716			CTLFLAG_RD, &stats->tpt,
5717			"Total Packets Transmitted");
5718	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5719			CTLFLAG_RD, &stats->gptc,
5720			"Good Packets Transmitted");
5721	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5722			CTLFLAG_RD, &stats->bptc,
5723			"Broadcast Packets Transmitted");
5724	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5725			CTLFLAG_RD, &stats->mptc,
5726			"Multicast Packets Transmitted");
5727	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5728			CTLFLAG_RD, &stats->ptc64,
5729			"64 byte frames transmitted ");
5730	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5731			CTLFLAG_RD, &stats->ptc127,
5732			"65-127 byte frames transmitted");
5733	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5734			CTLFLAG_RD, &stats->ptc255,
5735			"128-255 byte frames transmitted");
5736	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5737			CTLFLAG_RD, &stats->ptc511,
5738			"256-511 byte frames transmitted");
5739	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5740			CTLFLAG_RD, &stats->ptc1023,
5741			"512-1023 byte frames transmitted");
5742	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5743			CTLFLAG_RD, &stats->ptc1522,
5744			"1024-1522 byte frames transmitted");
5745	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5746			CTLFLAG_RD, &stats->tsctc,
5747			"TSO Contexts Transmitted");
5748	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5749			CTLFLAG_RD, &stats->tsctfc,
5750			"TSO Contexts Failed");
5751
5752
5753	/* Interrupt Stats */
5754
5755	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5756				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5757	int_list = SYSCTL_CHILDREN(int_node);
5758
5759	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5760			CTLFLAG_RD, &stats->iac,
5761			"Interrupt Assertion Count");
5762
5763	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5764			CTLFLAG_RD, &stats->icrxptc,
5765			"Interrupt Cause Rx Pkt Timer Expire Count");
5766
5767	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5768			CTLFLAG_RD, &stats->icrxatc,
5769			"Interrupt Cause Rx Abs Timer Expire Count");
5770
5771	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5772			CTLFLAG_RD, &stats->ictxptc,
5773			"Interrupt Cause Tx Pkt Timer Expire Count");
5774
5775	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5776			CTLFLAG_RD, &stats->ictxatc,
5777			"Interrupt Cause Tx Abs Timer Expire Count");
5778
5779	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5780			CTLFLAG_RD, &stats->ictxqec,
5781			"Interrupt Cause Tx Queue Empty Count");
5782
5783	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5784			CTLFLAG_RD, &stats->ictxqmtc,
5785			"Interrupt Cause Tx Queue Min Thresh Count");
5786
5787	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5788			CTLFLAG_RD, &stats->icrxdmtc,
5789			"Interrupt Cause Rx Desc Min Thresh Count");
5790
5791	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5792			CTLFLAG_RD, &stats->icrxoc,
5793			"Interrupt Cause Receiver Overrun Count");
5794
5795	/* Host to Card Stats */
5796
5797	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5798				    CTLFLAG_RD, NULL,
5799				    "Host to Card Statistics");
5800
5801	host_list = SYSCTL_CHILDREN(host_node);
5802
5803	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5804			CTLFLAG_RD, &stats->cbtmpc,
5805			"Circuit Breaker Tx Packet Count");
5806
5807	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5808			CTLFLAG_RD, &stats->htdpmc,
5809			"Host Transmit Discarded Packets");
5810
5811	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5812			CTLFLAG_RD, &stats->rpthc,
5813			"Rx Packets To Host");
5814
5815	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5816			CTLFLAG_RD, &stats->cbrmpc,
5817			"Circuit Breaker Rx Packet Count");
5818
5819	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5820			CTLFLAG_RD, &stats->cbrdpc,
5821			"Circuit Breaker Rx Dropped Count");
5822
5823	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5824			CTLFLAG_RD, &stats->hgptc,
5825			"Host Good Packets Tx Count");
5826
5827	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5828			CTLFLAG_RD, &stats->htcbdpc,
5829			"Host Tx Circuit Breaker Dropped Count");
5830
5831	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5832			CTLFLAG_RD, &stats->hgorc,
5833			"Host Good Octets Received Count");
5834
5835	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5836			CTLFLAG_RD, &stats->hgotc,
5837			"Host Good Octets Transmit Count");
5838
5839	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5840			CTLFLAG_RD, &stats->lenerrs,
5841			"Length Errors");
5842
5843	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5844			CTLFLAG_RD, &stats->scvpc,
5845			"SerDes/SGMII Code Violation Pkt Count");
5846
5847	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5848			CTLFLAG_RD, &stats->hrmpc,
5849			"Header Redirection Missed Packet Count");
5850}
5851
5852
5853/**********************************************************************
5854 *
5855 *  This routine provides a way to dump out the adapter eeprom,
5856 *  often a useful debug/service tool. Only the first 32 words are
5857 *  dumped; everything that matters lives within that range.
5858 *
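 *  Writing 1 to the oid (e.g. "sysctl dev.igb.0.nvm=1", assuming the
 *  handler is attached under that name) triggers the dump to the console.
 *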
5859 **********************************************************************/
5860static int
5861igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5862{
5863	struct adapter *adapter;
5864	int error;
5865	int result;
5866
5867	result = -1;
5868	error = sysctl_handle_int(oidp, &result, 0, req);
5869
5870	if (error || !req->newptr)
5871		return (error);
5872
5873	/*
5874	 * This value will cause a hex dump of the
5875	 * first 32 16-bit words of the EEPROM to
5876	 * the screen.
5877	 */
5878	if (result == 1) {
5879		adapter = (struct adapter *)arg1;
5880		igb_print_nvm_info(adapter);
5881	}
5882
5883	return (error);
5884}
5885
5886static void
5887igb_print_nvm_info(struct adapter *adapter)
5888{
5889	u16	eeprom_data;
5890	int	i, j, row = 0;
5891
5892	/* It's a bit crude, but it gets the job done */
5893	printf("\nInterface EEPROM Dump:\n");
5894	printf("Offset\n0x0000  ");
5895	for (i = 0, j = 0; i < 32; i++, j++) {
5896		if (j == 8) { /* Make the offset block */
5897			j = 0; ++row;
5898			printf("\n0x00%x0  ", row);
5899		}
5900		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5901		printf("%04x ", eeprom_data);
5902	}
5903	printf("\n");
5904}
5905
5906static void
5907igb_set_sysctl_value(struct adapter *adapter, const char *name,
5908	const char *description, int *limit, int value)
5909{
5910	*limit = value;
5911	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5912	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5913	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5914}
5915
5916/*
5917** Set flow control using sysctl:
5918** Flow control values:
5919** 	0 - off
5920**	1 - rx pause
5921**	2 - tx pause
5922**	3 - full
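** Example (assuming this handler is attached as dev.igb.<unit>.fc):
**	sysctl dev.igb.0.fc=3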
5923*/
5924static int
5925igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5926{
5927	int		error;
5928	static int	input = 3; /* default is full */
5929	struct adapter	*adapter = (struct adapter *) arg1;
5930
5931	error = sysctl_handle_int(oidp, &input, 0, req);
5932
5933	if ((error) || (req->newptr == NULL))
5934		return (error);
5935
5936	switch (input) {
5937		case e1000_fc_rx_pause:
5938		case e1000_fc_tx_pause:
5939		case e1000_fc_full:
5940		case e1000_fc_none:
5941			adapter->hw.fc.requested_mode = input;
5942			adapter->fc = input;
5943			break;
5944		default:
5945			/* Do nothing */
5946			return (error);
5947	}
5948
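	/* Apply the requested mode to the MAC immediately */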
5949	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5950	e1000_force_mac_fc(&adapter->hw);
5951	return (error);
5952}
5953
5954/*
5955** Manage DMA Coalesce:
5956** Control values:
5957** 	0/1 - off/on
5958**	Legal timer values are:
5959**	250,500,1000-10000 in thousands
5960*/
5961static int
5962igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5963{
5964	struct adapter *adapter = (struct adapter *) arg1;
5965	int		error;
5966
5967	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5968
5969	if ((error) || (req->newptr == NULL))
5970		return (error);
5971
5972	switch (adapter->dmac) {
5973		case 0:
5974			/* Disabling */
5975			break;
5976		case 1: /* Just enable and use default */
5977			adapter->dmac = 1000;
5978			break;
5979		case 250:
5980		case 500:
5981		case 1000:
5982		case 2000:
5983		case 3000:
5984		case 4000:
5985		case 5000:
5986		case 6000:
5987		case 7000:
5988		case 8000:
5989		case 9000:
5990		case 10000:
5991			/* Legal values - allow */
5992			break;
5993		default:
5994			/* Do nothing, illegal value */
5995			adapter->dmac = 0;
5996			return (error);
5997	}
5998	/* Reinit the interface */
5999	igb_init(adapter);
6000	return (error);
6001}
6002
6003/*
6004** Manage Energy Efficient Ethernet:
6005** Control values:
6006	**	0 - EEE enabled, 1 - EEE disabled
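** Example (assuming the oid is attached as dev.igb.<unit>.eee_disabled):
**	sysctl dev.igb.0.eee_disabled=1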
6007*/
6008static int
6009igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6010{
6011	struct adapter	*adapter = (struct adapter *) arg1;
6012	int		error, value;
6013
6014	value = adapter->hw.dev_spec._82575.eee_disable;
6015	error = sysctl_handle_int(oidp, &value, 0, req);
6016	if (error || req->newptr == NULL)
6017		return (error);
6018	IGB_CORE_LOCK(adapter);
6019	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6020	igb_init_locked(adapter);
6021	IGB_CORE_UNLOCK(adapter);
6022	return (0);
6023}
6024