if_igb.c revision 249074
1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 249074 2013-04-03 23:39:54Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#ifndef IGB_LEGACY_TX
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.10";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select which devices the driver loads on
110 *  Last field stores an index into igb_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	{ 0x8086, E1000_DEV_ID_I210_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
154	{ 0x8086, E1000_DEV_ID_I210_COPPER_IT,	PCI_ANY_ID, PCI_ANY_ID, 0},
155	{ 0x8086, E1000_DEV_ID_I210_COPPER_OEM1,
156						PCI_ANY_ID, PCI_ANY_ID, 0},
157	{ 0x8086, E1000_DEV_ID_I210_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
158	{ 0x8086, E1000_DEV_ID_I210_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
159	{ 0x8086, E1000_DEV_ID_I210_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
160	{ 0x8086, E1000_DEV_ID_I211_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
161	/* required last entry */
162	{ 0, 0, 0, 0, 0}
163};
164
165/*********************************************************************
166 *  Table of branding strings for all supported NICs.
167 *********************************************************************/
168
169static char *igb_strings[] = {
170	"Intel(R) PRO/1000 Network Connection"
171};
172
173/*********************************************************************
174 *  Function prototypes
175 *********************************************************************/
176static int	igb_probe(device_t);
177static int	igb_attach(device_t);
178static int	igb_detach(device_t);
179static int	igb_shutdown(device_t);
180static int	igb_suspend(device_t);
181static int	igb_resume(device_t);
182#ifndef IGB_LEGACY_TX
183static int	igb_mq_start(struct ifnet *, struct mbuf *);
184static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
185static void	igb_qflush(struct ifnet *);
186static void	igb_deferred_mq_start(void *, int);
187#else
188static void	igb_start(struct ifnet *);
189static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
190#endif
191static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
192static void	igb_init(void *);
193static void	igb_init_locked(struct adapter *);
194static void	igb_stop(void *);
195static void	igb_media_status(struct ifnet *, struct ifmediareq *);
196static int	igb_media_change(struct ifnet *);
197static void	igb_identify_hardware(struct adapter *);
198static int	igb_allocate_pci_resources(struct adapter *);
199static int	igb_allocate_msix(struct adapter *);
200static int	igb_allocate_legacy(struct adapter *);
201static int	igb_setup_msix(struct adapter *);
202static void	igb_free_pci_resources(struct adapter *);
203static void	igb_local_timer(void *);
204static void	igb_reset(struct adapter *);
205static int	igb_setup_interface(device_t, struct adapter *);
206static int	igb_allocate_queues(struct adapter *);
207static void	igb_configure_queues(struct adapter *);
208
209static int	igb_allocate_transmit_buffers(struct tx_ring *);
210static void	igb_setup_transmit_structures(struct adapter *);
211static void	igb_setup_transmit_ring(struct tx_ring *);
212static void	igb_initialize_transmit_units(struct adapter *);
213static void	igb_free_transmit_structures(struct adapter *);
214static void	igb_free_transmit_buffers(struct tx_ring *);
215
216static int	igb_allocate_receive_buffers(struct rx_ring *);
217static int	igb_setup_receive_structures(struct adapter *);
218static int	igb_setup_receive_ring(struct rx_ring *);
219static void	igb_initialize_receive_units(struct adapter *);
220static void	igb_free_receive_structures(struct adapter *);
221static void	igb_free_receive_buffers(struct rx_ring *);
222static void	igb_free_receive_ring(struct rx_ring *);
223
224static void	igb_enable_intr(struct adapter *);
225static void	igb_disable_intr(struct adapter *);
226static void	igb_update_stats_counters(struct adapter *);
227static bool	igb_txeof(struct tx_ring *);
228
229static __inline	void igb_rx_discard(struct rx_ring *, int);
230static __inline void igb_rx_input(struct rx_ring *,
231		    struct ifnet *, struct mbuf *, u32);
232
233static bool	igb_rxeof(struct igb_queue *, int, int *);
234static void	igb_rx_checksum(u32, struct mbuf *, u32);
235static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
236static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
237		    struct ip *, struct tcphdr *);
238static void	igb_set_promisc(struct adapter *);
239static void	igb_disable_promisc(struct adapter *);
240static void	igb_set_multi(struct adapter *);
241static void	igb_update_link_status(struct adapter *);
242static void	igb_refresh_mbufs(struct rx_ring *, int);
243
244static void	igb_register_vlan(void *, struct ifnet *, u16);
245static void	igb_unregister_vlan(void *, struct ifnet *, u16);
246static void	igb_setup_vlan_hw_support(struct adapter *);
247
248static int	igb_xmit(struct tx_ring *, struct mbuf **);
249static int	igb_dma_malloc(struct adapter *, bus_size_t,
250		    struct igb_dma_alloc *, int);
251static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
252static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
253static void	igb_print_nvm_info(struct adapter *);
254static int 	igb_is_valid_ether_addr(u8 *);
255static void     igb_add_hw_stats(struct adapter *);
256
257static void	igb_vf_init_stats(struct adapter *);
258static void	igb_update_vf_stats_counters(struct adapter *);
259
260/* Management and WOL Support */
261static void	igb_init_manageability(struct adapter *);
262static void	igb_release_manageability(struct adapter *);
263static void     igb_get_hw_control(struct adapter *);
264static void     igb_release_hw_control(struct adapter *);
265static void     igb_enable_wakeup(device_t);
266static void     igb_led_func(void *, int);
267
268static int	igb_irq_fast(void *);
269static void	igb_msix_que(void *);
270static void	igb_msix_link(void *);
271static void	igb_handle_que(void *context, int pending);
272static void	igb_handle_link(void *context, int pending);
273static void	igb_handle_link_locked(struct adapter *);
274
275static void	igb_set_sysctl_value(struct adapter *, const char *,
276		    const char *, int *, int);
277static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
278static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
279static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
280
281#ifdef DEVICE_POLLING
282static poll_handler_t igb_poll;
283#endif /* POLLING */
284
285/*********************************************************************
286 *  FreeBSD Device Interface Entry Points
287 *********************************************************************/
288
289static device_method_t igb_methods[] = {
290	/* Device interface */
291	DEVMETHOD(device_probe, igb_probe),
292	DEVMETHOD(device_attach, igb_attach),
293	DEVMETHOD(device_detach, igb_detach),
294	DEVMETHOD(device_shutdown, igb_shutdown),
295	DEVMETHOD(device_suspend, igb_suspend),
296	DEVMETHOD(device_resume, igb_resume),
297	DEVMETHOD_END
298};
299
300static driver_t igb_driver = {
301	"igb", igb_methods, sizeof(struct adapter),
302};
303
304static devclass_t igb_devclass;
305DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
306MODULE_DEPEND(igb, pci, 1, 1, 1);
307MODULE_DEPEND(igb, ether, 1, 1, 1);
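/*
** Usage note (assumes the stock FreeBSD build, not stated in this file):
** this driver is normally compiled into the if_igb.ko module, which can be
** loaded at runtime with "kldload if_igb" or at boot via
** if_igb_load="YES" in /boot/loader.conf.
*/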
308
309/*********************************************************************
310 *  Tunable default values.
311 *********************************************************************/
312
313static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
314
315/* Descriptor defaults */
316static int igb_rxd = IGB_DEFAULT_RXD;
317static int igb_txd = IGB_DEFAULT_TXD;
318TUNABLE_INT("hw.igb.rxd", &igb_rxd);
319TUNABLE_INT("hw.igb.txd", &igb_txd);
320SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
321    "Number of receive descriptors per queue");
322SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
323    "Number of transmit descriptors per queue");
324
325/*
326** AIM: Adaptive Interrupt Moderation
327** which means that the interrupt rate
328** is varied over time based on the
329** traffic for that interrupt vector
330*/
331static int igb_enable_aim = TRUE;
332TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
333SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
334    "Enable adaptive interrupt moderation");
335
336/*
337 * MSIX should be the default for best performance,
338 * but this allows it to be forced off for testing.
339 */
340static int igb_enable_msix = 1;
341TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
342SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
343    "Enable MSI-X interrupts");
344
345/*
346** Tunable interrupt rate
347*/
348static int igb_max_interrupt_rate = 8000;
349TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
350SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
351    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
352
353#if __FreeBSD_version >= 800000
354/*
355** Tunable number of buffers in the buf-ring (drbr_xxx)
356*/
357static int igb_buf_ring_size = IGB_BR_SIZE;
358TUNABLE_INT("hw.igb.buf_ring_size", &igb_buf_ring_size);
359SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
360    &igb_buf_ring_size, 0, "Size of the bufring");
361#endif
362
363/*
364** Header split causes the packet header to
365** be DMA'd to a separate mbuf from the payload.
366** This can have memory alignment benefits, and
367** another plus is that small packets often fit
368** into the header and thus use no cluster. It's
369** a very workload-dependent feature.
370*/
371static int igb_header_split = FALSE;
372TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
373SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
374    "Enable receive mbuf header split");
375
376/*
377** This will autoconfigure based on the
378** number of CPUs and max supported
379** MSIX messages if left at 0.
380*/
381static int igb_num_queues = 0;
382TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
383SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
384    "Number of queues to configure, 0 indicates autoconfigure");
385
386/*
387** Global variable to store last used CPU when binding queues
388** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
389** queue is bound to a CPU.
390*/
391static int igb_last_bind_cpu = -1;
392
393/* How many packets rxeof tries to clean at a time */
394static int igb_rx_process_limit = 100;
395TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
396SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
397    &igb_rx_process_limit, 0,
398    "Maximum number of received packets to process at a time, -1 means unlimited");
399
400#ifdef DEV_NETMAP	/* see ixgbe.c for details */
401#include <dev/netmap/if_igb_netmap.h>
402#endif /* DEV_NETMAP */
403/*********************************************************************
404 *  Device identification routine
405 *
406 *  igb_probe determines if the driver should be loaded on an
407 *  adapter, based on the PCI vendor/device ID of the adapter.
408 *
409 *  return BUS_PROBE_DEFAULT on success, positive on failure
410 *********************************************************************/
411
412static int
413igb_probe(device_t dev)
414{
415	char		adapter_name[60];
416	uint16_t	pci_vendor_id = 0;
417	uint16_t	pci_device_id = 0;
418	uint16_t	pci_subvendor_id = 0;
419	uint16_t	pci_subdevice_id = 0;
420	igb_vendor_info_t *ent;
421
422	INIT_DEBUGOUT("igb_probe: begin");
423
424	pci_vendor_id = pci_get_vendor(dev);
425	if (pci_vendor_id != IGB_VENDOR_ID)
426		return (ENXIO);
427
428	pci_device_id = pci_get_device(dev);
429	pci_subvendor_id = pci_get_subvendor(dev);
430	pci_subdevice_id = pci_get_subdevice(dev);
431
432	ent = igb_vendor_info_array;
433	while (ent->vendor_id != 0) {
434		if ((pci_vendor_id == ent->vendor_id) &&
435		    (pci_device_id == ent->device_id) &&
436
437		    ((pci_subvendor_id == ent->subvendor_id) ||
438		    (ent->subvendor_id == PCI_ANY_ID)) &&
439
440		    ((pci_subdevice_id == ent->subdevice_id) ||
441		    (ent->subdevice_id == PCI_ANY_ID))) {
442			sprintf(adapter_name, "%s %s",
443				igb_strings[ent->index],
444				igb_driver_version);
445			device_set_desc_copy(dev, adapter_name);
446			return (BUS_PROBE_DEFAULT);
447		}
448		ent++;
449	}
450
451	return (ENXIO);
452}
453
454/*********************************************************************
455 *  Device initialization routine
456 *
457 *  The attach entry point is called when the driver is being loaded.
458 *  This routine identifies the type of hardware, allocates all resources
459 *  and initializes the hardware.
460 *
461 *  return 0 on success, positive on failure
462 *********************************************************************/
463
464static int
465igb_attach(device_t dev)
466{
467	struct adapter	*adapter;
468	int		error = 0;
469	u16		eeprom_data;
470
471	INIT_DEBUGOUT("igb_attach: begin");
472
473	if (resource_disabled("igb", device_get_unit(dev))) {
474		device_printf(dev, "Disabled by device hint\n");
475		return (ENXIO);
476	}
477
478	adapter = device_get_softc(dev);
479	adapter->dev = adapter->osdep.dev = dev;
480	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
481
482	/* SYSCTL stuff */
483	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
484	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
485	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
486	    igb_sysctl_nvm_info, "I", "NVM Information");
487
488	igb_set_sysctl_value(adapter, "enable_aim",
489	    "Interrupt Moderation", &adapter->enable_aim,
490	    igb_enable_aim);
491
492	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
493	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
494	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
495	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
496
497	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
498
499	/* Determine hardware and mac info */
500	igb_identify_hardware(adapter);
501
502	/* Setup PCI resources */
503	if (igb_allocate_pci_resources(adapter)) {
504		device_printf(dev, "Allocation of PCI resources failed\n");
505		error = ENXIO;
506		goto err_pci;
507	}
508
509	/* Do Shared Code initialization */
510	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
511		device_printf(dev, "Setup of Shared code failed\n");
512		error = ENXIO;
513		goto err_pci;
514	}
515
516	e1000_get_bus_info(&adapter->hw);
517
518	/* Sysctl for limiting the amount of work done in the taskqueue */
519	igb_set_sysctl_value(adapter, "rx_processing_limit",
520	    "max number of rx packets to process",
521	    &adapter->rx_process_limit, igb_rx_process_limit);
522
523	/*
524	 * Validate the number of transmit and receive descriptors. It
525	 * must not exceed the hardware maximum, and must be a multiple
526	 * of IGB_DBA_ALIGN.
527	 */
528	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
529	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
530		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
531		    IGB_DEFAULT_TXD, igb_txd);
532		adapter->num_tx_desc = IGB_DEFAULT_TXD;
533	} else
534		adapter->num_tx_desc = igb_txd;
535	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
536	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
537		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
538		    IGB_DEFAULT_RXD, igb_rxd);
539		adapter->num_rx_desc = IGB_DEFAULT_RXD;
540	} else
541		adapter->num_rx_desc = igb_rxd;
542
543	adapter->hw.mac.autoneg = DO_AUTO_NEG;
544	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
545	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
546
547	/* Copper options */
548	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
549		adapter->hw.phy.mdix = AUTO_ALL_MODES;
550		adapter->hw.phy.disable_polarity_correction = FALSE;
551		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
552	}
553
554	/*
555	 * Set the frame limits assuming
556	 * standard Ethernet-sized frames.
557	 */
558	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
559	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
560
561	/*
562	** Allocate and Setup Queues
563	*/
564	if (igb_allocate_queues(adapter)) {
565		error = ENOMEM;
566		goto err_pci;
567	}
568
569	/* Allocate the appropriate stats memory */
570	if (adapter->vf_ifp) {
571		adapter->stats =
572		    (struct e1000_vf_stats *)malloc(sizeof \
573		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
574		igb_vf_init_stats(adapter);
575	} else
576		adapter->stats =
577		    (struct e1000_hw_stats *)malloc(sizeof \
578		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
579	if (adapter->stats == NULL) {
580		device_printf(dev, "Can not allocate stats memory\n");
581		error = ENOMEM;
582		goto err_late;
583	}
584
585	/* Allocate multicast array memory. */
586	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
587	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
588	if (adapter->mta == NULL) {
589		device_printf(dev, "Can not allocate multicast setup array\n");
590		error = ENOMEM;
591		goto err_late;
592	}
593
594	/* Some adapter-specific advanced features */
595	if (adapter->hw.mac.type >= e1000_i350) {
596		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
597		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
598		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
599		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
600		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
601		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
602		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
603		    adapter, 0, igb_sysctl_eee, "I",
604		    "Disable Energy Efficient Ethernet");
605		if (adapter->hw.phy.media_type == e1000_media_type_copper)
606			e1000_set_eee_i350(&adapter->hw);
607	}
608
609	/*
610	** Start from a known state; this is
611	** important when reading the NVM and
612	** MAC address from it.
613	*/
614	e1000_reset_hw(&adapter->hw);
615
616	/* Make sure we have a good EEPROM before we read from it */
617	if (((adapter->hw.mac.type != e1000_i210) &&
618	    (adapter->hw.mac.type != e1000_i211)) &&
619	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
620		/*
621		** Some PCI-E parts fail the first check due to
622		** the link being in a sleep state; call it again.
623		** If it fails a second time, it's a real issue.
624		*/
625		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
626			device_printf(dev,
627			    "The EEPROM Checksum Is Not Valid\n");
628			error = EIO;
629			goto err_late;
630		}
631	}
632
633	/*
634	** Copy the permanent MAC address out of the EEPROM
635	*/
636	if (e1000_read_mac_addr(&adapter->hw) < 0) {
637		device_printf(dev, "EEPROM read error while reading MAC"
638		    " address\n");
639		error = EIO;
640		goto err_late;
641	}
642	/* Check its sanity */
643	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
644		device_printf(dev, "Invalid MAC address\n");
645		error = EIO;
646		goto err_late;
647	}
648
649	/* Setup OS specific network interface */
650	if (igb_setup_interface(dev, adapter) != 0)
651		goto err_late;
652
653	/* Now get a good starting state */
654	igb_reset(adapter);
655
656	/* Initialize statistics */
657	igb_update_stats_counters(adapter);
658
659	adapter->hw.mac.get_link_status = 1;
660	igb_update_link_status(adapter);
661
662	/* Indicate SOL/IDER usage */
663	if (e1000_check_reset_block(&adapter->hw))
664		device_printf(dev,
665		    "PHY reset is blocked due to SOL/IDER session.\n");
666
667	/* Determine if we have to control management hardware */
668	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
669
670	/*
671	 * Setup Wake-on-Lan
672	 */
673	/* APME bit in EEPROM is mapped to WUC.APME */
674	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
675	if (eeprom_data)
676		adapter->wol = E1000_WUFC_MAG;
677
678	/* Register for VLAN events */
679	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
680	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
681	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
682	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
683
684	igb_add_hw_stats(adapter);
685
686	/* Tell the stack that the interface is not active */
687	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
688	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
689
690	adapter->led_dev = led_create(igb_led_func, adapter,
691	    device_get_nameunit(dev));
692
693	/*
694	** Configure Interrupts
695	*/
696	if ((adapter->msix > 1) && (igb_enable_msix))
697		error = igb_allocate_msix(adapter);
698	else /* MSI or Legacy */
699		error = igb_allocate_legacy(adapter);
700	if (error)
701		goto err_late;
702
703#ifdef DEV_NETMAP
704	igb_netmap_attach(adapter);
705#endif /* DEV_NETMAP */
706	INIT_DEBUGOUT("igb_attach: end");
707
708	return (0);
709
710err_late:
711	igb_detach(dev);
712	igb_free_transmit_structures(adapter);
713	igb_free_receive_structures(adapter);
714	igb_release_hw_control(adapter);
715err_pci:
716	igb_free_pci_resources(adapter);
717	if (adapter->ifp != NULL)
718		if_free(adapter->ifp);
719	free(adapter->mta, M_DEVBUF);
720	IGB_CORE_LOCK_DESTROY(adapter);
721
722	return (error);
723}
724
725/*********************************************************************
726 *  Device removal routine
727 *
728 *  The detach entry point is called when the driver is being removed.
729 *  This routine stops the adapter and deallocates all the resources
730 *  that were allocated for driver operation.
731 *
732 *  return 0 on success, positive on failure
733 *********************************************************************/
734
735static int
736igb_detach(device_t dev)
737{
738	struct adapter	*adapter = device_get_softc(dev);
739	struct ifnet	*ifp = adapter->ifp;
740
741	INIT_DEBUGOUT("igb_detach: begin");
742
743	/* Make sure VLANS are not using driver */
744	if (adapter->ifp->if_vlantrunk != NULL) {
745		device_printf(dev,"Vlan in use, detach first\n");
746		return (EBUSY);
747	}
748
749	ether_ifdetach(adapter->ifp);
750
751	if (adapter->led_dev != NULL)
752		led_destroy(adapter->led_dev);
753
754#ifdef DEVICE_POLLING
755	if (ifp->if_capenable & IFCAP_POLLING)
756		ether_poll_deregister(ifp);
757#endif
758
759	IGB_CORE_LOCK(adapter);
760	adapter->in_detach = 1;
761	igb_stop(adapter);
762	IGB_CORE_UNLOCK(adapter);
763
764	e1000_phy_hw_reset(&adapter->hw);
765
766	/* Give control back to firmware */
767	igb_release_manageability(adapter);
768	igb_release_hw_control(adapter);
769
770	if (adapter->wol) {
771		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
772		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
773		igb_enable_wakeup(dev);
774	}
775
776	/* Unregister VLAN events */
777	if (adapter->vlan_attach != NULL)
778		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
779	if (adapter->vlan_detach != NULL)
780		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
781
782	callout_drain(&adapter->timer);
783
784#ifdef DEV_NETMAP
785	netmap_detach(adapter->ifp);
786#endif /* DEV_NETMAP */
787	igb_free_pci_resources(adapter);
788	bus_generic_detach(dev);
789	if_free(ifp);
790
791	igb_free_transmit_structures(adapter);
792	igb_free_receive_structures(adapter);
793	if (adapter->mta != NULL)
794		free(adapter->mta, M_DEVBUF);
795
796	IGB_CORE_LOCK_DESTROY(adapter);
797
798	return (0);
799}
800
801/*********************************************************************
802 *
803 *  Shutdown entry point
804 *
805 **********************************************************************/
806
807static int
808igb_shutdown(device_t dev)
809{
810	return igb_suspend(dev);
811}
812
813/*
814 * Suspend/resume device methods.
815 */
816static int
817igb_suspend(device_t dev)
818{
819	struct adapter *adapter = device_get_softc(dev);
820
821	IGB_CORE_LOCK(adapter);
822
823	igb_stop(adapter);
824
825        igb_release_manageability(adapter);
826	igb_release_hw_control(adapter);
827
828        if (adapter->wol) {
829                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
830                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
831                igb_enable_wakeup(dev);
832        }
833
834	IGB_CORE_UNLOCK(adapter);
835
836	return bus_generic_suspend(dev);
837}
838
839static int
840igb_resume(device_t dev)
841{
842	struct adapter *adapter = device_get_softc(dev);
843	struct tx_ring	*txr = adapter->tx_rings;
844	struct ifnet *ifp = adapter->ifp;
845
846	IGB_CORE_LOCK(adapter);
847	igb_init_locked(adapter);
848	igb_init_manageability(adapter);
849
850	if ((ifp->if_flags & IFF_UP) &&
851	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
852		for (int i = 0; i < adapter->num_queues; i++, txr++) {
853			IGB_TX_LOCK(txr);
854#ifndef IGB_LEGACY_TX
855			/* Process the stack queue only if not depleted */
856			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
857			    !drbr_empty(ifp, txr->br))
858				igb_mq_start_locked(ifp, txr);
859#else
860			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
861				igb_start_locked(txr, ifp);
862#endif
863			IGB_TX_UNLOCK(txr);
864		}
865	}
866	IGB_CORE_UNLOCK(adapter);
867
868	return bus_generic_resume(dev);
869}
870
871
872#ifdef IGB_LEGACY_TX
873
874/*********************************************************************
875 *  Transmit entry point
876 *
877 *  igb_start is called by the stack to initiate a transmit.
878 *  The driver will remain in this routine as long as there are
879 *  packets to transmit and transmit resources are available.
880 *  In case resources are not available, the stack is notified and
881 *  the packet is requeued.
882 **********************************************************************/
883
884static void
885igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
886{
887	struct adapter	*adapter = ifp->if_softc;
888	struct mbuf	*m_head;
889
890	IGB_TX_LOCK_ASSERT(txr);
891
892	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
893	    IFF_DRV_RUNNING)
894		return;
895	if (!adapter->link_active)
896		return;
897
898	/* Call cleanup if number of TX descriptors low */
899	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
900		igb_txeof(txr);
901
902	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
903		if (txr->tx_avail <= IGB_MAX_SCATTER) {
904			txr->queue_status |= IGB_QUEUE_DEPLETED;
905			break;
906		}
907		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
908		if (m_head == NULL)
909			break;
910		/*
911		 *  Encapsulation can modify our pointer, and/or make it
912		 *  NULL on failure.  In that event, we can't requeue.
913		 */
914		if (igb_xmit(txr, &m_head)) {
915			if (m_head != NULL)
916				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
917			if (txr->tx_avail <= IGB_MAX_SCATTER)
918				txr->queue_status |= IGB_QUEUE_DEPLETED;
919			break;
920		}
921
922		/* Send a copy of the frame to the BPF listener */
923		ETHER_BPF_MTAP(ifp, m_head);
924
925		/* Set watchdog on */
926		txr->watchdog_time = ticks;
927		txr->queue_status |= IGB_QUEUE_WORKING;
928	}
929}
930
931/*
932 * Legacy TX driver routine, called from the
933 * stack, always uses tx[0], and spins for it.
934 * Should not be used with multiqueue TX.
935 */
936static void
937igb_start(struct ifnet *ifp)
938{
939	struct adapter	*adapter = ifp->if_softc;
940	struct tx_ring	*txr = adapter->tx_rings;
941
942	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
943		IGB_TX_LOCK(txr);
944		igb_start_locked(txr, ifp);
945		IGB_TX_UNLOCK(txr);
946	}
947	return;
948}
949
950#else /* ~IGB_LEGACY_TX */
951
952/*
953** Multiqueue Transmit Entry:
954**  quick turnaround to the stack
955**
956*/
957static int
958igb_mq_start(struct ifnet *ifp, struct mbuf *m)
959{
960	struct adapter		*adapter = ifp->if_softc;
961	struct igb_queue	*que;
962	struct tx_ring		*txr;
963	int 			i, err = 0;
964
965	/* Which queue to use */
966	if ((m->m_flags & M_FLOWID) != 0)
967		i = m->m_pkthdr.flowid % adapter->num_queues;
968	else
969		i = curcpu % adapter->num_queues;
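	/*
	** When M_FLOWID is set, the flowid is typically the RSS hash taken
	** on receive, so all packets of a flow map to the same TX ring;
	** otherwise the sending CPU picks the ring, which keeps the
	** drbr enqueue below largely CPU-local.
	*/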
970	txr = &adapter->tx_rings[i];
971	que = &adapter->queues[i];
972
973	err = drbr_enqueue(ifp, txr->br, m);
974	taskqueue_enqueue(que->tq, &txr->txq_task);
975
976	return (err);
977}
978
979static int
980igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
981{
982	struct adapter  *adapter = txr->adapter;
983        struct mbuf     *next;
984        int             err = 0, enq;
985
986	IGB_TX_LOCK_ASSERT(txr);
987
988	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
989	    adapter->link_active == 0)
990		return (ENETDOWN);
991
992	enq = 0;
993
994	/* Process the queue */
995	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
996		if ((err = igb_xmit(txr, &next)) != 0) {
997			if (next == NULL) {
998				/* It was freed, move forward */
999				drbr_advance(ifp, txr->br);
1000			} else {
1001				/*
1002				 * Still have one left, it may not be
1003				 * the same since the transmit function
1004				 * may have changed it.
1005				 */
1006				drbr_putback(ifp, txr->br, next);
1007			}
1008			break;
1009		}
1010		drbr_advance(ifp, txr->br);
1011		enq++;
1012		ifp->if_obytes += next->m_pkthdr.len;
1013		if (next->m_flags & M_MCAST)
1014			ifp->if_omcasts++;
1015		ETHER_BPF_MTAP(ifp, next);
1016		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1017			break;
1018	}
1019	if (enq > 0) {
1020		/* Set the watchdog */
1021		txr->queue_status |= IGB_QUEUE_WORKING;
1022		txr->watchdog_time = ticks;
1023	}
1024	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1025		igb_txeof(txr);
1026	if (txr->tx_avail <= IGB_MAX_SCATTER)
1027		txr->queue_status |= IGB_QUEUE_DEPLETED;
1028	return (err);
1029}
1030
1031/*
1032 * Called from a taskqueue to drain queued transmit packets.
1033 */
1034static void
1035igb_deferred_mq_start(void *arg, int pending)
1036{
1037	struct tx_ring *txr = arg;
1038	struct adapter *adapter = txr->adapter;
1039	struct ifnet *ifp = adapter->ifp;
1040
1041	IGB_TX_LOCK(txr);
1042	if (!drbr_empty(ifp, txr->br))
1043		igb_mq_start_locked(ifp, txr);
1044	IGB_TX_UNLOCK(txr);
1045}
1046
1047/*
1048** Flush all ring buffers
1049*/
1050static void
1051igb_qflush(struct ifnet *ifp)
1052{
1053	struct adapter	*adapter = ifp->if_softc;
1054	struct tx_ring	*txr = adapter->tx_rings;
1055	struct mbuf	*m;
1056
1057	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1058		IGB_TX_LOCK(txr);
1059		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1060			m_freem(m);
1061		IGB_TX_UNLOCK(txr);
1062	}
1063	if_qflush(ifp);
1064}
1065#endif /* ~IGB_LEGACY_TX */
1066
1067/*********************************************************************
1068 *  Ioctl entry point
1069 *
1070 *  igb_ioctl is called when the user wants to configure the
1071 *  interface.
1072 *
1073 *  return 0 on success, positive on failure
1074 **********************************************************************/
1075
1076static int
1077igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1078{
1079	struct adapter	*adapter = ifp->if_softc;
1080	struct ifreq	*ifr = (struct ifreq *)data;
1081#if defined(INET) || defined(INET6)
1082	struct ifaddr	*ifa = (struct ifaddr *)data;
1083#endif
1084	bool		avoid_reset = FALSE;
1085	int		error = 0;
1086
1087	if (adapter->in_detach)
1088		return (error);
1089
1090	switch (command) {
1091	case SIOCSIFADDR:
1092#ifdef INET
1093		if (ifa->ifa_addr->sa_family == AF_INET)
1094			avoid_reset = TRUE;
1095#endif
1096#ifdef INET6
1097		if (ifa->ifa_addr->sa_family == AF_INET6)
1098			avoid_reset = TRUE;
1099#endif
1100		/*
1101		** Calling init results in link renegotiation,
1102		** so we avoid doing it when possible.
1103		*/
1104		if (avoid_reset) {
1105			ifp->if_flags |= IFF_UP;
1106			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1107				igb_init(adapter);
1108#ifdef INET
1109			if (!(ifp->if_flags & IFF_NOARP))
1110				arp_ifinit(ifp, ifa);
1111#endif
1112		} else
1113			error = ether_ioctl(ifp, command, data);
1114		break;
1115	case SIOCSIFMTU:
1116	    {
1117		int max_frame_size;
1118
1119		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1120
1121		IGB_CORE_LOCK(adapter);
1122		max_frame_size = 9234;
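		/*
		** 9234 bytes is used as the largest supported frame here;
		** after subtracting the 14-byte Ethernet header and 4-byte
		** CRC this allows an MTU of up to 9216 (9234 - 14 - 4).
		*/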
1123		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1124		    ETHER_CRC_LEN) {
1125			IGB_CORE_UNLOCK(adapter);
1126			error = EINVAL;
1127			break;
1128		}
1129
1130		ifp->if_mtu = ifr->ifr_mtu;
1131		adapter->max_frame_size =
1132		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1133		igb_init_locked(adapter);
1134		IGB_CORE_UNLOCK(adapter);
1135		break;
1136	    }
1137	case SIOCSIFFLAGS:
1138		IOCTL_DEBUGOUT("ioctl rcv'd:\
1139		    SIOCSIFFLAGS (Set Interface Flags)");
1140		IGB_CORE_LOCK(adapter);
1141		if (ifp->if_flags & IFF_UP) {
1142			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1143				if ((ifp->if_flags ^ adapter->if_flags) &
1144				    (IFF_PROMISC | IFF_ALLMULTI)) {
1145					igb_disable_promisc(adapter);
1146					igb_set_promisc(adapter);
1147				}
1148			} else
1149				igb_init_locked(adapter);
1150		} else
1151			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1152				igb_stop(adapter);
1153		adapter->if_flags = ifp->if_flags;
1154		IGB_CORE_UNLOCK(adapter);
1155		break;
1156	case SIOCADDMULTI:
1157	case SIOCDELMULTI:
1158		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1159		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1160			IGB_CORE_LOCK(adapter);
1161			igb_disable_intr(adapter);
1162			igb_set_multi(adapter);
1163#ifdef DEVICE_POLLING
1164			if (!(ifp->if_capenable & IFCAP_POLLING))
1165#endif
1166				igb_enable_intr(adapter);
1167			IGB_CORE_UNLOCK(adapter);
1168		}
1169		break;
1170	case SIOCSIFMEDIA:
1171		/* Check SOL/IDER usage */
1172		IGB_CORE_LOCK(adapter);
1173		if (e1000_check_reset_block(&adapter->hw)) {
1174			IGB_CORE_UNLOCK(adapter);
1175			device_printf(adapter->dev, "Media change is"
1176			    " blocked due to SOL/IDER session.\n");
1177			break;
1178		}
1179		IGB_CORE_UNLOCK(adapter);
1180	case SIOCGIFMEDIA:
1181		IOCTL_DEBUGOUT("ioctl rcv'd: \
1182		    SIOCxIFMEDIA (Get/Set Interface Media)");
1183		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1184		break;
1185	case SIOCSIFCAP:
1186	    {
1187		int mask, reinit;
1188
1189		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1190		reinit = 0;
1191		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1192#ifdef DEVICE_POLLING
1193		if (mask & IFCAP_POLLING) {
1194			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1195				error = ether_poll_register(igb_poll, ifp);
1196				if (error)
1197					return (error);
1198				IGB_CORE_LOCK(adapter);
1199				igb_disable_intr(adapter);
1200				ifp->if_capenable |= IFCAP_POLLING;
1201				IGB_CORE_UNLOCK(adapter);
1202			} else {
1203				error = ether_poll_deregister(ifp);
1204				/* Enable interrupt even in error case */
1205				IGB_CORE_LOCK(adapter);
1206				igb_enable_intr(adapter);
1207				ifp->if_capenable &= ~IFCAP_POLLING;
1208				IGB_CORE_UNLOCK(adapter);
1209			}
1210		}
1211#endif
1212		if (mask & IFCAP_HWCSUM) {
1213			ifp->if_capenable ^= IFCAP_HWCSUM;
1214			reinit = 1;
1215		}
1216		if (mask & IFCAP_TSO4) {
1217			ifp->if_capenable ^= IFCAP_TSO4;
1218			reinit = 1;
1219		}
1220		if (mask & IFCAP_VLAN_HWTAGGING) {
1221			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1222			reinit = 1;
1223		}
1224		if (mask & IFCAP_VLAN_HWFILTER) {
1225			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1226			reinit = 1;
1227		}
1228		if (mask & IFCAP_VLAN_HWTSO) {
1229			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1230			reinit = 1;
1231		}
1232		if (mask & IFCAP_LRO) {
1233			ifp->if_capenable ^= IFCAP_LRO;
1234			reinit = 1;
1235		}
1236		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1237			igb_init(adapter);
1238		VLAN_CAPABILITIES(ifp);
1239		break;
1240	    }
1241
1242	default:
1243		error = ether_ioctl(ifp, command, data);
1244		break;
1245	}
1246
1247	return (error);
1248}
1249
1250
1251/*********************************************************************
1252 *  Init entry point
1253 *
1254 *  This routine is used in two ways. It is used by the stack as
1255 *  the init entry point in the network interface structure. It is also used
1256 *  by the driver as a hw/sw initialization routine to get to a
1257 *  consistent state.
1258 *
1259 *  return 0 on success, positive on failure
1260 **********************************************************************/
1261
1262static void
1263igb_init_locked(struct adapter *adapter)
1264{
1265	struct ifnet	*ifp = adapter->ifp;
1266	device_t	dev = adapter->dev;
1267
1268	INIT_DEBUGOUT("igb_init: begin");
1269
1270	IGB_CORE_LOCK_ASSERT(adapter);
1271
1272	igb_disable_intr(adapter);
1273	callout_stop(&adapter->timer);
1274
1275	/* Get the latest mac address, User can use a LAA */
1276        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1277              ETHER_ADDR_LEN);
1278
1279	/* Put the address into the Receive Address Array */
1280	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1281
1282	igb_reset(adapter);
1283	igb_update_link_status(adapter);
1284
1285	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1286
1287	/* Set hardware offload abilities */
1288	ifp->if_hwassist = 0;
1289	if (ifp->if_capenable & IFCAP_TXCSUM) {
1290		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1291#if __FreeBSD_version >= 800000
1292		if (adapter->hw.mac.type == e1000_82576)
1293			ifp->if_hwassist |= CSUM_SCTP;
1294#endif
1295	}
1296
1297	if (ifp->if_capenable & IFCAP_TSO4)
1298		ifp->if_hwassist |= CSUM_TSO;
1299
1300	/* Configure for OS presence */
1301	igb_init_manageability(adapter);
1302
1303	/* Prepare transmit descriptors and buffers */
1304	igb_setup_transmit_structures(adapter);
1305	igb_initialize_transmit_units(adapter);
1306
1307	/* Setup Multicast table */
1308	igb_set_multi(adapter);
1309
1310	/*
1311	** Figure out the desired mbuf pool
1312	** for doing jumbo frames / packet split
1313	*/
1314	if (adapter->max_frame_size <= 2048)
1315		adapter->rx_mbuf_sz = MCLBYTES;
1316	else if (adapter->max_frame_size <= 4096)
1317		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1318	else
1319		adapter->rx_mbuf_sz = MJUM9BYTES;
1320
1321	/* Prepare receive descriptors and buffers */
1322	if (igb_setup_receive_structures(adapter)) {
1323		device_printf(dev, "Could not setup receive structures\n");
1324		return;
1325	}
1326	igb_initialize_receive_units(adapter);
1327
1328        /* Enable VLAN support */
1329	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1330		igb_setup_vlan_hw_support(adapter);
1331
1332	/* Don't lose promiscuous settings */
1333	igb_set_promisc(adapter);
1334
1335	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1336	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1337
1338	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1339	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1340
1341	if (adapter->msix > 1) /* Set up queue routing */
1342		igb_configure_queues(adapter);
1343
1344	/* this clears any pending interrupts */
1345	E1000_READ_REG(&adapter->hw, E1000_ICR);
1346#ifdef DEVICE_POLLING
1347	/*
1348	 * Only enable interrupts if we are not polling; make sure
1349	 * they are off otherwise.
1350	 */
1351	if (ifp->if_capenable & IFCAP_POLLING)
1352		igb_disable_intr(adapter);
1353	else
1354#endif /* DEVICE_POLLING */
1355	{
1356		igb_enable_intr(adapter);
1357		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1358	}
1359
1360	/* Set Energy Efficient Ethernet */
1361	if (adapter->hw.phy.media_type == e1000_media_type_copper)
1362		e1000_set_eee_i350(&adapter->hw);
1363}
1364
1365static void
1366igb_init(void *arg)
1367{
1368	struct adapter *adapter = arg;
1369
1370	IGB_CORE_LOCK(adapter);
1371	igb_init_locked(adapter);
1372	IGB_CORE_UNLOCK(adapter);
1373}
1374
1375
1376static void
1377igb_handle_que(void *context, int pending)
1378{
1379	struct igb_queue *que = context;
1380	struct adapter *adapter = que->adapter;
1381	struct tx_ring *txr = que->txr;
1382	struct ifnet	*ifp = adapter->ifp;
1383
1384	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1385		bool	more;
1386
1387		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1388
1389		IGB_TX_LOCK(txr);
1390		igb_txeof(txr);
1391#ifndef IGB_LEGACY_TX
1392		/* Process the stack queue only if not depleted */
1393		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1394		    !drbr_empty(ifp, txr->br))
1395			igb_mq_start_locked(ifp, txr);
1396#else
1397		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1398			igb_start_locked(txr, ifp);
1399#endif
1400		IGB_TX_UNLOCK(txr);
1401		/* Do we need another? */
1402		if (more) {
1403			taskqueue_enqueue(que->tq, &que->que_task);
1404			return;
1405		}
1406	}
1407
1408#ifdef DEVICE_POLLING
1409	if (ifp->if_capenable & IFCAP_POLLING)
1410		return;
1411#endif
1412	/* Reenable this interrupt */
1413	if (que->eims)
1414		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1415	else
1416		igb_enable_intr(adapter);
1417}
1418
1419/* Deal with link in a sleepable context */
1420static void
1421igb_handle_link(void *context, int pending)
1422{
1423	struct adapter *adapter = context;
1424
1425	IGB_CORE_LOCK(adapter);
1426	igb_handle_link_locked(adapter);
1427	IGB_CORE_UNLOCK(adapter);
1428}
1429
1430static void
1431igb_handle_link_locked(struct adapter *adapter)
1432{
1433	struct tx_ring	*txr = adapter->tx_rings;
1434	struct ifnet *ifp = adapter->ifp;
1435
1436	IGB_CORE_LOCK_ASSERT(adapter);
1437	adapter->hw.mac.get_link_status = 1;
1438	igb_update_link_status(adapter);
1439	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1440		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1441			IGB_TX_LOCK(txr);
1442#ifndef IGB_LEGACY_TX
1443			/* Process the stack queue only if not depleted */
1444			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1445			    !drbr_empty(ifp, txr->br))
1446				igb_mq_start_locked(ifp, txr);
1447#else
1448			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1449				igb_start_locked(txr, ifp);
1450#endif
1451			IGB_TX_UNLOCK(txr);
1452		}
1453	}
1454}
1455
1456/*********************************************************************
1457 *
1458 *  MSI/Legacy Deferred
1459 *  Interrupt Service routine
1460 *
1461 *********************************************************************/
1462static int
1463igb_irq_fast(void *arg)
1464{
1465	struct adapter		*adapter = arg;
1466	struct igb_queue	*que = adapter->queues;
1467	u32			reg_icr;
1468
1469
1470	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1471
1472	/* Hot eject?  */
1473	if (reg_icr == 0xffffffff)
1474		return FILTER_STRAY;
1475
1476	/* Definitely not our interrupt.  */
1477	if (reg_icr == 0x0)
1478		return FILTER_STRAY;
1479
1480	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1481		return FILTER_STRAY;
1482
1483	/*
1484	 * Mask interrupts until the taskqueue is finished running.  This is
1485	 * cheap, just assume that it is needed.  This also works around the
1486	 * MSI message reordering errata on certain systems.
1487	 */
1488	igb_disable_intr(adapter);
1489	taskqueue_enqueue(que->tq, &que->que_task);
1490
1491	/* Link status change */
1492	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1493		taskqueue_enqueue(que->tq, &adapter->link_task);
1494
1495	if (reg_icr & E1000_ICR_RXO)
1496		adapter->rx_overruns++;
1497	return FILTER_HANDLED;
1498}
1499
1500#ifdef DEVICE_POLLING
1501#if __FreeBSD_version >= 800000
1502#define POLL_RETURN_COUNT(a) (a)
1503static int
1504#else
1505#define POLL_RETURN_COUNT(a)
1506static void
1507#endif
1508igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1509{
1510	struct adapter		*adapter = ifp->if_softc;
1511	struct igb_queue	*que;
1512	struct tx_ring		*txr;
1513	u32			reg_icr, rx_done = 0;
1514	u32			loop = IGB_MAX_LOOP;
1515	bool			more;
1516
1517	IGB_CORE_LOCK(adapter);
1518	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1519		IGB_CORE_UNLOCK(adapter);
1520		return POLL_RETURN_COUNT(rx_done);
1521	}
1522
1523	if (cmd == POLL_AND_CHECK_STATUS) {
1524		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1525		/* Link status change */
1526		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1527			igb_handle_link_locked(adapter);
1528
1529		if (reg_icr & E1000_ICR_RXO)
1530			adapter->rx_overruns++;
1531	}
1532	IGB_CORE_UNLOCK(adapter);
1533
1534	for (int i = 0; i < adapter->num_queues; i++) {
1535		que = &adapter->queues[i];
1536		txr = que->txr;
1537
1538		igb_rxeof(que, count, &rx_done);
1539
1540		IGB_TX_LOCK(txr);
1541		do {
1542			more = igb_txeof(txr);
1543		} while (loop-- && more);
1544#ifndef IGB_LEGACY_TX
1545		if (!drbr_empty(ifp, txr->br))
1546			igb_mq_start_locked(ifp, txr);
1547#else
1548		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1549			igb_start_locked(txr, ifp);
1550#endif
1551		IGB_TX_UNLOCK(txr);
1552	}
1553
1554	return POLL_RETURN_COUNT(rx_done);
1555}
1556#endif /* DEVICE_POLLING */
1557
1558/*********************************************************************
1559 *
1560 *  MSIX Que Interrupt Service routine
1561 *
1562 **********************************************************************/
1563static void
1564igb_msix_que(void *arg)
1565{
1566	struct igb_queue *que = arg;
1567	struct adapter *adapter = que->adapter;
1568	struct ifnet   *ifp = adapter->ifp;
1569	struct tx_ring *txr = que->txr;
1570	struct rx_ring *rxr = que->rxr;
1571	u32		newitr = 0;
1572	bool		more_rx;
1573
1574	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1575	++que->irqs;
1576
1577	IGB_TX_LOCK(txr);
1578	igb_txeof(txr);
1579#ifndef IGB_LEGACY_TX
1580	/* Process the stack queue only if not depleted */
1581	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1582	    !drbr_empty(ifp, txr->br))
1583		igb_mq_start_locked(ifp, txr);
1584#else
1585	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1586		igb_start_locked(txr, ifp);
1587#endif
1588	IGB_TX_UNLOCK(txr);
1589
1590	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1591
1592	if (adapter->enable_aim == FALSE)
1593		goto no_calc;
1594	/*
1595	** Do Adaptive Interrupt Moderation:
1596        **  - Write out last calculated setting
1597	**  - Calculate based on average size over
1598	**    the last interval.
1599	*/
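	/*
	** Worked example with illustrative numbers: at 1Gb with an average
	** frame of ~500 bytes, newitr = 500 + 24 = 524, which lands in the
	** 300-1200 mid range and is divided by 3 to ~174, masked with
	** 0x7FFC, and written to EITR on the next interrupt.
	*/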
1600        if (que->eitr_setting)
1601                E1000_WRITE_REG(&adapter->hw,
1602                    E1000_EITR(que->msix), que->eitr_setting);
1603
1604        que->eitr_setting = 0;
1605
1606        /* Idle, do nothing */
1607        if ((txr->bytes == 0) && (rxr->bytes == 0))
1608                goto no_calc;
1609
1610        /* Use half the default if sub-gig */
1611        if (adapter->link_speed != 1000)
1612                newitr = IGB_DEFAULT_ITR / 2;
1613        else {
1614		if ((txr->bytes) && (txr->packets))
1615                	newitr = txr->bytes/txr->packets;
1616		if ((rxr->bytes) && (rxr->packets))
1617			newitr = max(newitr,
1618			    (rxr->bytes / rxr->packets));
1619                newitr += 24; /* account for hardware frame, crc */
1620		/* set an upper boundary */
1621		newitr = min(newitr, 3000);
1622		/* Be nice to the mid range */
1623                if ((newitr > 300) && (newitr < 1200))
1624                        newitr = (newitr / 3);
1625                else
1626                        newitr = (newitr / 2);
1627        }
1628        newitr &= 0x7FFC;  /* Mask invalid bits */
1629        if (adapter->hw.mac.type == e1000_82575)
1630                newitr |= newitr << 16;
1631        else
1632                newitr |= E1000_EITR_CNT_IGNR;
1633
1634        /* save for next interrupt */
1635        que->eitr_setting = newitr;
1636
1637        /* Reset state */
1638        txr->bytes = 0;
1639        txr->packets = 0;
1640        rxr->bytes = 0;
1641        rxr->packets = 0;
1642
1643no_calc:
1644	/* Schedule a clean task if needed */
1645	if (more_rx)
1646		taskqueue_enqueue(que->tq, &que->que_task);
1647	else
1648		/* Reenable this interrupt */
1649		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1650	return;
1651}
1652
1653
1654/*********************************************************************
1655 *
1656 *  MSIX Link Interrupt Service routine
1657 *
1658 **********************************************************************/
1659
1660static void
1661igb_msix_link(void *arg)
1662{
1663	struct adapter	*adapter = arg;
1664	u32       	icr;
1665
1666	++adapter->link_irq;
1667	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1668	if (!(icr & E1000_ICR_LSC))
1669		goto spurious;
1670	igb_handle_link(adapter, 0);
1671
1672spurious:
1673	/* Rearm */
1674	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1675	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1676	return;
1677}
1678
1679
1680/*********************************************************************
1681 *
1682 *  Media Ioctl callback
1683 *
1684 *  This routine is called whenever the user queries the status of
1685 *  the interface using ifconfig.
1686 *
1687 **********************************************************************/
1688static void
1689igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1690{
1691	struct adapter *adapter = ifp->if_softc;
1692
1693	INIT_DEBUGOUT("igb_media_status: begin");
1694
1695	IGB_CORE_LOCK(adapter);
1696	igb_update_link_status(adapter);
1697
1698	ifmr->ifm_status = IFM_AVALID;
1699	ifmr->ifm_active = IFM_ETHER;
1700
1701	if (!adapter->link_active) {
1702		IGB_CORE_UNLOCK(adapter);
1703		return;
1704	}
1705
1706	ifmr->ifm_status |= IFM_ACTIVE;
1707
1708	switch (adapter->link_speed) {
1709	case 10:
1710		ifmr->ifm_active |= IFM_10_T;
1711		break;
1712	case 100:
1713		/*
1714		** Support for 100Mb SFP - these are Fiber
1715		** but the media type appears as serdes
1716		*/
1717		if (adapter->hw.phy.media_type ==
1718		    e1000_media_type_internal_serdes)
1719			ifmr->ifm_active |= IFM_100_FX;
1720		else
1721			ifmr->ifm_active |= IFM_100_TX;
1722		break;
1723	case 1000:
1724		ifmr->ifm_active |= IFM_1000_T;
1725		break;
1726	}
1727
1728	if (adapter->link_duplex == FULL_DUPLEX)
1729		ifmr->ifm_active |= IFM_FDX;
1730	else
1731		ifmr->ifm_active |= IFM_HDX;
1732
1733	IGB_CORE_UNLOCK(adapter);
1734}
1735
1736/*********************************************************************
1737 *
1738 *  Media Ioctl callback
1739 *
1740 *  This routine is called when the user changes speed/duplex using
1741 *  the media/mediaopt options with ifconfig.
1742 *
1743 **********************************************************************/
1744static int
1745igb_media_change(struct ifnet *ifp)
1746{
1747	struct adapter *adapter = ifp->if_softc;
1748	struct ifmedia  *ifm = &adapter->media;
1749
1750	INIT_DEBUGOUT("igb_media_change: begin");
1751
1752	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1753		return (EINVAL);
1754
1755	IGB_CORE_LOCK(adapter);
1756	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1757	case IFM_AUTO:
1758		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1760		break;
1761	case IFM_1000_LX:
1762	case IFM_1000_SX:
1763	case IFM_1000_T:
1764		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1765		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1766		break;
1767	case IFM_100_TX:
1768		adapter->hw.mac.autoneg = FALSE;
1769		adapter->hw.phy.autoneg_advertised = 0;
1770		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1772		else
1773			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1774		break;
1775	case IFM_10_T:
1776		adapter->hw.mac.autoneg = FALSE;
1777		adapter->hw.phy.autoneg_advertised = 0;
1778		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1779			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1780		else
1781			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1782		break;
1783	default:
1784		device_printf(adapter->dev, "Unsupported media type\n");
1785	}
1786
1787	igb_init_locked(adapter);
1788	IGB_CORE_UNLOCK(adapter);
1789
1790	return (0);
1791}
1792
1793
1794/*********************************************************************
1795 *
1796 *  This routine maps the mbufs to Advanced TX descriptors.
1797 *
1798 **********************************************************************/
1799static int
1800igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1801{
1802	struct adapter		*adapter = txr->adapter;
1803	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1804	bus_dmamap_t		map;
1805	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1806	union e1000_adv_tx_desc	*txd = NULL;
1807	struct mbuf		*m_head = *m_headp;
1808	struct ether_vlan_header *eh = NULL;
1809	struct ip		*ip = NULL;
1810	struct tcphdr		*th = NULL;
1811	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1812	int			ehdrlen, poff;
1813	int			nsegs, i, first, last = 0;
1814	int			error, do_tso, remap = 1;
1815
1816	/* Set basic descriptor constants */
1817	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1818	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1819	if (m_head->m_flags & M_VLANTAG)
1820		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1821
1822retry:
1823	m_head = *m_headp;
1824	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1825	hdrlen = ehdrlen = poff = 0;
1826
1827	/*
1828	 * Intel recommends entire IP/TCP header length reside in a single
1829	 * buffer. If multiple descriptors are used to describe the IP and
1830	 * TCP header, each descriptor should describe one or more
1831	 * complete headers; descriptors referencing only parts of headers
1832	 * are not supported. If all layer headers are not coalesced into
1833	 * a single buffer, each buffer should not cross a 4KB boundary,
1834	 * or be larger than the maximum read request size.
1835	 * Controller also requires modifing IP/TCP header to make TSO work
1836	 * so we firstly get a writable mbuf chain then coalesce ethernet/
1837	 * IP/TCP header into a single buffer to meet the requirement of
1838	 * controller. This also simplifies IP/TCP/UDP checksum offloading
1839	 * which also has similiar restrictions.
1840	 */
1841	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1842		if (do_tso || (m_head->m_next != NULL &&
1843		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1844			if (M_WRITABLE(*m_headp) == 0) {
1845				m_head = m_dup(*m_headp, M_NOWAIT);
1846				m_freem(*m_headp);
1847				if (m_head == NULL) {
1848					*m_headp = NULL;
1849					return (ENOBUFS);
1850				}
1851				*m_headp = m_head;
1852			}
1853		}
1854		/*
1855		 * Assume IPv4, we don't have TSO/checksum offload support
1856		 * for IPv6 yet.
1857		 */
1858		ehdrlen = sizeof(struct ether_header);
1859		m_head = m_pullup(m_head, ehdrlen);
1860		if (m_head == NULL) {
1861			*m_headp = NULL;
1862			return (ENOBUFS);
1863		}
1864		eh = mtod(m_head, struct ether_vlan_header *);
1865		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1866			ehdrlen = sizeof(struct ether_vlan_header);
1867			m_head = m_pullup(m_head, ehdrlen);
1868			if (m_head == NULL) {
1869				*m_headp = NULL;
1870				return (ENOBUFS);
1871			}
1872		}
1873		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1874		if (m_head == NULL) {
1875			*m_headp = NULL;
1876			return (ENOBUFS);
1877		}
1878		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1879		poff = ehdrlen + (ip->ip_hl << 2);
1880		if (do_tso) {
1881			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1882			if (m_head == NULL) {
1883				*m_headp = NULL;
1884				return (ENOBUFS);
1885			}
1886			/*
1887			 * The pseudo TCP checksum does not include the TCP
1888			 * payload length, so the driver must recompute it here
1889			 * to be what the hardware expects to see, in accordance
1890			 * with Microsoft's Large Send specification.
1891			 */
1892			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1893			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1894			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1895			/* Keep track of the full header length */
1896			hdrlen = poff + (th->th_off << 2);
1897		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
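			/*
			 * For example, an untagged IPv4/TCP frame with no IP
			 * or TCP options gives ehdrlen = 14, poff = 14 + 20 =
			 * 34 and hdrlen = 34 + 20 = 54 bytes of headers.
			 */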
1898			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1899			if (m_head == NULL) {
1900				*m_headp = NULL;
1901				return (ENOBUFS);
1902			}
1903			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1904			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1905			if (m_head == NULL) {
1906				*m_headp = NULL;
1907				return (ENOBUFS);
1908			}
1909			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1910			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1911		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1912			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1913			if (m_head == NULL) {
1914				*m_headp = NULL;
1915				return (ENOBUFS);
1916			}
1917			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1918		}
1919		*m_headp = m_head;
1920	}
1921
1922	/*
1923	 * Map the packet for DMA
1924	 *
1925	 * Capture the first descriptor index,
1926	 * this descriptor will have the index
1927	 * of the EOP which is the only one that
1928	 * now gets a DONE bit writeback.
1929	 */
1930	first = txr->next_avail_desc;
1931	tx_buffer = &txr->tx_buffers[first];
1932	tx_buffer_mapped = tx_buffer;
1933	map = tx_buffer->map;
1934
1935	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1936	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1937
1938	/*
1939	 * There are two types of errors we can (try) to handle:
1940	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1941	 *   out of segments.  Defragment the mbuf chain and try again.
1942	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1943	 *   at this point in time.  Defer sending and try again later.
1944	 * All other errors, in particular EINVAL, are fatal and prevent the
1945	 * mbuf chain from ever going through.  Drop it and report error.
1946	 */
1947	if (error == EFBIG && remap) {
1948		struct mbuf *m;
1949
1950		m = m_defrag(*m_headp, M_NOWAIT);
1951		if (m == NULL) {
1952			adapter->mbuf_defrag_failed++;
1953			m_freem(*m_headp);
1954			*m_headp = NULL;
1955			return (ENOBUFS);
1956		}
1957		*m_headp = m;
1958
1959		/* Try it again, but only once */
1960		remap = 0;
1961		goto retry;
1962	} else if (error == ENOMEM) {
1963		adapter->no_tx_dma_setup++;
1964		return (error);
1965	} else if (error != 0) {
1966		adapter->no_tx_dma_setup++;
1967		m_freem(*m_headp);
1968		*m_headp = NULL;
1969		return (error);
1970	}
1971
1972	/*
1973	** Make sure we don't overrun the ring,
1974	** we need nsegs descriptors and one for
1975	** the context descriptor used for the
1976	** offloads.
1977	*/
1978        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1979                txr->no_desc_avail++;
1980		bus_dmamap_unload(txr->txtag, map);
1981		return (ENOBUFS);
1982        }
1983	m_head = *m_headp;
1984
1985	/* Do hardware assists:
1986         * Set up the context descriptor, used
1987         * when any hardware offload is done.
1988         * This includes CSUM, VLAN, and TSO.
1989         * It will use the first descriptor.
1990         */
1991
1992	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1993		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1994			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1995			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1996			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1997		} else
1998			return (ENXIO);
1999	} else if (igb_tx_ctx_setup(txr, m_head))
2000		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
2001
2002	/* Calculate payload length */
2003	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
2004	    << E1000_ADVTXD_PAYLEN_SHIFT);
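	/*
	 * For TSO, hdrlen was set above to cover the L2/L3/L4 headers, so
	 * PAYLEN describes only the TCP payload of the large frame; in the
	 * non-TSO case hdrlen is still 0 and PAYLEN is the full frame length.
	 */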
2005
2006	/* 82575 needs the queue index added */
2007	if (adapter->hw.mac.type == e1000_82575)
2008		olinfo_status |= txr->me << 4;
2009
2010	/* Set up our transmit descriptors */
2011	i = txr->next_avail_desc;
2012	for (int j = 0; j < nsegs; j++) {
2013		bus_size_t seg_len;
2014		bus_addr_t seg_addr;
2015
2016		tx_buffer = &txr->tx_buffers[i];
2017		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
2018		seg_addr = segs[j].ds_addr;
2019		seg_len  = segs[j].ds_len;
2020
2021		txd->read.buffer_addr = htole64(seg_addr);
2022		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
2023		txd->read.olinfo_status = htole32(olinfo_status);
2024		last = i;
2025		if (++i == adapter->num_tx_desc)
2026			i = 0;
2027		tx_buffer->m_head = NULL;
2028		tx_buffer->next_eop = -1;
2029	}
2030
2031	txr->next_avail_desc = i;
2032	txr->tx_avail -= nsegs;
2033        tx_buffer->m_head = m_head;
2034
2035	/*
2036	** Here we swap the map so the last descriptor,
2037	** which gets the completion interrupt has the
2038	** real map, and the first descriptor gets the
2039	** unused map from this descriptor.
2040	*/
2041	tx_buffer_mapped->map = tx_buffer->map;
2042	tx_buffer->map = map;
2043        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2044
2045        /*
2046         * Last Descriptor of Packet
2047	 * needs End Of Packet (EOP)
2048	 * and Report Status (RS)
2049         */
2050        txd->read.cmd_type_len |=
2051	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
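	/*
	 * With RS requested only on this final descriptor, the hardware
	 * writes back a DONE status just once per packet (the writeback
	 * noted above where 'first' was captured), keeping descriptor
	 * write-back traffic to a minimum.
	 */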
2052	/*
2053	 * Keep track in the first buffer which
2054	 * descriptor will be written back
2055	 */
2056	tx_buffer = &txr->tx_buffers[first];
2057	tx_buffer->next_eop = last;
2058	/* Update the watchdog time early and often */
2059	txr->watchdog_time = ticks;
2060
2061	/*
2062	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2063	 * that this frame is available to transmit.
2064	 */
2065	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2066	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2067	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2068	++txr->tx_packets;
2069
2070	return (0);
2071}
2072static void
2073igb_set_promisc(struct adapter *adapter)
2074{
2075	struct ifnet	*ifp = adapter->ifp;
2076	struct e1000_hw *hw = &adapter->hw;
2077	u32		reg;
2078
2079	if (adapter->vf_ifp) {
2080		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2081		return;
2082	}
2083
2084	reg = E1000_READ_REG(hw, E1000_RCTL);
2085	if (ifp->if_flags & IFF_PROMISC) {
2086		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2087		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2088	} else if (ifp->if_flags & IFF_ALLMULTI) {
2089		reg |= E1000_RCTL_MPE;
2090		reg &= ~E1000_RCTL_UPE;
2091		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2092	}
2093}
2094
2095static void
2096igb_disable_promisc(struct adapter *adapter)
2097{
2098	struct e1000_hw *hw = &adapter->hw;
2099	struct ifnet	*ifp = adapter->ifp;
2100	u32		reg;
2101	int		mcnt = 0;
2102
2103	if (adapter->vf_ifp) {
2104		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2105		return;
2106	}
2107	reg = E1000_READ_REG(hw, E1000_RCTL);
2108	reg &=  (~E1000_RCTL_UPE);
2109	if (ifp->if_flags & IFF_ALLMULTI)
2110		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2111	else {
2112		struct  ifmultiaddr *ifma;
2113#if __FreeBSD_version < 800000
2114		IF_ADDR_LOCK(ifp);
2115#else
2116		if_maddr_rlock(ifp);
2117#endif
2118		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2119			if (ifma->ifma_addr->sa_family != AF_LINK)
2120				continue;
2121			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2122				break;
2123			mcnt++;
2124		}
2125#if __FreeBSD_version < 800000
2126		IF_ADDR_UNLOCK(ifp);
2127#else
2128		if_maddr_runlock(ifp);
2129#endif
2130	}
2131	/* Don't disable if in MAX groups */
2132	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2133		reg &=  (~E1000_RCTL_MPE);
2134	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2135}
2136
2137
2138/*********************************************************************
2139 *  Multicast Update
2140 *
2141 *  This routine is called whenever the multicast address list is updated.
2142 *
2143 **********************************************************************/
2144
2145static void
2146igb_set_multi(struct adapter *adapter)
2147{
2148	struct ifnet	*ifp = adapter->ifp;
2149	struct ifmultiaddr *ifma;
2150	u32 reg_rctl = 0;
2151	u8  *mta;
2152
2153	int mcnt = 0;
2154
2155	IOCTL_DEBUGOUT("igb_set_multi: begin");
2156
2157	mta = adapter->mta;
2158	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2159	    MAX_NUM_MULTICAST_ADDRESSES);
2160
2161#if __FreeBSD_version < 800000
2162	IF_ADDR_LOCK(ifp);
2163#else
2164	if_maddr_rlock(ifp);
2165#endif
2166	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2167		if (ifma->ifma_addr->sa_family != AF_LINK)
2168			continue;
2169
2170		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2171			break;
2172
2173		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2174		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2175		mcnt++;
2176	}
2177#if __FreeBSD_version < 800000
2178	IF_ADDR_UNLOCK(ifp);
2179#else
2180	if_maddr_runlock(ifp);
2181#endif
2182
2183	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2184		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2185		reg_rctl |= E1000_RCTL_MPE;
2186		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2187	} else
2188		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2189}
2190
2191
2192/*********************************************************************
2193 *  Timer routine:
2194 *  	This routine checks for link status,
2195 *	updates statistics, and does the watchdog.
2196 *
2197 **********************************************************************/
2198
2199static void
2200igb_local_timer(void *arg)
2201{
2202	struct adapter		*adapter = arg;
2203	device_t		dev = adapter->dev;
2204	struct ifnet		*ifp = adapter->ifp;
2205	struct tx_ring		*txr = adapter->tx_rings;
2206	struct igb_queue	*que = adapter->queues;
2207	int			hung = 0, busy = 0;
2208
2209
2210	IGB_CORE_LOCK_ASSERT(adapter);
2211
2212	igb_update_link_status(adapter);
2213	igb_update_stats_counters(adapter);
2214
2215        /*
2216        ** Check the TX queues status
2217	**	- central locked handling of OACTIVE
2218	**	- watchdog only if all queues show hung
2219        */
2220	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2221		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2222		    (adapter->pause_frames == 0))
2223			++hung;
2224		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2225			++busy;
2226		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2227			taskqueue_enqueue(que->tq, &que->que_task);
2228	}
2229	if (hung == adapter->num_queues)
2230		goto timeout;
2231	if (busy == adapter->num_queues)
2232		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2233	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2234	    (busy < adapter->num_queues))
2235		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2236
2237	adapter->pause_frames = 0;
2238	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2239#ifndef DEVICE_POLLING
2240	/* Schedule all queue interrupts - deadlock protection */
2241	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2242#endif
2243	return;
2244
2245timeout:
2246	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2247	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2248            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2249            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2250	device_printf(dev,"TX(%d) desc avail = %d,"
2251            "Next TX to Clean = %d\n",
2252            txr->me, txr->tx_avail, txr->next_to_clean);
2253	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2254	adapter->watchdog_events++;
2255	igb_init_locked(adapter);
2256}
2257
2258static void
2259igb_update_link_status(struct adapter *adapter)
2260{
2261	struct e1000_hw		*hw = &adapter->hw;
2262	struct e1000_fc_info	*fc = &hw->fc;
2263	struct ifnet		*ifp = adapter->ifp;
2264	device_t		dev = adapter->dev;
2265	struct tx_ring		*txr = adapter->tx_rings;
2266	u32			link_check, thstat, ctrl;
2267	char			*flowctl = NULL;
2268
2269	link_check = thstat = ctrl = 0;
2270
2271	/* Get the cached link value or read for real */
2272        switch (hw->phy.media_type) {
2273        case e1000_media_type_copper:
2274                if (hw->mac.get_link_status) {
2275			/* Do the work to read phy */
2276                        e1000_check_for_link(hw);
2277                        link_check = !hw->mac.get_link_status;
2278                } else
2279                        link_check = TRUE;
2280                break;
2281        case e1000_media_type_fiber:
2282                e1000_check_for_link(hw);
2283                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2284                                 E1000_STATUS_LU);
2285                break;
2286        case e1000_media_type_internal_serdes:
2287                e1000_check_for_link(hw);
2288                link_check = adapter->hw.mac.serdes_has_link;
2289                break;
2290	/* VF device is type_unknown */
2291        case e1000_media_type_unknown:
2292                e1000_check_for_link(hw);
2293		link_check = !hw->mac.get_link_status;
2294		/* Fall thru */
2295        default:
2296                break;
2297        }
2298
2299	/* Check for thermal downshift or shutdown */
2300	if (hw->mac.type == e1000_i350) {
2301		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2302		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2303	}
2304
2305	/* Get the flow control for display */
2306	switch (fc->current_mode) {
2307	case e1000_fc_rx_pause:
2308		flowctl = "RX";
2309		break;
2310	case e1000_fc_tx_pause:
2311		flowctl = "TX";
2312		break;
2313	case e1000_fc_full:
2314		flowctl = "Full";
2315		break;
2316	case e1000_fc_none:
2317	default:
2318		flowctl = "None";
2319		break;
2320	}
2321
2322	/* Now we check if a transition has happened */
2323	if (link_check && (adapter->link_active == 0)) {
2324		e1000_get_speed_and_duplex(&adapter->hw,
2325		    &adapter->link_speed, &adapter->link_duplex);
2326		if (bootverbose)
2327			device_printf(dev, "Link is up %d Mbps %s,"
2328			    " Flow Control: %s\n",
2329			    adapter->link_speed,
2330			    ((adapter->link_duplex == FULL_DUPLEX) ?
2331			    "Full Duplex" : "Half Duplex"), flowctl);
2332		adapter->link_active = 1;
2333		ifp->if_baudrate = adapter->link_speed * 1000000;
2334		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2335		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2336			device_printf(dev, "Link: thermal downshift\n");
2337		/* This can sleep */
2338		if_link_state_change(ifp, LINK_STATE_UP);
2339	} else if (!link_check && (adapter->link_active == 1)) {
2340		ifp->if_baudrate = adapter->link_speed = 0;
2341		adapter->link_duplex = 0;
2342		if (bootverbose)
2343			device_printf(dev, "Link is Down\n");
2344		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2345		    (thstat & E1000_THSTAT_PWR_DOWN))
2346			device_printf(dev, "Link: thermal shutdown\n");
2347		adapter->link_active = 0;
2348		/* This can sleep */
2349		if_link_state_change(ifp, LINK_STATE_DOWN);
2350		/* Reset queue state */
2351		for (int i = 0; i < adapter->num_queues; i++, txr++)
2352			txr->queue_status = IGB_QUEUE_IDLE;
2353	}
2354}
2355
2356/*********************************************************************
2357 *
2358 *  This routine disables all traffic on the adapter by issuing a
2359 *  global reset on the MAC and deallocates TX/RX buffers.
2360 *
2361 **********************************************************************/
2362
2363static void
2364igb_stop(void *arg)
2365{
2366	struct adapter	*adapter = arg;
2367	struct ifnet	*ifp = adapter->ifp;
2368	struct tx_ring *txr = adapter->tx_rings;
2369
2370	IGB_CORE_LOCK_ASSERT(adapter);
2371
2372	INIT_DEBUGOUT("igb_stop: begin");
2373
2374	igb_disable_intr(adapter);
2375
2376	callout_stop(&adapter->timer);
2377
2378	/* Tell the stack that the interface is no longer active */
2379	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2380	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2381
2382	/* Disarm watchdog timer. */
2383	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2384		IGB_TX_LOCK(txr);
2385		txr->queue_status = IGB_QUEUE_IDLE;
2386		IGB_TX_UNLOCK(txr);
2387	}
2388
2389	e1000_reset_hw(&adapter->hw);
2390	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2391
2392	e1000_led_off(&adapter->hw);
2393	e1000_cleanup_led(&adapter->hw);
2394}
2395
2396
2397/*********************************************************************
2398 *
2399 *  Determine hardware revision.
2400 *
2401 **********************************************************************/
2402static void
2403igb_identify_hardware(struct adapter *adapter)
2404{
2405	device_t dev = adapter->dev;
2406
2407	/* Make sure our PCI config space has the necessary stuff set */
2408	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2409	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2410	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2411		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2412		    "bits were not set!\n");
2413		adapter->hw.bus.pci_cmd_word |=
2414		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2415		pci_write_config(dev, PCIR_COMMAND,
2416		    adapter->hw.bus.pci_cmd_word, 2);
2417	}
2418
2419	/* Save off the information about this board */
2420	adapter->hw.vendor_id = pci_get_vendor(dev);
2421	adapter->hw.device_id = pci_get_device(dev);
2422	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2423	adapter->hw.subsystem_vendor_id =
2424	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2425	adapter->hw.subsystem_device_id =
2426	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2427
2428	/* Set MAC type early for PCI setup */
2429	e1000_set_mac_type(&adapter->hw);
2430
2431	/* Are we a VF device? */
2432	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2433	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2434		adapter->vf_ifp = 1;
2435	else
2436		adapter->vf_ifp = 0;
2437}
2438
2439static int
2440igb_allocate_pci_resources(struct adapter *adapter)
2441{
2442	device_t	dev = adapter->dev;
2443	int		rid;
2444
2445	rid = PCIR_BAR(0);
2446	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2447	    &rid, RF_ACTIVE);
2448	if (adapter->pci_mem == NULL) {
2449		device_printf(dev, "Unable to allocate bus resource: memory\n");
2450		return (ENXIO);
2451	}
2452	adapter->osdep.mem_bus_space_tag =
2453	    rman_get_bustag(adapter->pci_mem);
2454	adapter->osdep.mem_bus_space_handle =
2455	    rman_get_bushandle(adapter->pci_mem);
2456	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2457
2458	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2459
2460	/* This will setup either MSI/X or MSI */
2461	adapter->msix = igb_setup_msix(adapter);
2462	adapter->hw.back = &adapter->osdep;
2463
2464	return (0);
2465}
2466
2467/*********************************************************************
2468 *
2469 *  Setup the Legacy or MSI Interrupt handler
2470 *
2471 **********************************************************************/
2472static int
2473igb_allocate_legacy(struct adapter *adapter)
2474{
2475	device_t		dev = adapter->dev;
2476	struct igb_queue	*que = adapter->queues;
2477	struct tx_ring		*txr = adapter->tx_rings;
2478	int			error, rid = 0;
2479
2480	/* Turn off all interrupts */
2481	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2482
2483	/* MSI RID is 1 */
2484	if (adapter->msix == 1)
2485		rid = 1;
2486
2487	/* We allocate a single interrupt resource */
2488	adapter->res = bus_alloc_resource_any(dev,
2489	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2490	if (adapter->res == NULL) {
2491		device_printf(dev, "Unable to allocate bus resource: "
2492		    "interrupt\n");
2493		return (ENXIO);
2494	}
2495
2496#ifndef IGB_LEGACY_TX
2497	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2498#endif
2499
2500	/*
2501	 * Try allocating a fast interrupt and the associated deferred
2502	 * processing contexts.
2503	 */
2504	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2505	/* Make tasklet for deferred link handling */
2506	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2507	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2508	    taskqueue_thread_enqueue, &que->tq);
2509	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2510	    device_get_nameunit(adapter->dev));
2511	if ((error = bus_setup_intr(dev, adapter->res,
2512	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2513	    adapter, &adapter->tag)) != 0) {
2514		device_printf(dev, "Failed to register fast interrupt "
2515			    "handler: %d\n", error);
2516		taskqueue_free(que->tq);
2517		que->tq = NULL;
2518		return (error);
2519	}
2520
2521	return (0);
2522}
2523
2524
2525/*********************************************************************
2526 *
2527 *  Setup the MSIX Queue Interrupt handlers:
2528 *
2529 **********************************************************************/
2530static int
2531igb_allocate_msix(struct adapter *adapter)
2532{
2533	device_t		dev = adapter->dev;
2534	struct igb_queue	*que = adapter->queues;
2535	int			error, rid, vector = 0;
2536
2537	/* Be sure to start with all interrupts disabled */
2538	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2539	E1000_WRITE_FLUSH(&adapter->hw);
2540
2541	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2542		rid = vector + 1;
2543		que->res = bus_alloc_resource_any(dev,
2544		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2545		if (que->res == NULL) {
2546			device_printf(dev,
2547			    "Unable to allocate bus resource: "
2548			    "MSIX Queue Interrupt\n");
2549			return (ENXIO);
2550		}
2551		error = bus_setup_intr(dev, que->res,
2552	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2553		    igb_msix_que, que, &que->tag);
2554		if (error) {
2555			que->res = NULL;
2556			device_printf(dev, "Failed to register Queue handler");
2557			return (error);
2558		}
2559#if __FreeBSD_version >= 800504
2560		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2561#endif
2562		que->msix = vector;
2563		if (adapter->hw.mac.type == e1000_82575)
2564			que->eims = E1000_EICR_TX_QUEUE0 << i;
2565		else
2566			que->eims = 1 << vector;
2567		/*
2568		** Bind the msix vector, and thus the
2569		** rings to the corresponding cpu.
2570		*/
2571		if (adapter->num_queues > 1) {
2572			if (igb_last_bind_cpu < 0)
2573				igb_last_bind_cpu = CPU_FIRST();
2574			bus_bind_intr(dev, que->res, igb_last_bind_cpu);
2575			device_printf(dev,
2576				"Bound queue %d to cpu %d\n",
2577				i, igb_last_bind_cpu);
2578			igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2579		}
2580#ifndef IGB_LEGACY_TX
2581		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2582		    que->txr);
2583#endif
2584		/* Make tasklet for deferred handling */
2585		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2586		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2587		    taskqueue_thread_enqueue, &que->tq);
2588		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2589		    device_get_nameunit(adapter->dev));
2590	}
2591
2592	/* And Link */
2593	rid = vector + 1;
2594	adapter->res = bus_alloc_resource_any(dev,
2595	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2596	if (adapter->res == NULL) {
2597		device_printf(dev,
2598		    "Unable to allocate bus resource: "
2599		    "MSIX Link Interrupt\n");
2600		return (ENXIO);
2601	}
2602	if ((error = bus_setup_intr(dev, adapter->res,
2603	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2604	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2605		device_printf(dev, "Failed to register Link handler");
2606		return (error);
2607	}
2608#if __FreeBSD_version >= 800504
2609	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2610#endif
2611	adapter->linkvec = vector;
2612
2613	return (0);
2614}
2615
2616
2617static void
2618igb_configure_queues(struct adapter *adapter)
2619{
2620	struct	e1000_hw	*hw = &adapter->hw;
2621	struct	igb_queue	*que;
2622	u32			tmp, ivar = 0, newitr = 0;
2623
2624	/* First turn on RSS capability */
2625	if (adapter->hw.mac.type != e1000_82575)
2626		E1000_WRITE_REG(hw, E1000_GPIE,
2627		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2628		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2629
2630	/* Turn on MSIX */
2631	switch (adapter->hw.mac.type) {
2632	case e1000_82580:
2633	case e1000_i350:
2634	case e1000_i210:
2635	case e1000_i211:
2636	case e1000_vfadapt:
2637	case e1000_vfadapt_i350:
2638		/* RX entries */
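		/*
		 * Each 32-bit IVAR register holds the vector assignments for
		 * two queues: judging from the masks below, RX for even
		 * queues lands in bits 7:0 and for odd queues in bits 23:16,
		 * while TX uses bits 15:8 and 31:24 respectively.
		 */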
2639		for (int i = 0; i < adapter->num_queues; i++) {
2640			u32 index = i >> 1;
2641			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2642			que = &adapter->queues[i];
2643			if (i & 1) {
2644				ivar &= 0xFF00FFFF;
2645				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2646			} else {
2647				ivar &= 0xFFFFFF00;
2648				ivar |= que->msix | E1000_IVAR_VALID;
2649			}
2650			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2651		}
2652		/* TX entries */
2653		for (int i = 0; i < adapter->num_queues; i++) {
2654			u32 index = i >> 1;
2655			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2656			que = &adapter->queues[i];
2657			if (i & 1) {
2658				ivar &= 0x00FFFFFF;
2659				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2660			} else {
2661				ivar &= 0xFFFF00FF;
2662				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2663			}
2664			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2665			adapter->que_mask |= que->eims;
2666		}
2667
2668		/* And for the link interrupt */
2669		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2670		adapter->link_mask = 1 << adapter->linkvec;
2671		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2672		break;
2673	case e1000_82576:
2674		/* RX entries */
2675		for (int i = 0; i < adapter->num_queues; i++) {
2676			u32 index = i & 0x7; /* Each IVAR has two entries */
2677			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2678			que = &adapter->queues[i];
2679			if (i < 8) {
2680				ivar &= 0xFFFFFF00;
2681				ivar |= que->msix | E1000_IVAR_VALID;
2682			} else {
2683				ivar &= 0xFF00FFFF;
2684				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2685			}
2686			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2687			adapter->que_mask |= que->eims;
2688		}
2689		/* TX entries */
2690		for (int i = 0; i < adapter->num_queues; i++) {
2691			u32 index = i & 0x7; /* Each IVAR has two entries */
2692			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2693			que = &adapter->queues[i];
2694			if (i < 8) {
2695				ivar &= 0xFFFF00FF;
2696				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2697			} else {
2698				ivar &= 0x00FFFFFF;
2699				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2700			}
2701			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2702			adapter->que_mask |= que->eims;
2703		}
2704
2705		/* And for the link interrupt */
2706		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2707		adapter->link_mask = 1 << adapter->linkvec;
2708		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2709		break;
2710
2711	case e1000_82575:
2712                /* Enable MSI-X support */
2713		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2714                tmp |= E1000_CTRL_EXT_PBA_CLR;
2715                /* Auto-Mask interrupts upon ICR read. */
2716                tmp |= E1000_CTRL_EXT_EIAME;
2717                tmp |= E1000_CTRL_EXT_IRCA;
2718                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2719
2720		/* Queues */
2721		for (int i = 0; i < adapter->num_queues; i++) {
2722			que = &adapter->queues[i];
2723			tmp = E1000_EICR_RX_QUEUE0 << i;
2724			tmp |= E1000_EICR_TX_QUEUE0 << i;
2725			que->eims = tmp;
2726			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2727			    i, que->eims);
2728			adapter->que_mask |= que->eims;
2729		}
2730
2731		/* Link */
2732		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2733		    E1000_EIMS_OTHER);
2734		adapter->link_mask |= E1000_EIMS_OTHER;
2735	default:
2736		break;
2737	}
2738
2739	/* Set the starting interrupt rate */
2740	if (igb_max_interrupt_rate > 0)
2741		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
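	/*
	 * For example, a target of 8000 interrupts/s would give
	 * newitr = 4000000 / 8000 = 500, masked down to a multiple of 4
	 * before being written to EITR below.
	 */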
2742
2743        if (hw->mac.type == e1000_82575)
2744                newitr |= newitr << 16;
2745        else
2746                newitr |= E1000_EITR_CNT_IGNR;
2747
2748	for (int i = 0; i < adapter->num_queues; i++) {
2749		que = &adapter->queues[i];
2750		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2751	}
2752
2753	return;
2754}
2755
2756
2757static void
2758igb_free_pci_resources(struct adapter *adapter)
2759{
2760	struct		igb_queue *que = adapter->queues;
2761	device_t	dev = adapter->dev;
2762	int		rid;
2763
2764	/*
2765	** There is a slight possibility of a failure mode
2766	** in attach that will result in entering this function
2767	** before interrupt resources have been initialized, and
2768	** in that case we do not want to execute the loops below.
2769	** We can detect this reliably by the state of the adapter
2770	** res pointer.
2771	*/
2772	if (adapter->res == NULL)
2773		goto mem;
2774
2775	/*
2776	 * First release all the interrupt resources:
2777	 */
2778	for (int i = 0; i < adapter->num_queues; i++, que++) {
2779		rid = que->msix + 1;
2780		if (que->tag != NULL) {
2781			bus_teardown_intr(dev, que->res, que->tag);
2782			que->tag = NULL;
2783		}
2784		if (que->res != NULL)
2785			bus_release_resource(dev,
2786			    SYS_RES_IRQ, rid, que->res);
2787	}
2788
2789	/* Clean the Legacy or Link interrupt last */
2790	if (adapter->linkvec) /* we are doing MSIX */
2791		rid = adapter->linkvec + 1;
2792	else
2793		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2794
2795	que = adapter->queues;
2796	if (adapter->tag != NULL) {
2797		taskqueue_drain(que->tq, &adapter->link_task);
2798		bus_teardown_intr(dev, adapter->res, adapter->tag);
2799		adapter->tag = NULL;
2800	}
2801	if (adapter->res != NULL)
2802		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2803
2804	for (int i = 0; i < adapter->num_queues; i++, que++) {
2805		if (que->tq != NULL) {
2806#ifndef IGB_LEGACY_TX
2807			taskqueue_drain(que->tq, &que->txr->txq_task);
2808#endif
2809			taskqueue_drain(que->tq, &que->que_task);
2810			taskqueue_free(que->tq);
2811		}
2812	}
2813mem:
2814	if (adapter->msix)
2815		pci_release_msi(dev);
2816
2817	if (adapter->msix_mem != NULL)
2818		bus_release_resource(dev, SYS_RES_MEMORY,
2819		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2820
2821	if (adapter->pci_mem != NULL)
2822		bus_release_resource(dev, SYS_RES_MEMORY,
2823		    PCIR_BAR(0), adapter->pci_mem);
2824
2825}
2826
2827/*
2828 * Setup Either MSI/X or MSI
2829 */
2830static int
2831igb_setup_msix(struct adapter *adapter)
2832{
2833	device_t dev = adapter->dev;
2834	int rid, want, queues, msgs, maxqueues;
2835
2836	/* tuneable override */
2837	if (igb_enable_msix == 0)
2838		goto msi;
2839
2840	/* First try MSI/X */
2841	rid = PCIR_BAR(IGB_MSIX_BAR);
2842	adapter->msix_mem = bus_alloc_resource_any(dev,
2843	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2844       	if (!adapter->msix_mem) {
2845		/* May not be enabled */
2846		device_printf(adapter->dev,
2847		    "Unable to map MSIX table \n");
2848		goto msi;
2849	}
2850
2851	msgs = pci_msix_count(dev);
2852	if (msgs == 0) { /* system has msix disabled */
2853		bus_release_resource(dev, SYS_RES_MEMORY,
2854		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2855		adapter->msix_mem = NULL;
2856		goto msi;
2857	}
2858
2859	/* Figure out a reasonable auto config value */
2860	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2861
2862	/* Manual override */
2863	if (igb_num_queues != 0)
2864		queues = igb_num_queues;
2865
2866	/* Sanity check based on HW */
2867	switch (adapter->hw.mac.type) {
2868		case e1000_82575:
2869			maxqueues = 4;
2870			break;
2871		case e1000_82576:
2872		case e1000_82580:
2873		case e1000_i350:
2874			maxqueues = 8;
2875			break;
2876		case e1000_i210:
2877			maxqueues = 4;
2878			break;
2879		case e1000_i211:
2880			maxqueues = 2;
2881			break;
2882		default:  /* VF interfaces */
2883			maxqueues = 1;
2884			break;
2885	}
2886	if (queues > maxqueues)
2887		queues = maxqueues;
2888
2889	/*
2890	** One vector (RX/TX pair) per queue
2891	** plus an additional one for the Link interrupt
2892	*/
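	/*
	 * For example, on a 4-core system reporting 10 MSI-X messages,
	 * queues would be 4 (subject to the per-MAC cap above) and
	 * want = 5: four queue vectors plus the link vector.
	 */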
2893	want = queues + 1;
2894	if (msgs >= want)
2895		msgs = want;
2896	else {
2897               	device_printf(adapter->dev,
2898		    "MSIX Configuration Problem, "
2899		    "%d vectors configured, but %d queues wanted!\n",
2900		    msgs, want);
2901		return (0);
2902	}
2903	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2904               	device_printf(adapter->dev,
2905		    "Using MSIX interrupts with %d vectors\n", msgs);
2906		adapter->num_queues = queues;
2907		return (msgs);
2908	}
2909msi:
2910       	msgs = pci_msi_count(dev);
2911	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2912		device_printf(adapter->dev," Using MSI interrupt\n");
2913		return (msgs);
2914	}
2915	return (0);
2916}
2917
2918/*********************************************************************
2919 *
2920 *  Set up a fresh starting state
2921 *
2922 **********************************************************************/
2923static void
2924igb_reset(struct adapter *adapter)
2925{
2926	device_t	dev = adapter->dev;
2927	struct e1000_hw *hw = &adapter->hw;
2928	struct e1000_fc_info *fc = &hw->fc;
2929	struct ifnet	*ifp = adapter->ifp;
2930	u32		pba = 0;
2931	u16		hwm;
2932
2933	INIT_DEBUGOUT("igb_reset: begin");
2934
2935	/* Let the firmware know the OS is in control */
2936	igb_get_hw_control(adapter);
2937
2938	/*
2939	 * Packet Buffer Allocation (PBA)
2940	 * Writing PBA sets the receive portion of the buffer
2941	 * the remainder is used for the transmit buffer.
2942	 */
2943	switch (hw->mac.type) {
2944	case e1000_82575:
2945		pba = E1000_PBA_32K;
2946		break;
2947	case e1000_82576:
2948	case e1000_vfadapt:
2949		pba = E1000_READ_REG(hw, E1000_RXPBS);
2950		pba &= E1000_RXPBS_SIZE_MASK_82576;
2951		break;
2952	case e1000_82580:
2953	case e1000_i350:
2954	case e1000_vfadapt_i350:
2955		pba = E1000_READ_REG(hw, E1000_RXPBS);
2956		pba = e1000_rxpbs_adjust_82580(pba);
2957		break;
2958	case e1000_i210:
2959	case e1000_i211:
2960		pba = E1000_PBA_34K;
2961	default:
2962		break;
2963	}
2964
2965	/* Special needs in case of Jumbo frames */
2966	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2967		u32 tx_space, min_tx, min_rx;
2968		pba = E1000_READ_REG(hw, E1000_PBA);
2969		tx_space = pba >> 16;
2970		pba &= 0xffff;
2971		min_tx = (adapter->max_frame_size +
2972		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2973		min_tx = roundup2(min_tx, 1024);
2974		min_tx >>= 10;
2975                min_rx = adapter->max_frame_size;
2976                min_rx = roundup2(min_rx, 1024);
2977                min_rx >>= 10;
2978		if (tx_space < min_tx &&
2979		    ((min_tx - tx_space) < pba)) {
2980			pba = pba - (min_tx - tx_space);
2981			/*
2982                         * if short on rx space, rx wins
2983                         * and must trump tx adjustment
2984			 */
2985                        if (pba < min_rx)
2986                                pba = min_rx;
2987		}
2988		E1000_WRITE_REG(hw, E1000_PBA, pba);
2989	}
2990
2991	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2992
2993	/*
2994	 * These parameters control the automatic generation (Tx) and
2995	 * response (Rx) to Ethernet PAUSE frames.
2996	 * - High water mark should allow for at least two frames to be
2997	 *   received after sending an XOFF.
2998	 * - Low water mark works best when it is very near the high water mark.
2999	 *   This allows the receiver to restart by sending XON when it has
3000	 *   drained a bit.
3001	 */
3002	hwm = min(((pba << 10) * 9 / 10),
3003	    ((pba << 10) - 2 * adapter->max_frame_size));
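	/*
	 * Example (assuming pba is in KB, as the debug printout above
	 * suggests): with a 32KB receive buffer and a 1522-byte max frame,
	 * hwm = min(32768 * 9 / 10, 32768 - 2 * 1522) = min(29491, 29724)
	 * = 29491, which the granularity masks below round down further.
	 */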
3004
3005	if (hw->mac.type < e1000_82576) {
3006		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
3007		fc->low_water = fc->high_water - 8;
3008	} else {
3009		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
3010		fc->low_water = fc->high_water - 16;
3011	}
3012
3013	fc->pause_time = IGB_FC_PAUSE_TIME;
3014	fc->send_xon = TRUE;
3015	if (adapter->fc)
3016		fc->requested_mode = adapter->fc;
3017	else
3018		fc->requested_mode = e1000_fc_default;
3019
3020	/* Issue a global reset */
3021	e1000_reset_hw(hw);
3022	E1000_WRITE_REG(hw, E1000_WUC, 0);
3023
3024	if (e1000_init_hw(hw) < 0)
3025		device_printf(dev, "Hardware Initialization Failed\n");
3026
3027	/* Setup DMA Coalescing */
3028	if ((hw->mac.type > e1000_82580) &&
3029	    (hw->mac.type != e1000_i211)) {
3030		u32 dmac;
3031		u32 reg = ~E1000_DMACR_DMAC_EN;
3032
3033		if (adapter->dmac == 0) { /* Disabling it */
3034			E1000_WRITE_REG(hw, E1000_DMACR, reg);
3035			goto reset_out;
3036		}
3037
3038		/* Set starting thresholds */
3039		E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
3040		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
3041
3042		hwm = 64 * pba - adapter->max_frame_size / 16;
3043		if (hwm < 64 * (pba - 6))
3044			hwm = 64 * (pba - 6);
3045		reg = E1000_READ_REG(hw, E1000_FCRTC);
3046		reg &= ~E1000_FCRTC_RTH_COAL_MASK;
3047		reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
3048		    & E1000_FCRTC_RTH_COAL_MASK);
3049		E1000_WRITE_REG(hw, E1000_FCRTC, reg);
3050
3051
3052		dmac = pba - adapter->max_frame_size / 512;
3053		if (dmac < pba - 10)
3054			dmac = pba - 10;
3055		reg = E1000_READ_REG(hw, E1000_DMACR);
3056		reg &= ~E1000_DMACR_DMACTHR_MASK;
3057		reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
3058		    & E1000_DMACR_DMACTHR_MASK);
3059		/* transition to L0x or L1 if available..*/
3060		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
3061		/* timer = value in adapter->dmac in 32usec intervals */
3062		reg |= (adapter->dmac >> 5);
3063		E1000_WRITE_REG(hw, E1000_DMACR, reg);
3064
3065		/* Set the interval before transition */
3066		reg = E1000_READ_REG(hw, E1000_DMCTLX);
3067		reg |= 0x80000004;
3068		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
3069
3070		/* free space in tx packet buffer to wake from DMA coal */
3071		E1000_WRITE_REG(hw, E1000_DMCTXTH,
3072		    (20480 - (2 * adapter->max_frame_size)) >> 6);
3073
3074		/* make low power state decision controlled by DMA coal */
3075		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3076		reg &= ~E1000_PCIEMISC_LX_DECISION;
3077		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
3078		device_printf(dev, "DMA Coalescing enabled\n");
3079
3080	} else if (hw->mac.type == e1000_82580) {
3081		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
3082		E1000_WRITE_REG(hw, E1000_DMACR, 0);
3083		E1000_WRITE_REG(hw, E1000_PCIEMISC,
3084		    reg & ~E1000_PCIEMISC_LX_DECISION);
3085	}
3086
3087reset_out:
3088	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3089	e1000_get_phy_info(hw);
3090	e1000_check_for_link(hw);
3091	return;
3092}
3093
3094/*********************************************************************
3095 *
3096 *  Setup networking device structure and register an interface.
3097 *
3098 **********************************************************************/
3099static int
3100igb_setup_interface(device_t dev, struct adapter *adapter)
3101{
3102	struct ifnet   *ifp;
3103
3104	INIT_DEBUGOUT("igb_setup_interface: begin");
3105
3106	ifp = adapter->ifp = if_alloc(IFT_ETHER);
3107	if (ifp == NULL) {
3108		device_printf(dev, "can not allocate ifnet structure\n");
3109		return (-1);
3110	}
3111	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3112	ifp->if_init =  igb_init;
3113	ifp->if_softc = adapter;
3114	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3115	ifp->if_ioctl = igb_ioctl;
3116#ifndef IGB_LEGACY_TX
3117	ifp->if_transmit = igb_mq_start;
3118	ifp->if_qflush = igb_qflush;
3119#else
3120	ifp->if_start = igb_start;
3121	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3122	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3123	IFQ_SET_READY(&ifp->if_snd);
3124#endif
3125
3126	ether_ifattach(ifp, adapter->hw.mac.addr);
3127
3128	ifp->if_capabilities = ifp->if_capenable = 0;
3129
3130	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3131	ifp->if_capabilities |= IFCAP_TSO4;
3132	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3133	ifp->if_capenable = ifp->if_capabilities;
3134
3135	/* Advertise LRO capability, but leave it disabled by default */
3136	ifp->if_capabilities |= IFCAP_LRO;
3137
3138#ifdef DEVICE_POLLING
3139	ifp->if_capabilities |= IFCAP_POLLING;
3140#endif
3141
3142	/*
3143	 * Tell the upper layer(s) we
3144	 * support full VLAN capability.
3145	 */
3146	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3147	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3148			     |  IFCAP_VLAN_HWTSO
3149			     |  IFCAP_VLAN_MTU;
3150	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3151			  |  IFCAP_VLAN_HWTSO
3152			  |  IFCAP_VLAN_MTU;
3153
3154	/*
3155	** Don't turn this on by default: if vlans are
3156	** created on another pseudo device (e.g. lagg)
3157	** then vlan events are not passed thru, breaking
3158	** operation, but with HW FILTER off it works. If
3159	** using vlans directly on the igb driver you can
3160	** enable this and get full hardware tag filtering.
3161	*/
3162	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3163
3164	/*
3165	 * Specify the media types supported by this adapter and register
3166	 * callbacks to update media and link information
3167	 */
3168	ifmedia_init(&adapter->media, IFM_IMASK,
3169	    igb_media_change, igb_media_status);
3170	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3171	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3172		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3173			    0, NULL);
3174		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3175	} else {
3176		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3177		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3178			    0, NULL);
3179		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3180			    0, NULL);
3181		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3182			    0, NULL);
3183		if (adapter->hw.phy.type != e1000_phy_ife) {
3184			ifmedia_add(&adapter->media,
3185				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3186			ifmedia_add(&adapter->media,
3187				IFM_ETHER | IFM_1000_T, 0, NULL);
3188		}
3189	}
3190	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3191	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3192	return (0);
3193}
3194
3195
3196/*
3197 * Manage DMA'able memory.
3198 */
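/*
 * igb_dma_malloc() below creates a single-segment, IGB_DBA_ALIGN-aligned
 * DMA area of the requested size; the callback merely captures the bus
 * address, and dma_paddr is zeroed first so a failed or deferred load can
 * be detected.
 */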
3199static void
3200igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3201{
3202	if (error)
3203		return;
3204	*(bus_addr_t *) arg = segs[0].ds_addr;
3205}
3206
3207static int
3208igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3209        struct igb_dma_alloc *dma, int mapflags)
3210{
3211	int error;
3212
3213	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3214				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3215				BUS_SPACE_MAXADDR,	/* lowaddr */
3216				BUS_SPACE_MAXADDR,	/* highaddr */
3217				NULL, NULL,		/* filter, filterarg */
3218				size,			/* maxsize */
3219				1,			/* nsegments */
3220				size,			/* maxsegsize */
3221				0,			/* flags */
3222				NULL,			/* lockfunc */
3223				NULL,			/* lockarg */
3224				&dma->dma_tag);
3225	if (error) {
3226		device_printf(adapter->dev,
3227		    "%s: bus_dma_tag_create failed: %d\n",
3228		    __func__, error);
3229		goto fail_0;
3230	}
3231
3232	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3233	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3234	if (error) {
3235		device_printf(adapter->dev,
3236		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3237		    __func__, (uintmax_t)size, error);
3238		goto fail_2;
3239	}
3240
3241	dma->dma_paddr = 0;
3242	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3243	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3244	if (error || dma->dma_paddr == 0) {
3245		device_printf(adapter->dev,
3246		    "%s: bus_dmamap_load failed: %d\n",
3247		    __func__, error);
3248		goto fail_3;
3249	}
3250
3251	return (0);
3252
3253fail_3:
3254	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3255fail_2:
3256	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3257	bus_dma_tag_destroy(dma->dma_tag);
3258fail_0:
3259	dma->dma_map = NULL;
3260	dma->dma_tag = NULL;
3261
3262	return (error);
3263}
3264
3265static void
3266igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3267{
3268	if (dma->dma_tag == NULL)
3269		return;
3270	if (dma->dma_map != NULL) {
3271		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3272		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3273		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3274		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3275		dma->dma_map = NULL;
3276	}
3277	bus_dma_tag_destroy(dma->dma_tag);
3278	dma->dma_tag = NULL;
3279}
3280
3281
3282/*********************************************************************
3283 *
3284 *  Allocate memory for the transmit and receive rings, and then
3285 *  the descriptors associated with each, called only once at attach.
3286 *
3287 **********************************************************************/
3288static int
3289igb_allocate_queues(struct adapter *adapter)
3290{
3291	device_t dev = adapter->dev;
3292	struct igb_queue	*que = NULL;
3293	struct tx_ring		*txr = NULL;
3294	struct rx_ring		*rxr = NULL;
3295	int rsize, tsize, error = E1000_SUCCESS;
3296	int txconf = 0, rxconf = 0;
3297
3298	/* First allocate the top level queue structs */
3299	if (!(adapter->queues =
3300	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3301	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3302		device_printf(dev, "Unable to allocate queue memory\n");
3303		error = ENOMEM;
3304		goto fail;
3305	}
3306
3307	/* Next allocate the TX ring struct memory */
3308	if (!(adapter->tx_rings =
3309	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3310	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3311		device_printf(dev, "Unable to allocate TX ring memory\n");
3312		error = ENOMEM;
3313		goto tx_fail;
3314	}
3315
3316	/* Now allocate the RX */
3317	if (!(adapter->rx_rings =
3318	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3319	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3320		device_printf(dev, "Unable to allocate RX ring memory\n");
3321		error = ENOMEM;
3322		goto rx_fail;
3323	}
3324
3325	tsize = roundup2(adapter->num_tx_desc *
3326	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
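	/*
	 * Each advanced TX descriptor is 16 bytes, so e.g. a ring of 1024
	 * descriptors needs 16KB, rounded up here to the IGB_DBA_ALIGN
	 * boundary (assumed to be defined in if_igb.h).
	 */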
3327	/*
3328	 * Now set up the TX queues, txconf is needed to handle the
3329	 * possibility that things fail midcourse and we need to
3330	 * undo memory gracefully
3331	 */
3332	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3333		/* Set up some basics */
3334		txr = &adapter->tx_rings[i];
3335		txr->adapter = adapter;
3336		txr->me = i;
3337
3338		/* Initialize the TX lock */
3339		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3340		    device_get_nameunit(dev), txr->me);
3341		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3342
3343		if (igb_dma_malloc(adapter, tsize,
3344			&txr->txdma, BUS_DMA_NOWAIT)) {
3345			device_printf(dev,
3346			    "Unable to allocate TX Descriptor memory\n");
3347			error = ENOMEM;
3348			goto err_tx_desc;
3349		}
3350		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3351		bzero((void *)txr->tx_base, tsize);
3352
3353        	/* Now allocate transmit buffers for the ring */
3354        	if (igb_allocate_transmit_buffers(txr)) {
3355			device_printf(dev,
3356			    "Critical Failure setting up transmit buffers\n");
3357			error = ENOMEM;
3358			goto err_tx_desc;
3359        	}
3360#ifndef IGB_LEGACY_TX
3361		/* Allocate a buf ring */
3362		txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3363		    M_WAITOK, &txr->tx_mtx);
3364#endif
3365	}
3366
3367	/*
3368	 * Next the RX queues...
3369	 */
3370	rsize = roundup2(adapter->num_rx_desc *
3371	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3372	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3373		rxr = &adapter->rx_rings[i];
3374		rxr->adapter = adapter;
3375		rxr->me = i;
3376
3377		/* Initialize the RX lock */
3378		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3379		    device_get_nameunit(dev), rxr->me);
3380		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3381
3382		if (igb_dma_malloc(adapter, rsize,
3383			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3384			device_printf(dev,
3385			    "Unable to allocate RxDescriptor memory\n");
3386			error = ENOMEM;
3387			goto err_rx_desc;
3388		}
3389		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3390		bzero((void *)rxr->rx_base, rsize);
3391
3392        	/* Allocate receive buffers for the ring*/
3393		if (igb_allocate_receive_buffers(rxr)) {
3394			device_printf(dev,
3395			    "Critical Failure setting up receive buffers\n");
3396			error = ENOMEM;
3397			goto err_rx_desc;
3398		}
3399	}
3400
3401	/*
3402	** Finally set up the queue holding structs
3403	*/
3404	for (int i = 0; i < adapter->num_queues; i++) {
3405		que = &adapter->queues[i];
3406		que->adapter = adapter;
3407		que->txr = &adapter->tx_rings[i];
3408		que->rxr = &adapter->rx_rings[i];
3409	}
3410
3411	return (0);
3412
3413err_rx_desc:
3414	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3415		igb_dma_free(adapter, &rxr->rxdma);
3416err_tx_desc:
3417	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3418		igb_dma_free(adapter, &txr->txdma);
3419	free(adapter->rx_rings, M_DEVBUF);
3420rx_fail:
3421#ifndef IGB_LEGACY_TX
3422	buf_ring_free(txr->br, M_DEVBUF);
3423#endif
3424	free(adapter->tx_rings, M_DEVBUF);
3425tx_fail:
3426	free(adapter->queues, M_DEVBUF);
3427fail:
3428	return (error);
3429}
3430
3431/*********************************************************************
3432 *
3433 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3434 *  the information needed to transmit a packet on the wire. This is
3435 *  called only once at attach; setup is done on every reset.
3436 *
3437 **********************************************************************/
3438static int
3439igb_allocate_transmit_buffers(struct tx_ring *txr)
3440{
3441	struct adapter *adapter = txr->adapter;
3442	device_t dev = adapter->dev;
3443	struct igb_tx_buffer *txbuf;
3444	int error, i;
3445
3446	/*
3447	 * Setup DMA descriptor areas.
3448	 */
3449	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3450			       1, 0,			/* alignment, bounds */
3451			       BUS_SPACE_MAXADDR,	/* lowaddr */
3452			       BUS_SPACE_MAXADDR,	/* highaddr */
3453			       NULL, NULL,		/* filter, filterarg */
3454			       IGB_TSO_SIZE,		/* maxsize */
3455			       IGB_MAX_SCATTER,		/* nsegments */
3456			       PAGE_SIZE,		/* maxsegsize */
3457			       0,			/* flags */
3458			       NULL,			/* lockfunc */
3459			       NULL,			/* lockfuncarg */
3460			       &txr->txtag))) {
3461		device_printf(dev,"Unable to allocate TX DMA tag\n");
3462		goto fail;
3463	}
3464
3465	if (!(txr->tx_buffers =
3466	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3467	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3468		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3469		error = ENOMEM;
3470		goto fail;
3471	}
3472
3473        /* Create the descriptor buffer dma maps */
3474	txbuf = txr->tx_buffers;
3475	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3476		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3477		if (error != 0) {
3478			device_printf(dev, "Unable to create TX DMA map\n");
3479			goto fail;
3480		}
3481	}
3482
3483	return 0;
3484fail:
3485	/* We free all, it handles case where we are in the middle */
3486	igb_free_transmit_structures(adapter);
3487	return (error);
3488}
3489
3490/*********************************************************************
3491 *
3492 *  Initialize a transmit ring.
3493 *
3494 **********************************************************************/
3495static void
3496igb_setup_transmit_ring(struct tx_ring *txr)
3497{
3498	struct adapter *adapter = txr->adapter;
3499	struct igb_tx_buffer *txbuf;
3500	int i;
3501#ifdef DEV_NETMAP
3502	struct netmap_adapter *na = NA(adapter->ifp);
3503	struct netmap_slot *slot;
3504#endif /* DEV_NETMAP */
3505
3506	/* Clear the old descriptor contents */
3507	IGB_TX_LOCK(txr);
3508#ifdef DEV_NETMAP
3509	slot = netmap_reset(na, NR_TX, txr->me, 0);
3510#endif /* DEV_NETMAP */
3511	bzero((void *)txr->tx_base,
3512	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3513	/* Reset indices */
3514	txr->next_avail_desc = 0;
3515	txr->next_to_clean = 0;
3516
3517	/* Free any existing tx buffers. */
3518        txbuf = txr->tx_buffers;
3519	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3520		if (txbuf->m_head != NULL) {
3521			bus_dmamap_sync(txr->txtag, txbuf->map,
3522			    BUS_DMASYNC_POSTWRITE);
3523			bus_dmamap_unload(txr->txtag, txbuf->map);
3524			m_freem(txbuf->m_head);
3525			txbuf->m_head = NULL;
3526		}
3527#ifdef DEV_NETMAP
3528		if (slot) {
3529			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3530			/* no need to set the address */
3531			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3532		}
3533#endif /* DEV_NETMAP */
3534		/* clear the watch index */
3535		txbuf->next_eop = -1;
3536        }
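	/*
	** next_eop is the watch index: for a buffer that starts a
	** pending packet it holds the index of that packet's last
	** descriptor, and -1 (set above) means nothing is pending.
	** igb_txeof() relies on this to find completed packets.
	*/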
3537
3538	/* Set number of descriptors available */
3539	txr->tx_avail = adapter->num_tx_desc;
3540
3541	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3542	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3543	IGB_TX_UNLOCK(txr);
3544}
3545
3546/*********************************************************************
3547 *
3548 *  Initialize all transmit rings.
3549 *
3550 **********************************************************************/
3551static void
3552igb_setup_transmit_structures(struct adapter *adapter)
3553{
3554	struct tx_ring *txr = adapter->tx_rings;
3555
3556	for (int i = 0; i < adapter->num_queues; i++, txr++)
3557		igb_setup_transmit_ring(txr);
3558
3559	return;
3560}
3561
3562/*********************************************************************
3563 *
3564 *  Enable transmit unit.
3565 *
3566 **********************************************************************/
3567static void
3568igb_initialize_transmit_units(struct adapter *adapter)
3569{
3570	struct tx_ring	*txr = adapter->tx_rings;
3571	struct e1000_hw *hw = &adapter->hw;
3572	u32		tctl, txdctl;
3573
3574	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3575	tctl = txdctl = 0;
3576
3577	/* Setup the Tx Descriptor Rings */
3578	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3579		u64 bus_addr = txr->txdma.dma_paddr;
3580
3581		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3582		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3583		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3584		    (uint32_t)(bus_addr >> 32));
3585		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3586		    (uint32_t)bus_addr);
3587
3588		/* Setup the HW Tx Head and Tail descriptor pointers */
3589		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3590		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3591
3592		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3593		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3594		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3595
3596		txr->queue_status = IGB_QUEUE_IDLE;
3597
3598		txdctl |= IGB_TX_PTHRESH;
3599		txdctl |= IGB_TX_HTHRESH << 8;
3600		txdctl |= IGB_TX_WTHRESH << 16;
3601		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3602		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3603	}
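	/*
	** TXDCTL, as packed above, carries the prefetch threshold at
	** bit 0, the host threshold at bit 8 and the write-back
	** threshold at bit 16, plus the per-queue enable bit.  E.g.
	** PTHRESH=8, HTHRESH=1, WTHRESH=16 would be written as
	** 0x00100108 | E1000_TXDCTL_QUEUE_ENABLE.
	*/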
3604
3605	if (adapter->vf_ifp)
3606		return;
3607
3608	e1000_config_collision_dist(hw);
3609
3610	/* Program the Transmit Control Register */
3611	tctl = E1000_READ_REG(hw, E1000_TCTL);
3612	tctl &= ~E1000_TCTL_CT;
3613	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3614		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3615
3616	/* This write will effectively turn on the transmit unit. */
3617	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3618}
3619
3620/*********************************************************************
3621 *
3622 *  Free all transmit rings.
3623 *
3624 **********************************************************************/
3625static void
3626igb_free_transmit_structures(struct adapter *adapter)
3627{
3628	struct tx_ring *txr = adapter->tx_rings;
3629
3630	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3631		IGB_TX_LOCK(txr);
3632		igb_free_transmit_buffers(txr);
3633		igb_dma_free(adapter, &txr->txdma);
3634		IGB_TX_UNLOCK(txr);
3635		IGB_TX_LOCK_DESTROY(txr);
3636	}
3637	free(adapter->tx_rings, M_DEVBUF);
3638}
3639
3640/*********************************************************************
3641 *
3642 *  Free transmit ring related data structures.
3643 *
3644 **********************************************************************/
3645static void
3646igb_free_transmit_buffers(struct tx_ring *txr)
3647{
3648	struct adapter *adapter = txr->adapter;
3649	struct igb_tx_buffer *tx_buffer;
3650	int             i;
3651
3652	INIT_DEBUGOUT("free_transmit_ring: begin");
3653
3654	if (txr->tx_buffers == NULL)
3655		return;
3656
3657	tx_buffer = txr->tx_buffers;
3658	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3659		if (tx_buffer->m_head != NULL) {
3660			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3661			    BUS_DMASYNC_POSTWRITE);
3662			bus_dmamap_unload(txr->txtag,
3663			    tx_buffer->map);
3664			m_freem(tx_buffer->m_head);
3665			tx_buffer->m_head = NULL;
3666			if (tx_buffer->map != NULL) {
3667				bus_dmamap_destroy(txr->txtag,
3668				    tx_buffer->map);
3669				tx_buffer->map = NULL;
3670			}
3671		} else if (tx_buffer->map != NULL) {
3672			bus_dmamap_unload(txr->txtag,
3673			    tx_buffer->map);
3674			bus_dmamap_destroy(txr->txtag,
3675			    tx_buffer->map);
3676			tx_buffer->map = NULL;
3677		}
3678	}
3679#ifndef IGB_LEGACY_TX
3680	if (txr->br != NULL)
3681		buf_ring_free(txr->br, M_DEVBUF);
3682#endif
3683	if (txr->tx_buffers != NULL) {
3684		free(txr->tx_buffers, M_DEVBUF);
3685		txr->tx_buffers = NULL;
3686	}
3687	if (txr->txtag != NULL) {
3688		bus_dma_tag_destroy(txr->txtag);
3689		txr->txtag = NULL;
3690	}
3691	return;
3692}
3693
3694/**********************************************************************
3695 *
3696 *  Setup work for hardware segmentation offload (TSO)
3697 *
3698 **********************************************************************/
3699static bool
3700igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3701	struct ip *ip, struct tcphdr *th)
3702{
3703	struct adapter *adapter = txr->adapter;
3704	struct e1000_adv_tx_context_desc *TXD;
3705	struct igb_tx_buffer        *tx_buffer;
3706	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3707	u32 mss_l4len_idx = 0;
3708	u16 vtag = 0;
3709	int ctxd, ip_hlen, tcp_hlen;
3710
3711	ctxd = txr->next_avail_desc;
3712	tx_buffer = &txr->tx_buffers[ctxd];
3713	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3714
3715	ip->ip_sum = 0;
3716	ip_hlen = ip->ip_hl << 2;
3717	tcp_hlen = th->th_off << 2;
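	/* For a plain Ethernet/IPv4/TCP frame with no options this
	** typically works out to ehdrlen 14, ip_hlen 20 and tcp_hlen 20. */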
3718
3719	/* VLAN MACLEN IPLEN */
3720	if (mp->m_flags & M_VLANTAG) {
3721		vtag = htole16(mp->m_pkthdr.ether_vtag);
3722		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3723	}
3724
3725	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3726	vlan_macip_lens |= ip_hlen;
3727	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3728
3729	/* ADV DTYPE TUCMD */
3730	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3731	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3732	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3733	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3734
3735	/* MSS L4LEN IDX */
3736	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3737	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3738	/* 82575 needs the queue index added */
3739	if (adapter->hw.mac.type == e1000_82575)
3740		mss_l4len_idx |= txr->me << 4;
3741	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3742
3743	TXD->seqnum_seed = htole32(0);
3744	tx_buffer->m_head = NULL;
3745	tx_buffer->next_eop = -1;
3746
3747	if (++ctxd == adapter->num_tx_desc)
3748		ctxd = 0;
3749
3750	txr->tx_avail--;
3751	txr->next_avail_desc = ctxd;
3752	return TRUE;
3753}
3754
3755
3756/*********************************************************************
3757 *
3758 *  Context Descriptor setup for VLAN or CSUM
3759 *
3760 **********************************************************************/
3761
3762static bool
3763igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3764{
3765	struct adapter *adapter = txr->adapter;
3766	struct e1000_adv_tx_context_desc *TXD;
3767	struct igb_tx_buffer        *tx_buffer;
3768	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3769	struct ether_vlan_header *eh;
3770	struct ip *ip = NULL;
3771	struct ip6_hdr *ip6;
3772	int  ehdrlen, ctxd, ip_hlen = 0;
3773	u16	etype, vtag = 0;
3774	u8	ipproto = 0;
3775	bool	offload = TRUE;
3776
3777	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3778		offload = FALSE;
3779
3780	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3781	ctxd = txr->next_avail_desc;
3782	tx_buffer = &txr->tx_buffers[ctxd];
3783	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3784
3785	/*
3786	** In advanced descriptors the vlan tag must
3787	** be placed into the context descriptor, so
3788	** we may be here just for that setup.
3789	*/
3790	if (mp->m_flags & M_VLANTAG) {
3791		vtag = htole16(mp->m_pkthdr.ether_vtag);
3792		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3793	} else if (offload == FALSE)
3794		return FALSE;
3795
3796	/*
3797	 * Determine where frame payload starts.
3798	 * Jump over vlan headers if already present,
3799	 * helpful for QinQ too.
3800	 */
3801	eh = mtod(mp, struct ether_vlan_header *);
3802	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3803		etype = ntohs(eh->evl_proto);
3804		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3805	} else {
3806		etype = ntohs(eh->evl_encap_proto);
3807		ehdrlen = ETHER_HDR_LEN;
3808	}
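	/*
	** ehdrlen is 14 (ETHER_HDR_LEN) for an untagged frame and
	** 18 when an 802.1Q tag is present in the header.
	*/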
3809
3810	/* Set the ether header length */
3811	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3812
3813	switch (etype) {
3814		case ETHERTYPE_IP:
3815			ip = (struct ip *)(mp->m_data + ehdrlen);
3816			ip_hlen = ip->ip_hl << 2;
3817			if (mp->m_len < ehdrlen + ip_hlen) {
3818				offload = FALSE;
3819				break;
3820			}
3821			ipproto = ip->ip_p;
3822			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3823			break;
3824		case ETHERTYPE_IPV6:
3825			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3826			ip_hlen = sizeof(struct ip6_hdr);
3827			ipproto = ip6->ip6_nxt;
3828			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3829			break;
3830		default:
3831			offload = FALSE;
3832			break;
3833	}
3834
3835	vlan_macip_lens |= ip_hlen;
3836	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3837
3838	switch (ipproto) {
3839		case IPPROTO_TCP:
3840			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3841				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3842			break;
3843		case IPPROTO_UDP:
3844			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3845				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3846			break;
3847#if __FreeBSD_version >= 800000
3848		case IPPROTO_SCTP:
3849			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3850				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3851			break;
3852#endif
3853		default:
3854			offload = FALSE;
3855			break;
3856	}
3857
3858	/* 82575 needs the queue index added */
3859	if (adapter->hw.mac.type == e1000_82575)
3860		mss_l4len_idx = txr->me << 4;
3861
3862	/* Now copy bits into descriptor */
3863	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3864	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3865	TXD->seqnum_seed = htole32(0);
3866	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3867
3868	tx_buffer->m_head = NULL;
3869	tx_buffer->next_eop = -1;
3870
3871	/* We've consumed the first desc, adjust counters */
3872	if (++ctxd == adapter->num_tx_desc)
3873		ctxd = 0;
3874	txr->next_avail_desc = ctxd;
3875	--txr->tx_avail;
3876
3877        return (offload);
3878}
3879
3880
3881/**********************************************************************
3882 *
3883 *  Examine each tx_buffer in the used queue. If the hardware is done
3884 *  processing the packet then free associated resources. The
3885 *  tx_buffer is put back on the free queue.
3886 *
3887 *  A TRUE return means there's work in the ring to clean; FALSE means it is empty.
3888 **********************************************************************/
3889static bool
3890igb_txeof(struct tx_ring *txr)
3891{
3892	struct adapter	*adapter = txr->adapter;
3893        int first, last, done, processed;
3894        struct igb_tx_buffer *tx_buffer;
3895        struct e1000_tx_desc   *tx_desc, *eop_desc;
3896	struct ifnet   *ifp = adapter->ifp;
3897
3898	IGB_TX_LOCK_ASSERT(txr);
3899
3900#ifdef DEV_NETMAP
3901	if (ifp->if_capenable & IFCAP_NETMAP) {
3902		struct netmap_adapter *na = NA(ifp);
3903
3904		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3905		IGB_TX_UNLOCK(txr);
3906		IGB_CORE_LOCK(adapter);
3907		selwakeuppri(&na->tx_si, PI_NET);
3908		IGB_CORE_UNLOCK(adapter);
3909		IGB_TX_LOCK(txr);
3910		return FALSE;
3911	}
3912#endif /* DEV_NETMAP */
3913        if (txr->tx_avail == adapter->num_tx_desc) {
3914		txr->queue_status = IGB_QUEUE_IDLE;
3915                return FALSE;
3916	}
3917
3918	processed = 0;
3919        first = txr->next_to_clean;
3920        tx_desc = &txr->tx_base[first];
3921        tx_buffer = &txr->tx_buffers[first];
3922	last = tx_buffer->next_eop;
3923        eop_desc = &txr->tx_base[last];
3924
3925	/*
3926	 * Get the index of the first descriptor AFTER
3927	 * the EOP of the first packet; that way we can
3928	 * do a simple comparison in the inner while
3929	 * loop below.
3930	 */
3931	if (++last == adapter->num_tx_desc)
3932 		last = 0;
3933	done = last;
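	/*
	** Example: if a completed packet occupies descriptors 10..13
	** then next_eop is 13, 'done' becomes 14, and the inner loop
	** below reclaims slots 10 through 13 before checking whether
	** another finished packet follows.
	*/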
3934
3935        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3936            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3937
3938        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3939		/* We clean the range of the packet */
3940		while (first != done) {
3941                	tx_desc->upper.data = 0;
3942                	tx_desc->lower.data = 0;
3943                	tx_desc->buffer_addr = 0;
3944                	++txr->tx_avail;
3945			++processed;
3946
3947			if (tx_buffer->m_head) {
3948				txr->bytes +=
3949				    tx_buffer->m_head->m_pkthdr.len;
3950				bus_dmamap_sync(txr->txtag,
3951				    tx_buffer->map,
3952				    BUS_DMASYNC_POSTWRITE);
3953				bus_dmamap_unload(txr->txtag,
3954				    tx_buffer->map);
3955
3956                        	m_freem(tx_buffer->m_head);
3957                        	tx_buffer->m_head = NULL;
3958                	}
3959			tx_buffer->next_eop = -1;
3960			txr->watchdog_time = ticks;
3961
3962	                if (++first == adapter->num_tx_desc)
3963				first = 0;
3964
3965	                tx_buffer = &txr->tx_buffers[first];
3966			tx_desc = &txr->tx_base[first];
3967		}
3968		++txr->packets;
3969		++ifp->if_opackets;
3970		/* See if we can continue to the next packet */
3971		last = tx_buffer->next_eop;
3972		if (last != -1) {
3973        		eop_desc = &txr->tx_base[last];
3974			/* Get new done point */
3975			if (++last == adapter->num_tx_desc) last = 0;
3976			done = last;
3977		} else
3978			break;
3979        }
3980        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3981            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3982
3983        txr->next_to_clean = first;
3984
3985	/*
3986	** Watchdog calculation: we know there's work
3987	** outstanding or the first return above would
3988	** have been taken, so nothing processed for
3989	** too long indicates a hang.
3990	*/
3991	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3992		txr->queue_status |= IGB_QUEUE_HUNG;
3993        /*
3994         * If we have a minimum free,
3995         * clear depleted state bit
3996         */
3997        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3998                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3999
4000	/* All clean, turn off the watchdog */
4001	if (txr->tx_avail == adapter->num_tx_desc) {
4002		txr->queue_status = IGB_QUEUE_IDLE;
4003		return (FALSE);
4004        }
4005
4006	return (TRUE);
4007}
4008
4009/*********************************************************************
4010 *
4011 *  Refresh mbuf buffers for RX descriptor rings
4012 *   - now keeps its own state so discards due to resource
4013 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4014 *     it just returns, keeping its placeholder, so it can simply
4015 *     be called again later to retry.
4016 *
4017 **********************************************************************/
4018static void
4019igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4020{
4021	struct adapter		*adapter = rxr->adapter;
4022	bus_dma_segment_t	hseg[1];
4023	bus_dma_segment_t	pseg[1];
4024	struct igb_rx_buf	*rxbuf;
4025	struct mbuf		*mh, *mp;
4026	int			i, j, nsegs, error;
4027	bool			refreshed = FALSE;
4028
4029	i = j = rxr->next_to_refresh;
4030	/*
4031	** Get one descriptor beyond
4032	** our work mark to control
4033	** the loop.
4034	*/
4035	if (++j == adapter->num_rx_desc)
4036		j = 0;
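	/*
	** 'i' is the slot being refreshed while 'j' runs one ahead;
	** stopping when 'j' reaches 'limit' guarantees the tail never
	** catches up with the caller's current clean point.
	*/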
4037
4038	while (j != limit) {
4039		rxbuf = &rxr->rx_buffers[i];
4040		/* No hdr mbuf used with header split off */
4041		if (rxr->hdr_split == FALSE)
4042			goto no_split;
4043		if (rxbuf->m_head == NULL) {
4044			mh = m_gethdr(M_NOWAIT, MT_DATA);
4045			if (mh == NULL)
4046				goto update;
4047		} else
4048			mh = rxbuf->m_head;
4049
4050		mh->m_pkthdr.len = mh->m_len = MHLEN;
4052		mh->m_flags |= M_PKTHDR;
4053		/* Get the memory mapping */
4054		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4055		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4056		if (error != 0) {
4057			printf("Refresh mbufs: hdr dmamap load"
4058			    " failure - %d\n", error);
4059			m_free(mh);
4060			rxbuf->m_head = NULL;
4061			goto update;
4062		}
4063		rxbuf->m_head = mh;
4064		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4065		    BUS_DMASYNC_PREREAD);
4066		rxr->rx_base[i].read.hdr_addr =
4067		    htole64(hseg[0].ds_addr);
4068no_split:
4069		if (rxbuf->m_pack == NULL) {
4070			mp = m_getjcl(M_NOWAIT, MT_DATA,
4071			    M_PKTHDR, adapter->rx_mbuf_sz);
4072			if (mp == NULL)
4073				goto update;
4074		} else
4075			mp = rxbuf->m_pack;
4076
4077		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4078		/* Get the memory mapping */
4079		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4080		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4081		if (error != 0) {
4082			printf("Refresh mbufs: payload dmamap load"
4083			    " failure - %d\n", error);
4084			m_free(mp);
4085			rxbuf->m_pack = NULL;
4086			goto update;
4087		}
4088		rxbuf->m_pack = mp;
4089		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4090		    BUS_DMASYNC_PREREAD);
4091		rxr->rx_base[i].read.pkt_addr =
4092		    htole64(pseg[0].ds_addr);
4093		refreshed = TRUE; /* at least one buffer was refreshed */
4094
4095		i = j; /* our next is precalculated */
4096		rxr->next_to_refresh = i;
4097		if (++j == adapter->num_rx_desc)
4098			j = 0;
4099	}
4100update:
4101	if (refreshed) /* update tail */
4102		E1000_WRITE_REG(&adapter->hw,
4103		    E1000_RDT(rxr->me), rxr->next_to_refresh);
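	/*
	** The tail (RDT) write hands the refreshed descriptors back
	** to the hardware; the device only uses entries up to, but
	** not including, the tail it was last given.
	*/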
4104	return;
4105}
4106
4107
4108/*********************************************************************
4109 *
4110 *  Allocate memory for rx_buffer structures. Since we use one
4111 *  rx_buffer per received packet, the maximum number of rx_buffer's
4112 *  that we'll need is equal to the number of receive descriptors
4113 *  that we've allocated.
4114 *
4115 **********************************************************************/
4116static int
4117igb_allocate_receive_buffers(struct rx_ring *rxr)
4118{
4119	struct	adapter 	*adapter = rxr->adapter;
4120	device_t 		dev = adapter->dev;
4121	struct igb_rx_buf	*rxbuf;
4122	int             	i, bsize, error;
4123
4124	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4125	if (!(rxr->rx_buffers =
4126	    (struct igb_rx_buf *) malloc(bsize,
4127	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4128		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4129		error = ENOMEM;
4130		goto fail;
4131	}
4132
4133	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4134				   1, 0,		/* alignment, bounds */
4135				   BUS_SPACE_MAXADDR,	/* lowaddr */
4136				   BUS_SPACE_MAXADDR,	/* highaddr */
4137				   NULL, NULL,		/* filter, filterarg */
4138				   MSIZE,		/* maxsize */
4139				   1,			/* nsegments */
4140				   MSIZE,		/* maxsegsize */
4141				   0,			/* flags */
4142				   NULL,		/* lockfunc */
4143				   NULL,		/* lockfuncarg */
4144				   &rxr->htag))) {
4145		device_printf(dev, "Unable to create RX DMA tag\n");
4146		goto fail;
4147	}
4148
4149	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4150				   1, 0,		/* alignment, bounds */
4151				   BUS_SPACE_MAXADDR,	/* lowaddr */
4152				   BUS_SPACE_MAXADDR,	/* highaddr */
4153				   NULL, NULL,		/* filter, filterarg */
4154				   MJUM9BYTES,		/* maxsize */
4155				   1,			/* nsegments */
4156				   MJUM9BYTES,		/* maxsegsize */
4157				   0,			/* flags */
4158				   NULL,		/* lockfunc */
4159				   NULL,		/* lockfuncarg */
4160				   &rxr->ptag))) {
4161		device_printf(dev, "Unable to create RX payload DMA tag\n");
4162		goto fail;
4163	}
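	/*
	** Two tags are used because the halves of a split packet have
	** very different bounds: header buffers are plain mbufs of at
	** most MSIZE bytes, while payload buffers may be clusters as
	** large as MJUM9BYTES; both map as a single segment.
	*/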
4164
4165	for (i = 0; i < adapter->num_rx_desc; i++) {
4166		rxbuf = &rxr->rx_buffers[i];
4167		error = bus_dmamap_create(rxr->htag,
4168		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4169		if (error) {
4170			device_printf(dev,
4171			    "Unable to create RX head DMA maps\n");
4172			goto fail;
4173		}
4174		error = bus_dmamap_create(rxr->ptag,
4175		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4176		if (error) {
4177			device_printf(dev,
4178			    "Unable to create RX packet DMA maps\n");
4179			goto fail;
4180		}
4181	}
4182
4183	return (0);
4184
4185fail:
4186	/* Frees all, but can handle partial completion */
4187	igb_free_receive_structures(adapter);
4188	return (error);
4189}
4190
4191
4192static void
4193igb_free_receive_ring(struct rx_ring *rxr)
4194{
4195	struct	adapter		*adapter = rxr->adapter;
4196	struct igb_rx_buf	*rxbuf;
4197
4198
4199	for (int i = 0; i < adapter->num_rx_desc; i++) {
4200		rxbuf = &rxr->rx_buffers[i];
4201		if (rxbuf->m_head != NULL) {
4202			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4203			    BUS_DMASYNC_POSTREAD);
4204			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4205			rxbuf->m_head->m_flags |= M_PKTHDR;
4206			m_freem(rxbuf->m_head);
4207		}
4208		if (rxbuf->m_pack != NULL) {
4209			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4210			    BUS_DMASYNC_POSTREAD);
4211			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4212			rxbuf->m_pack->m_flags |= M_PKTHDR;
4213			m_freem(rxbuf->m_pack);
4214		}
4215		rxbuf->m_head = NULL;
4216		rxbuf->m_pack = NULL;
4217	}
4218}
4219
4220
4221/*********************************************************************
4222 *
4223 *  Initialize a receive ring and its buffers.
4224 *
4225 **********************************************************************/
4226static int
4227igb_setup_receive_ring(struct rx_ring *rxr)
4228{
4229	struct	adapter		*adapter;
4230	struct  ifnet		*ifp;
4231	device_t		dev;
4232	struct igb_rx_buf	*rxbuf;
4233	bus_dma_segment_t	pseg[1], hseg[1];
4234	struct lro_ctrl		*lro = &rxr->lro;
4235	int			rsize, nsegs, error = 0;
4236#ifdef DEV_NETMAP
4237	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4238	struct netmap_slot *slot;
4239#endif /* DEV_NETMAP */
4240
4241	adapter = rxr->adapter;
4242	dev = adapter->dev;
4243	ifp = adapter->ifp;
4244
4245	/* Clear the ring contents */
4246	IGB_RX_LOCK(rxr);
4247#ifdef DEV_NETMAP
4248	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4249#endif /* DEV_NETMAP */
4250	rsize = roundup2(adapter->num_rx_desc *
4251	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4252	bzero((void *)rxr->rx_base, rsize);
4253
4254	/*
4255	** Free current RX buffer structures and their mbufs
4256	*/
4257	igb_free_receive_ring(rxr);
4258
4259	/* Configure for header split? */
4260	if (igb_header_split)
4261		rxr->hdr_split = TRUE;
4262
4263        /* Now replenish the ring mbufs */
4264	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4265		struct mbuf	*mh, *mp;
4266
4267		rxbuf = &rxr->rx_buffers[j];
4268#ifdef DEV_NETMAP
4269		if (slot) {
4270			/* slot sj is mapped to the i-th NIC-ring entry */
4271			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4272			uint64_t paddr;
4273			void *addr;
4274
4275			addr = PNMB(slot + sj, &paddr);
4276			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4277			/* Update descriptor */
4278			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4279			continue;
4280		}
4281#endif /* DEV_NETMAP */
4282		if (rxr->hdr_split == FALSE)
4283			goto skip_head;
4284
4285		/* First the header */
4286		rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4287		if (rxbuf->m_head == NULL) {
4288			error = ENOBUFS;
4289                        goto fail;
4290		}
4291		m_adj(rxbuf->m_head, ETHER_ALIGN);
4292		mh = rxbuf->m_head;
4293		mh->m_len = mh->m_pkthdr.len = MHLEN;
4294		mh->m_flags |= M_PKTHDR;
4295		/* Get the memory mapping */
4296		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4297		    rxbuf->hmap, rxbuf->m_head, hseg,
4298		    &nsegs, BUS_DMA_NOWAIT);
4299		if (error != 0) /* Nothing elegant to do here */
4300                        goto fail;
4301		bus_dmamap_sync(rxr->htag,
4302		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4303		/* Update descriptor */
4304		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4305
4306skip_head:
4307		/* Now the payload cluster */
4308		rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4309		    M_PKTHDR, adapter->rx_mbuf_sz);
4310		if (rxbuf->m_pack == NULL) {
4311			error = ENOBUFS;
4312                        goto fail;
4313		}
4314		mp = rxbuf->m_pack;
4315		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4316		/* Get the memory mapping */
4317		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4318		    rxbuf->pmap, mp, pseg,
4319		    &nsegs, BUS_DMA_NOWAIT);
4320		if (error != 0)
4321                        goto fail;
4322		bus_dmamap_sync(rxr->ptag,
4323		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4324		/* Update descriptor */
4325		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4326        }
4327
4328	/* Setup our descriptor indices */
4329	rxr->next_to_check = 0;
4330	rxr->next_to_refresh = adapter->num_rx_desc - 1;
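	/*
	** The ring was just fully populated, so next_to_refresh starts
	** at the last descriptor; this is also the value programmed
	** into RDT by igb_initialize_receive_units(), leaving the tail
	** one slot behind next_to_check.
	*/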
4331	rxr->lro_enabled = FALSE;
4332	rxr->rx_split_packets = 0;
4333	rxr->rx_bytes = 0;
4334
4335	rxr->fmp = NULL;
4336	rxr->lmp = NULL;
4337	rxr->discard = FALSE;
4338
4339	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4340	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4341
4342	/*
4343	** Now set up the LRO interface; we
4344	** also only do header split when LRO
4345	** is enabled, since the two are often
4346	** undesirable in similar setups.
4347	*/
4348	if (ifp->if_capenable & IFCAP_LRO) {
4349		error = tcp_lro_init(lro);
4350		if (error) {
4351			device_printf(dev, "LRO Initialization failed!\n");
4352			goto fail;
4353		}
4354		INIT_DEBUGOUT("RX LRO Initialized\n");
4355		rxr->lro_enabled = TRUE;
4356		lro->ifp = adapter->ifp;
4357	}
4358
4359	IGB_RX_UNLOCK(rxr);
4360	return (0);
4361
4362fail:
4363	igb_free_receive_ring(rxr);
4364	IGB_RX_UNLOCK(rxr);
4365	return (error);
4366}
4367
4368
4369/*********************************************************************
4370 *
4371 *  Initialize all receive rings.
4372 *
4373 **********************************************************************/
4374static int
4375igb_setup_receive_structures(struct adapter *adapter)
4376{
4377	struct rx_ring *rxr = adapter->rx_rings;
4378	int i;
4379
4380	for (i = 0; i < adapter->num_queues; i++, rxr++)
4381		if (igb_setup_receive_ring(rxr))
4382			goto fail;
4383
4384	return (0);
4385fail:
4386	/*
4387	 * Free RX buffers allocated so far; we will only handle
4388	 * the rings that completed, since the failing case will have
4389	 * cleaned up for itself. 'i' is the endpoint.
4390	 */
4391	for (int j = 0; j < i; ++j) {
4392		rxr = &adapter->rx_rings[j];
4393		IGB_RX_LOCK(rxr);
4394		igb_free_receive_ring(rxr);
4395		IGB_RX_UNLOCK(rxr);
4396	}
4397
4398	return (ENOBUFS);
4399}
4400
4401/*********************************************************************
4402 *
4403 *  Enable receive unit.
4404 *
4405 **********************************************************************/
4406static void
4407igb_initialize_receive_units(struct adapter *adapter)
4408{
4409	struct rx_ring	*rxr = adapter->rx_rings;
4410	struct ifnet	*ifp = adapter->ifp;
4411	struct e1000_hw *hw = &adapter->hw;
4412	u32		rctl, rxcsum, psize, srrctl = 0;
4413
4414	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4415
4416	/*
4417	 * Make sure receives are disabled while setting
4418	 * up the descriptor ring
4419	 */
4420	rctl = E1000_READ_REG(hw, E1000_RCTL);
4421	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4422
4423	/*
4424	** Set up for header split
4425	*/
4426	if (igb_header_split) {
4427		/* Use a standard mbuf for the header */
4428		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4429		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4430	} else
4431		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4432
4433	/*
4434	** Set up for jumbo frames
4435	*/
4436	if (ifp->if_mtu > ETHERMTU) {
4437		rctl |= E1000_RCTL_LPE;
4438		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4439			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4440			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4441		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4442			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4443			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4444		}
4445		/* Set maximum packet len */
4446		psize = adapter->max_frame_size;
4447		/* are we on a vlan? */
4448		if (adapter->ifp->if_vlantrunk != NULL)
4449			psize += VLAN_TAG_SIZE;
4450		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4451	} else {
4452		rctl &= ~E1000_RCTL_LPE;
4453		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4454		rctl |= E1000_RCTL_SZ_2048;
4455	}
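	/*
	** The >> E1000_SRRCTL_BSIZEPKT_SHIFT above scales the buffer
	** size into the coarser units the SRRCTL packet-size field
	** expects, so e.g. a 2048 byte buffer programs a value of 2
	** assuming the usual 1 KB field granularity; the RCTL_SZ_*
	** bits mirror the same choice in the legacy receive control
	** register.
	*/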
4456
4457	/* Setup the Base and Length of the Rx Descriptor Rings */
4458	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4459		u64 bus_addr = rxr->rxdma.dma_paddr;
4460		u32 rxdctl;
4461
4462		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4463		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4464		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4465		    (uint32_t)(bus_addr >> 32));
4466		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4467		    (uint32_t)bus_addr);
4468		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4469		/* Enable this Queue */
4470		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4471		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4472		rxdctl &= 0xFFF00000;
4473		rxdctl |= IGB_RX_PTHRESH;
4474		rxdctl |= IGB_RX_HTHRESH << 8;
4475		rxdctl |= IGB_RX_WTHRESH << 16;
4476		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4477	}
4478
4479	/*
4480	** Setup for RX MultiQueue
4481	*/
4482	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4483	if (adapter->num_queues > 1) {
4484		u32 random[10], mrqc, shift = 0;
4485		union igb_reta {
4486			u32 dword;
4487			u8  bytes[4];
4488		} reta;
4489
4490		arc4rand(&random, sizeof(random), 0);
4491		if (adapter->hw.mac.type == e1000_82575)
4492			shift = 6;
4493		/* Populate the RSS redirection table (RETA) */
4494		for (int i = 0; i < 128; i++) {
4495			reta.bytes[i & 3] =
4496			    (i % adapter->num_queues) << shift;
4497			if ((i & 3) == 3)
4498				E1000_WRITE_REG(hw,
4499				    E1000_RETA(i >> 2), reta.dword);
4500		}
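		/*
		** The loop above fills the 128-entry redirection table
		** with queue indices in round-robin order, packing four
		** byte-wide entries per RETA register write.  With two
		** queues the bytes cycle 0,1,0,1,...; the shift of 6 on
		** the 82575 places the index where that chip expects it.
		*/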
4501		/* Now fill in hash table */
4502		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4503		for (int i = 0; i < 10; i++)
4504			E1000_WRITE_REG_ARRAY(hw,
4505			    E1000_RSSRK(0), i, random[i]);
4506
4507		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4508		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4509		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4510		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4511		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4512		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4513		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4514		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4515
4516		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4517
4518		/*
4519		** NOTE: Receive Full-Packet Checksum Offload
4520		** is mutually exclusive with Multiqueue; however,
4521		** this is not the same as TCP/IP checksum offload,
4522		** which still works.
4523		*/
4524		rxcsum |= E1000_RXCSUM_PCSD;
4525#if __FreeBSD_version >= 800000
4526		/* For SCTP Offload */
4527		if ((hw->mac.type == e1000_82576)
4528		    && (ifp->if_capenable & IFCAP_RXCSUM))
4529			rxcsum |= E1000_RXCSUM_CRCOFL;
4530#endif
4531	} else {
4532		/* Non RSS setup */
4533		if (ifp->if_capenable & IFCAP_RXCSUM) {
4534			rxcsum |= E1000_RXCSUM_IPPCSE;
4535#if __FreeBSD_version >= 800000
4536			if (adapter->hw.mac.type == e1000_82576)
4537				rxcsum |= E1000_RXCSUM_CRCOFL;
4538#endif
4539		} else
4540			rxcsum &= ~E1000_RXCSUM_TUOFL;
4541	}
4542	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4543
4544	/* Setup the Receive Control Register */
4545	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4546	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4547		   E1000_RCTL_RDMTS_HALF |
4548		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4549	/* Strip CRC bytes. */
4550	rctl |= E1000_RCTL_SECRC;
4551	/* Make sure VLAN Filters are off */
4552	rctl &= ~E1000_RCTL_VFE;
4553	/* Don't store bad packets */
4554	rctl &= ~E1000_RCTL_SBP;
4555
4556	/* Enable Receives */
4557	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4558
4559	/*
4560	 * Setup the HW Rx Head and Tail Descriptor Pointers
4561	 *   - needs to be after enable
4562	 */
4563	for (int i = 0; i < adapter->num_queues; i++) {
4564		rxr = &adapter->rx_rings[i];
4565		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4566#ifdef DEV_NETMAP
4567		/*
4568		 * An init() while a netmap client is active must
4569		 * preserve the rx buffers passed to userspace.
4570		 * In this driver it means we adjust RDT to
4571		 * something different from next_to_refresh
4572		 * (which is not used in netmap mode).
4573		 */
4574		if (ifp->if_capenable & IFCAP_NETMAP) {
4575			struct netmap_adapter *na = NA(adapter->ifp);
4576			struct netmap_kring *kring = &na->rx_rings[i];
4577			int t = rxr->next_to_refresh - kring->nr_hwavail;
4578
4579			if (t >= adapter->num_rx_desc)
4580				t -= adapter->num_rx_desc;
4581			else if (t < 0)
4582				t += adapter->num_rx_desc;
4583			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4584		} else
4585#endif /* DEV_NETMAP */
4586		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4587	}
4588	return;
4589}
4590
4591/*********************************************************************
4592 *
4593 *  Free receive rings.
4594 *
4595 **********************************************************************/
4596static void
4597igb_free_receive_structures(struct adapter *adapter)
4598{
4599	struct rx_ring *rxr = adapter->rx_rings;
4600
4601	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4602		struct lro_ctrl	*lro = &rxr->lro;
4603		igb_free_receive_buffers(rxr);
4604		tcp_lro_free(lro);
4605		igb_dma_free(adapter, &rxr->rxdma);
4606	}
4607
4608	free(adapter->rx_rings, M_DEVBUF);
4609}
4610
4611/*********************************************************************
4612 *
4613 *  Free receive ring data structures.
4614 *
4615 **********************************************************************/
4616static void
4617igb_free_receive_buffers(struct rx_ring *rxr)
4618{
4619	struct adapter		*adapter = rxr->adapter;
4620	struct igb_rx_buf	*rxbuf;
4621	int i;
4622
4623	INIT_DEBUGOUT("free_receive_structures: begin");
4624
4625	/* Cleanup any existing buffers */
4626	if (rxr->rx_buffers != NULL) {
4627		for (i = 0; i < adapter->num_rx_desc; i++) {
4628			rxbuf = &rxr->rx_buffers[i];
4629			if (rxbuf->m_head != NULL) {
4630				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4631				    BUS_DMASYNC_POSTREAD);
4632				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4633				rxbuf->m_head->m_flags |= M_PKTHDR;
4634				m_freem(rxbuf->m_head);
4635			}
4636			if (rxbuf->m_pack != NULL) {
4637				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4638				    BUS_DMASYNC_POSTREAD);
4639				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4640				rxbuf->m_pack->m_flags |= M_PKTHDR;
4641				m_freem(rxbuf->m_pack);
4642			}
4643			rxbuf->m_head = NULL;
4644			rxbuf->m_pack = NULL;
4645			if (rxbuf->hmap != NULL) {
4646				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4647				rxbuf->hmap = NULL;
4648			}
4649			if (rxbuf->pmap != NULL) {
4650				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4651				rxbuf->pmap = NULL;
4652			}
4653		}
4654		if (rxr->rx_buffers != NULL) {
4655			free(rxr->rx_buffers, M_DEVBUF);
4656			rxr->rx_buffers = NULL;
4657		}
4658	}
4659
4660	if (rxr->htag != NULL) {
4661		bus_dma_tag_destroy(rxr->htag);
4662		rxr->htag = NULL;
4663	}
4664	if (rxr->ptag != NULL) {
4665		bus_dma_tag_destroy(rxr->ptag);
4666		rxr->ptag = NULL;
4667	}
4668}
4669
4670static __inline void
4671igb_rx_discard(struct rx_ring *rxr, int i)
4672{
4673	struct igb_rx_buf	*rbuf;
4674
4675	rbuf = &rxr->rx_buffers[i];
4676
4677	/* Partially received? Free the chain */
4678	if (rxr->fmp != NULL) {
4679		rxr->fmp->m_flags |= M_PKTHDR;
4680		m_freem(rxr->fmp);
4681		rxr->fmp = NULL;
4682		rxr->lmp = NULL;
4683	}
4684
4685	/*
4686	** With advanced descriptors the writeback
4687	** clobbers the buffer addresses, so it's easier
4688	** to just free the existing mbufs and take
4689	** the normal refresh path to get new buffers
4690	** and mapping.
4691	*/
4692	if (rbuf->m_head) {
4693		m_free(rbuf->m_head);
4694		rbuf->m_head = NULL;
4695	}
4696
4697	if (rbuf->m_pack) {
4698		m_free(rbuf->m_pack);
4699		rbuf->m_pack = NULL;
4700	}
4701
4702	return;
4703}
4704
4705static __inline void
4706igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4707{
4708
4709	/*
4710	 * At the moment LRO is only for IPv4/TCP packets, and the TCP checksum
4711	 * of the packet must have been computed by hardware. The packet should
4712	 * also not carry a VLAN tag in its Ethernet header.
4713	 */
4714	if (rxr->lro_enabled &&
4715	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4716	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4717	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4718	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4719	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4720	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4721		/*
4722		 * Send to the stack if:
4723		 *  - LRO not enabled, or
4724		 *  - no LRO resources, or
4725		 *  - lro enqueue fails
4726		 */
4727		if (rxr->lro.lro_cnt != 0)
4728			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4729				return;
4730	}
4731	IGB_RX_UNLOCK(rxr);
4732	(*ifp->if_input)(ifp, m);
4733	IGB_RX_LOCK(rxr);
4734}
4735
4736/*********************************************************************
4737 *
4738 *  This routine executes in interrupt context. It replenishes
4739 *  the mbufs in the descriptor ring and passes data which has been
4740 *  DMA'd into host memory up to the upper layer.
4741 *
4742 *  We loop at most count times if count is > 0, or until done if
4743 *  count < 0.
4744 *
4745 *  Return TRUE if more to clean, FALSE otherwise
4746 *********************************************************************/
4747static bool
4748igb_rxeof(struct igb_queue *que, int count, int *done)
4749{
4750	struct adapter		*adapter = que->adapter;
4751	struct rx_ring		*rxr = que->rxr;
4752	struct ifnet		*ifp = adapter->ifp;
4753	struct lro_ctrl		*lro = &rxr->lro;
4754	struct lro_entry	*queued;
4755	int			i, processed = 0, rxdone = 0;
4756	u32			ptype, staterr = 0;
4757	union e1000_adv_rx_desc	*cur;
4758
4759	IGB_RX_LOCK(rxr);
4760	/* Sync the ring. */
4761	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4762	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4763
4764#ifdef DEV_NETMAP
4765	if (ifp->if_capenable & IFCAP_NETMAP) {
4766		struct netmap_adapter *na = NA(ifp);
4767
4768		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4769		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4770		IGB_RX_UNLOCK(rxr);
4771		IGB_CORE_LOCK(adapter);
4772		selwakeuppri(&na->rx_si, PI_NET);
4773		IGB_CORE_UNLOCK(adapter);
4774		return (0);
4775	}
4776#endif /* DEV_NETMAP */
4777
4778	/* Main clean loop */
4779	for (i = rxr->next_to_check; count != 0;) {
4780		struct mbuf		*sendmp, *mh, *mp;
4781		struct igb_rx_buf	*rxbuf;
4782		u16			hlen, plen, hdr, vtag;
4783		bool			eop = FALSE;
4784
4785		cur = &rxr->rx_base[i];
4786		staterr = le32toh(cur->wb.upper.status_error);
4787		if ((staterr & E1000_RXD_STAT_DD) == 0)
4788			break;
4789		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4790			break;
4791		count--;
4792		sendmp = mh = mp = NULL;
4793		cur->wb.upper.status_error = 0;
4794		rxbuf = &rxr->rx_buffers[i];
4795		plen = le16toh(cur->wb.upper.length);
4796		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4797		if ((adapter->hw.mac.type == e1000_i350) &&
4798		    (staterr & E1000_RXDEXT_STATERR_LB))
4799			vtag = be16toh(cur->wb.upper.vlan);
4800		else
4801			vtag = le16toh(cur->wb.upper.vlan);
4802		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4803		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4804
4805		/* Make sure all segments of a bad packet are discarded */
4806		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4807		    (rxr->discard)) {
4808			adapter->dropped_pkts++;
4809			++rxr->rx_discarded;
4810			if (!eop) /* Catch subsequent segs */
4811				rxr->discard = TRUE;
4812			else
4813				rxr->discard = FALSE;
4814			igb_rx_discard(rxr, i);
4815			goto next_desc;
4816		}
4817
4818		/*
4819		** The way the hardware is configured to
4820		** split, it will ONLY use the header buffer
4821		** when header split is enabled; otherwise we
4822		** get normal behavior, i.e. both header and
4823		** payload are DMA'd into the payload buffer.
4824		**
4825		** The fmp test is to catch the case where a
4826		** packet spans multiple descriptors; in that
4827		** case only the first header is valid.
4828		*/
4829		if (rxr->hdr_split && rxr->fmp == NULL) {
4830			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4831			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4832			if (hlen > IGB_HDR_BUF)
4833				hlen = IGB_HDR_BUF;
4834			mh = rxr->rx_buffers[i].m_head;
4835			mh->m_len = hlen;
4836			/* clear buf pointer for refresh */
4837			rxbuf->m_head = NULL;
4838			/*
4839			** Get the payload length; this
4840			** could be zero if it's a small
4841			** packet.
4842			*/
4843			if (plen > 0) {
4844				mp = rxr->rx_buffers[i].m_pack;
4845				mp->m_len = plen;
4846				mh->m_next = mp;
4847				/* clear buf pointer */
4848				rxbuf->m_pack = NULL;
4849				rxr->rx_split_packets++;
4850			}
4851		} else {
4852			/*
4853			** Either no header split, or a
4854			** secondary piece of a fragmented
4855			** split packet.
4856			*/
4857			mh = rxr->rx_buffers[i].m_pack;
4858			mh->m_len = plen;
4859			/* clear buf info for refresh */
4860			rxbuf->m_pack = NULL;
4861		}
4862
4863		++processed; /* So we know when to refresh */
4864
4865		/* Initial frame - setup */
4866		if (rxr->fmp == NULL) {
4867			mh->m_pkthdr.len = mh->m_len;
4868			/* Save the head of the chain */
4869			rxr->fmp = mh;
4870			rxr->lmp = mh;
4871			if (mp != NULL) {
4872				/* Add payload if split */
4873				mh->m_pkthdr.len += mp->m_len;
4874				rxr->lmp = mh->m_next;
4875			}
4876		} else {
4877			/* Chain mbuf's together */
4878			rxr->lmp->m_next = mh;
4879			rxr->lmp = rxr->lmp->m_next;
4880			rxr->fmp->m_pkthdr.len += mh->m_len;
4881		}
4882
4883		if (eop) {
4884			rxr->fmp->m_pkthdr.rcvif = ifp;
4885			ifp->if_ipackets++;
4886			rxr->rx_packets++;
4887			/* capture data for AIM */
4888			rxr->packets++;
4889			rxr->bytes += rxr->fmp->m_pkthdr.len;
4890			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4891
4892			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4893				igb_rx_checksum(staterr, rxr->fmp, ptype);
4894
4895			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4896			    (staterr & E1000_RXD_STAT_VP) != 0) {
4897				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4898				rxr->fmp->m_flags |= M_VLANTAG;
4899			}
4900#ifndef IGB_LEGACY_TX
4901			rxr->fmp->m_pkthdr.flowid = que->msix;
4902			rxr->fmp->m_flags |= M_FLOWID;
4903#endif
4904			sendmp = rxr->fmp;
4905			/* Make sure to set M_PKTHDR. */
4906			sendmp->m_flags |= M_PKTHDR;
4907			rxr->fmp = NULL;
4908			rxr->lmp = NULL;
4909		}
4910
4911next_desc:
4912		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4913		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4914
4915		/* Advance our pointers to the next descriptor. */
4916		if (++i == adapter->num_rx_desc)
4917			i = 0;
4918		/*
4919		** Send to the stack or LRO
4920		*/
4921		if (sendmp != NULL) {
4922			rxr->next_to_check = i;
4923			igb_rx_input(rxr, ifp, sendmp, ptype);
4924			i = rxr->next_to_check;
4925			rxdone++;
4926		}
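		/*
		** next_to_check is stashed before the call because
		** igb_rx_input() drops and retakes the RX lock around
		** if_input()/LRO; reloading it afterwards keeps our ring
		** position consistent with anything that ran meanwhile.
		*/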
4927
4928		/* Every 8 descriptors we go to refresh mbufs */
4929		if (processed == 8) {
4930                        igb_refresh_mbufs(rxr, i);
4931                        processed = 0;
4932		}
4933	}
4934
4935	/* Catch any remainders */
4936	if (igb_rx_unrefreshed(rxr))
4937		igb_refresh_mbufs(rxr, i);
4938
4939	rxr->next_to_check = i;
4940
4941	/*
4942	 * Flush any outstanding LRO work
4943	 */
4944	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4945		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4946		tcp_lro_flush(lro, queued);
4947	}
4948
4949	if (done != NULL)
4950		*done += rxdone;
4951
4952	IGB_RX_UNLOCK(rxr);
4953	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4954}
4955
4956/*********************************************************************
4957 *
4958 *  Verify that the hardware indicated that the checksum is valid.
4959 *  Inform the stack about the status of the checksum so that the stack
4960 *  doesn't spend time verifying it.
4961 *
4962 *********************************************************************/
4963static void
4964igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4965{
4966	u16 status = (u16)staterr;
4967	u8  errors = (u8) (staterr >> 24);
4968	int sctp;
4969
4970	/* Ignore Checksum bit is set */
4971	if (status & E1000_RXD_STAT_IXSM) {
4972		mp->m_pkthdr.csum_flags = 0;
4973		return;
4974	}
4975
4976	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4977	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4978		sctp = 1;
4979	else
4980		sctp = 0;
4981	if (status & E1000_RXD_STAT_IPCS) {
4982		/* Did it pass? */
4983		if (!(errors & E1000_RXD_ERR_IPE)) {
4984			/* IP Checksum Good */
4985			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4986			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4987		} else
4988			mp->m_pkthdr.csum_flags = 0;
4989	}
4990
4991	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4992		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4993#if __FreeBSD_version >= 800000
4994		if (sctp) /* reassign */
4995			type = CSUM_SCTP_VALID;
4996#endif
4997		/* Did it pass? */
4998		if (!(errors & E1000_RXD_ERR_TCPE)) {
4999			mp->m_pkthdr.csum_flags |= type;
5000			if (sctp == 0)
5001				mp->m_pkthdr.csum_data = htons(0xffff);
5002		}
5003	}
5004	return;
5005}
5006
5007/*
5008 * This routine is run via a vlan
5009 * config EVENT
5010 */
5011static void
5012igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5013{
5014	struct adapter	*adapter = ifp->if_softc;
5015	u32		index, bit;
5016
5017	if (ifp->if_softc !=  arg)   /* Not our event */
5018		return;
5019
5020	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5021                return;
5022
5023	IGB_CORE_LOCK(adapter);
5024	index = (vtag >> 5) & 0x7F;
5025	bit = vtag & 0x1F;
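	/*
	** The shadow VFTA is an array of 32-bit words covering VLAN IDs
	** 0-4095; e.g. vtag 1000 lands in word 31, bit 8.
	*/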
5026	adapter->shadow_vfta[index] |= (1 << bit);
5027	++adapter->num_vlans;
5028	/* Change hw filter setting */
5029	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5030		igb_setup_vlan_hw_support(adapter);
5031	IGB_CORE_UNLOCK(adapter);
5032}
5033
5034/*
5035 * This routine is run via a vlan
5036 * unconfig EVENT
5037 */
5038static void
5039igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5040{
5041	struct adapter	*adapter = ifp->if_softc;
5042	u32		index, bit;
5043
5044	if (ifp->if_softc !=  arg)
5045		return;
5046
5047	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
5048                return;
5049
5050	IGB_CORE_LOCK(adapter);
5051	index = (vtag >> 5) & 0x7F;
5052	bit = vtag & 0x1F;
5053	adapter->shadow_vfta[index] &= ~(1 << bit);
5054	--adapter->num_vlans;
5055	/* Change hw filter setting */
5056	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5057		igb_setup_vlan_hw_support(adapter);
5058	IGB_CORE_UNLOCK(adapter);
5059}
5060
5061static void
5062igb_setup_vlan_hw_support(struct adapter *adapter)
5063{
5064	struct e1000_hw *hw = &adapter->hw;
5065	struct ifnet	*ifp = adapter->ifp;
5066	u32             reg;
5067
5068	if (adapter->vf_ifp) {
5069		e1000_rlpml_set_vf(hw,
5070		    adapter->max_frame_size + VLAN_TAG_SIZE);
5071		return;
5072	}
5073
5074	reg = E1000_READ_REG(hw, E1000_CTRL);
5075	reg |= E1000_CTRL_VME;
5076	E1000_WRITE_REG(hw, E1000_CTRL, reg);
5077
5078	/* Enable the Filter Table */
5079	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5080		reg = E1000_READ_REG(hw, E1000_RCTL);
5081		reg &= ~E1000_RCTL_CFIEN;
5082		reg |= E1000_RCTL_VFE;
5083		E1000_WRITE_REG(hw, E1000_RCTL, reg);
5084	}
5085
5086	/* Update the frame size */
5087	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5088	    adapter->max_frame_size + VLAN_TAG_SIZE);
5089
5090	/* Don't bother with table if no vlans */
5091	if ((adapter->num_vlans == 0) ||
5092	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5093                return;
5094	/*
5095	** A soft reset zeroes out the VFTA, so
5096	** we need to repopulate it now.
5097	*/
5098	for (int i = 0; i < IGB_VFTA_SIZE; i++)
5099                if (adapter->shadow_vfta[i] != 0) {
5100			if (adapter->vf_ifp)
5101				e1000_vfta_set_vf(hw,
5102				    adapter->shadow_vfta[i], TRUE);
5103			else
5104				e1000_write_vfta(hw,
5105				    i, adapter->shadow_vfta[i]);
5106		}
5107}
5108
5109static void
5110igb_enable_intr(struct adapter *adapter)
5111{
5112	/* With RSS, set up what to auto-clear */
5113	if (adapter->msix_mem) {
5114		u32 mask = (adapter->que_mask | adapter->link_mask);
5115		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5116		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5117		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5118		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5119		    E1000_IMS_LSC);
5120	} else {
5121		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5122		    IMS_ENABLE_MASK);
5123	}
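	/*
	** In MSI-X mode the queue and link vectors are enabled through
	** the extended registers (EIAC/EIAM/EIMS) so they auto-clear
	** and auto-mask, with the legacy IMS carrying only link status
	** change; otherwise the single legacy interrupt mask is used.
	*/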
5124	E1000_WRITE_FLUSH(&adapter->hw);
5125
5126	return;
5127}
5128
5129static void
5130igb_disable_intr(struct adapter *adapter)
5131{
5132	if (adapter->msix_mem) {
5133		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5134		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5135	}
5136	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5137	E1000_WRITE_FLUSH(&adapter->hw);
5138	return;
5139}
5140
5141/*
5142 * Bit of a misnomer: what this really means is
5143 * to enable OS management of the system, i.e.
5144 * to disable special hardware management features.
5145 */
5146static void
5147igb_init_manageability(struct adapter *adapter)
5148{
5149	if (adapter->has_manage) {
5150		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5151		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5152
5153		/* disable hardware interception of ARP */
5154		manc &= ~(E1000_MANC_ARP_EN);
5155
5156                /* enable receiving management packets to the host */
5157		manc |= E1000_MANC_EN_MNG2HOST;
5158		manc2h |= 1 << 5;  /* Mng Port 623 */
5159		manc2h |= 1 << 6;  /* Mng Port 664 */
5160		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5161		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5162	}
5163}
5164
5165/*
5166 * Give control back to hardware management
5167 * controller if there is one.
5168 */
5169static void
5170igb_release_manageability(struct adapter *adapter)
5171{
5172	if (adapter->has_manage) {
5173		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5174
5175		/* re-enable hardware interception of ARP */
5176		manc |= E1000_MANC_ARP_EN;
5177		manc &= ~E1000_MANC_EN_MNG2HOST;
5178
5179		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5180	}
5181}
5182
5183/*
5184 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5185 * For ASF and Pass Through versions of f/w this means that
5186 * the driver is loaded.
5187 *
5188 */
5189static void
5190igb_get_hw_control(struct adapter *adapter)
5191{
5192	u32 ctrl_ext;
5193
5194	if (adapter->vf_ifp)
5195		return;
5196
5197	/* Let firmware know the driver has taken over */
5198	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5199	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5200	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5201}
5202
5203/*
5204 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5205 * For ASF and Pass Through versions of f/w this means that the
5206 * driver is no longer loaded.
5207 *
5208 */
5209static void
5210igb_release_hw_control(struct adapter *adapter)
5211{
5212	u32 ctrl_ext;
5213
5214	if (adapter->vf_ifp)
5215		return;
5216
5217	/* Let firmware take over control of h/w */
5218	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5219	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5220	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5221}
5222
5223static int
5224igb_is_valid_ether_addr(uint8_t *addr)
5225{
5226	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5227
5228	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5229		return (FALSE);
5230	}
5231
5232	return (TRUE);
5233}
5234
5235
5236/*
5237 * Enable PCI Wake On Lan capability
5238 */
5239static void
5240igb_enable_wakeup(device_t dev)
5241{
5242	u16     cap, status;
5243	u8      id;
5244
5245	/* First find the capabilities pointer*/
5246	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5247	/* Read the PM Capabilities */
5248	id = pci_read_config(dev, cap, 1);
5249	if (id != PCIY_PMG)     /* Something wrong */
5250		return;
5251	/* OK, we have the power capabilities, so
5252	   now get the status register */
5253	cap += PCIR_POWER_STATUS;
5254	status = pci_read_config(dev, cap, 2);
5255	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5256	pci_write_config(dev, cap, status, 2);
5257	return;
5258}
5259
5260static void
5261igb_led_func(void *arg, int onoff)
5262{
5263	struct adapter	*adapter = arg;
5264
5265	IGB_CORE_LOCK(adapter);
5266	if (onoff) {
5267		e1000_setup_led(&adapter->hw);
5268		e1000_led_on(&adapter->hw);
5269	} else {
5270		e1000_led_off(&adapter->hw);
5271		e1000_cleanup_led(&adapter->hw);
5272	}
5273	IGB_CORE_UNLOCK(adapter);
5274}
5275
5276/**********************************************************************
5277 *
5278 *  Update the board statistics counters.
5279 *
5280 **********************************************************************/
5281static void
5282igb_update_stats_counters(struct adapter *adapter)
5283{
5284	struct ifnet		*ifp;
5285        struct e1000_hw		*hw = &adapter->hw;
5286	struct e1000_hw_stats	*stats;
5287
5288	/*
5289	** The virtual function adapter has only a
5290	** small, controlled set of stats; do only
5291	** those and return.
5292	*/
5293	if (adapter->vf_ifp) {
5294		igb_update_vf_stats_counters(adapter);
5295		return;
5296	}
5297
5298	stats = (struct e1000_hw_stats	*)adapter->stats;
5299
5300	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5301	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5302		stats->symerrs +=
5303		    E1000_READ_REG(hw,E1000_SYMERRS);
5304		    E1000_READ_REG(hw, E1000_SYMERRS);
5305	}
5306
5307	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5308	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5309	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5310	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5311
5312	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5313	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5314	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5315	stats->dc += E1000_READ_REG(hw, E1000_DC);
5316	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5317	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5318	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5319	/*
5320	** For watchdog management we need to know if we have been
5321	** paused during the last interval, so capture that here.
5322	*/
5323        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5324        stats->xoffrxc += adapter->pause_frames;
5325	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5326	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5327	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5328	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5329	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5330	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5331	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5332	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5333	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5334	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5335	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5336	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5337
5338	/* For the 64-bit byte counters the low dword must be read first. */
5339	/* Both registers clear on the read of the high dword */
5340
5341	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5342	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5343	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5344	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5345
5346	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5347	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5348	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5349	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5350	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5351
5352	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5353	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5354
5355	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5356	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5357	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5358	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5359	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5360	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5361	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5362	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5363	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5364	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5365
5366	/* Interrupt Counts */
5367
5368	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5369	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5370	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5371	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5372	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5373	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5374	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5375	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5376	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5377
5378	/* Host to Card Statistics */
5379
5380	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5381	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5382	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5383	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5384	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5385	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5386	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5387	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5388	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5389	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5390	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5391	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5392	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5393	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5394
5395	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5396	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5397	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5398	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5399	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5400	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5401
5402	ifp = adapter->ifp;
5403	ifp->if_collisions = stats->colc;
5404
5405	/* Rx Errors */
5406	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5407	    stats->crcerrs + stats->algnerrc +
5408	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5409
5410	/* Tx Errors */
5411	ifp->if_oerrors = stats->ecol +
5412	    stats->latecol + adapter->watchdog_events;
5413
5414	/* Driver specific counters */
5415	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5416	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5417	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5418	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5419	adapter->packet_buf_alloc_tx =
5420	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5421	adapter->packet_buf_alloc_rx =
5422	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5423}
5424
5425
5426/**********************************************************************
5427 *
5428 *  Initialize the VF board statistics counters.
5429 *
5430 **********************************************************************/
5431static void
5432igb_vf_init_stats(struct adapter *adapter)
5433{
5434	struct e1000_hw *hw = &adapter->hw;
5435	struct e1000_vf_stats	*stats;
5436
5437	stats = (struct e1000_vf_stats *)adapter->stats;
5438	if (stats == NULL)
5439		return;
5440	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5441	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5442	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5443	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5444	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5445}
5446
5447/**********************************************************************
5448 *
5449 *  Update the VF board statistics counters.
5450 *
5451 **********************************************************************/
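/*
 * Unlike the PF statistics registers, the VF counters are free-running
 * rather than clear-on-read; the UPDATE_VF_REG() macro (see if_igb.h)
 * is expected to extend each 32-bit hardware value into the 64-bit
 * soft counter using the previously sampled value.
 */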
5452static void
5453igb_update_vf_stats_counters(struct adapter *adapter)
5454{
5455	struct e1000_hw *hw = &adapter->hw;
5456	struct e1000_vf_stats	*stats;
5457
5458	if (adapter->link_speed == 0)
5459		return;
5460
5461	stats = (struct e1000_vf_stats *)adapter->stats;
5462
5463	UPDATE_VF_REG(E1000_VFGPRC,
5464	    stats->last_gprc, stats->gprc);
5465	UPDATE_VF_REG(E1000_VFGORC,
5466	    stats->last_gorc, stats->gorc);
5467	UPDATE_VF_REG(E1000_VFGPTC,
5468	    stats->last_gptc, stats->gptc);
5469	UPDATE_VF_REG(E1000_VFGOTC,
5470	    stats->last_gotc, stats->gotc);
5471	UPDATE_VF_REG(E1000_VFMPRC,
5472	    stats->last_mprc, stats->mprc);
5473}
5474
5475/* Export a single 32-bit register via a read-only sysctl. */
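/*
 * Instances of this handler are registered in igb_add_hw_stats() below
 * via SYSCTL_ADD_PROC(), with the adapter as arg1 and the register
 * offset (e.g. E1000_TDH(n) or E1000_RDH(n)) as arg2.
 */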
5476static int
5477igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5478{
5479	struct adapter *adapter;
5480	u_int val;
5481
5482	adapter = oidp->oid_arg1;
5483	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5484	return (sysctl_handle_int(oidp, &val, 0, req));
5485}
5486
5487/*
5488**  Tuneable interrupt rate handler
5489*/
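/*
 * The EITR interval field (bits 14:2) is treated here as the throttle
 * interval in microseconds, so the value reported is roughly
 * 1000000/interval interrupts per second (0 when unthrottled).  A
 * value written by the user is parsed but is not written back to the
 * EITR register by this handler.
 */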
5490static int
5491igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5492{
5493	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5494	int			error;
5495	u32			reg, usec, rate;
5496
5497	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5498	usec = ((reg & 0x7FFC) >> 2);
5499	if (usec > 0)
5500		rate = 1000000 / usec;
5501	else
5502		rate = 0;
5503	error = sysctl_handle_int(oidp, &rate, 0, req);
5504	if (error || !req->newptr)
5505		return error;
5506		return (error);
5507	return (0);
5508
5509/*
5510 * Add sysctl variables, one per statistic, to the system.
5511 */
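/*
 * The nodes created here hang off the device's sysctl tree, so for
 * unit 0 (chosen only for illustration) they show up as
 * dev.igb.0.mac_stats.*, dev.igb.0.queue0.*, dev.igb.0.interrupts.*
 * and dev.igb.0.host.*.
 */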
5512static void
5513igb_add_hw_stats(struct adapter *adapter)
5514{
5515	device_t dev = adapter->dev;
5516
5517	struct tx_ring *txr = adapter->tx_rings;
5518	struct rx_ring *rxr = adapter->rx_rings;
5519
5520	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5521	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5522	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5523	struct e1000_hw_stats *stats = adapter->stats;
5524
5525	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5526	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5527
5528#define QUEUE_NAME_LEN 32
5529	char namebuf[QUEUE_NAME_LEN];
5530
5531	/* Driver Statistics */
5532	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5533			CTLFLAG_RD, &adapter->link_irq, 0,
5534			"Link MSIX IRQ Handled");
5535	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5536			CTLFLAG_RD, &adapter->dropped_pkts,
5537			"Driver dropped packets");
5538	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5539			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5540			"Driver tx dma failure in xmit");
5541	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5542			CTLFLAG_RD, &adapter->rx_overruns,
5543			"RX overruns");
5544	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5545			CTLFLAG_RD, &adapter->watchdog_events,
5546			"Watchdog timeouts");
5547
5548	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5549			CTLFLAG_RD, &adapter->device_control,
5550			"Device Control Register");
5551	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5552			CTLFLAG_RD, &adapter->rx_control,
5553			"Receiver Control Register");
5554	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5555			CTLFLAG_RD, &adapter->int_mask,
5556			"Interrupt Mask");
5557	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5558			CTLFLAG_RD, &adapter->eint_mask,
5559			"Extended Interrupt Mask");
5560	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5561			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5562			"Transmit Buffer Packet Allocation");
5563	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5564			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5565			"Receive Buffer Packet Allocation");
5566	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5567			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5568			"Flow Control High Watermark");
5569	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5570			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5571			"Flow Control Low Watermark");
5572
5573	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5574		struct lro_ctrl *lro = &rxr->lro;
5575
5576		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5577		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5578					    CTLFLAG_RD, NULL, "Queue Name");
5579		queue_list = SYSCTL_CHILDREN(queue_node);
5580
5581		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5582				CTLFLAG_RD, &adapter->queues[i],
5583				sizeof(&adapter->queues[i]),
5584				igb_sysctl_interrupt_rate_handler,
5585				"IU", "Interrupt Rate");
5586
5587		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5588				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5589				igb_sysctl_reg_handler, "IU",
5590				"Transmit Descriptor Head");
5591		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5592				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5593				igb_sysctl_reg_handler, "IU",
5594				"Transmit Descriptor Tail");
5595		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5596				CTLFLAG_RD, &txr->no_desc_avail,
5597				"Queue No Descriptor Available");
5598		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5599				CTLFLAG_RD, &txr->tx_packets,
5600				"Queue Packets Transmitted");
5601
5602		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5603				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5604				igb_sysctl_reg_handler, "IU",
5605				"Receive Descriptor Head");
5606		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5607				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5608				igb_sysctl_reg_handler, "IU",
5609				"Receive Descriptor Tail");
5610		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5611				CTLFLAG_RD, &rxr->rx_packets,
5612				"Queue Packets Received");
5613		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5614				CTLFLAG_RD, &rxr->rx_bytes,
5615				"Queue Bytes Received");
5616		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5617				CTLFLAG_RD, &lro->lro_queued, 0,
5618				"LRO Queued");
5619		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5620				CTLFLAG_RD, &lro->lro_flushed, 0,
5621				"LRO Flushed");
5622	}
5623
5624	/* MAC stats get their own sub node */
5625
5626	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5627				    CTLFLAG_RD, NULL, "MAC Statistics");
5628	stat_list = SYSCTL_CHILDREN(stat_node);
5629
5630	/*
5631	** The VF adapter has a very limited set of stats
5632	** since it's not managing the hardware directly.
5633	*/
5634	if (adapter->vf_ifp) {
5635		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5636				CTLFLAG_RD, &stats->gprc,
5637				"Good Packets Received");
5638		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5639				CTLFLAG_RD, &stats->gptc,
5640				"Good Packets Transmitted");
5641		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5642				CTLFLAG_RD, &stats->gorc,
5643				"Good Octets Received");
5644		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5645				CTLFLAG_RD, &stats->gotc,
5646				"Good Octets Transmitted");
5647		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5648				CTLFLAG_RD, &stats->mprc,
5649				"Multicast Packets Received");
5650		return;
5651	}
5652
5653	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5654			CTLFLAG_RD, &stats->ecol,
5655			"Excessive collisions");
5656	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5657			CTLFLAG_RD, &stats->scc,
5658			"Single collisions");
5659	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5660			CTLFLAG_RD, &stats->mcc,
5661			"Multiple collisions");
5662	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5663			CTLFLAG_RD, &stats->latecol,
5664			"Late collisions");
5665	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5666			CTLFLAG_RD, &stats->colc,
5667			"Collision Count");
5668	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5669			CTLFLAG_RD, &stats->symerrs,
5670			"Symbol Errors");
5671	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5672			CTLFLAG_RD, &stats->sec,
5673			"Sequence Errors");
5674	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5675			CTLFLAG_RD, &stats->dc,
5676			"Defer Count");
5677	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5678			CTLFLAG_RD, &stats->mpc,
5679			"Missed Packets");
5680	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5681			CTLFLAG_RD, &stats->rnbc,
5682			"Receive No Buffers");
5683	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5684			CTLFLAG_RD, &stats->ruc,
5685			"Receive Undersize");
5686	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5687			CTLFLAG_RD, &stats->rfc,
5688			"Fragmented Packets Received");
5689	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5690			CTLFLAG_RD, &stats->roc,
5691			"Oversized Packets Received");
5692	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5693			CTLFLAG_RD, &stats->rjc,
5694			"Received Jabber");
5695	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5696			CTLFLAG_RD, &stats->rxerrc,
5697			"Receive Errors");
5698	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5699			CTLFLAG_RD, &stats->crcerrs,
5700			"CRC errors");
5701	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5702			CTLFLAG_RD, &stats->algnerrc,
5703			"Alignment Errors");
5704	/* On 82575 these are collision counts */
5705	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5706			CTLFLAG_RD, &stats->cexterr,
5707			"Collision/Carrier extension errors");
5708	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5709			CTLFLAG_RD, &stats->xonrxc,
5710			"XON Received");
5711	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5712			CTLFLAG_RD, &stats->xontxc,
5713			"XON Transmitted");
5714	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5715			CTLFLAG_RD, &stats->xoffrxc,
5716			"XOFF Received");
5717	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5718			CTLFLAG_RD, &stats->xofftxc,
5719			"XOFF Transmitted");
5720	/* Packet Reception Stats */
5721	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5722			CTLFLAG_RD, &stats->tpr,
5723			"Total Packets Received");
5724	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5725			CTLFLAG_RD, &stats->gprc,
5726			"Good Packets Received");
5727	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5728			CTLFLAG_RD, &stats->bprc,
5729			"Broadcast Packets Received");
5730	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5731			CTLFLAG_RD, &stats->mprc,
5732			"Multicast Packets Received");
5733	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5734			CTLFLAG_RD, &stats->prc64,
5735			"64 byte frames received");
5736	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5737			CTLFLAG_RD, &stats->prc127,
5738			"65-127 byte frames received");
5739	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5740			CTLFLAG_RD, &stats->prc255,
5741			"128-255 byte frames received");
5742	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5743			CTLFLAG_RD, &stats->prc511,
5744			"256-511 byte frames received");
5745	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5746			CTLFLAG_RD, &stats->prc1023,
5747			"512-1023 byte frames received");
5748	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5749			CTLFLAG_RD, &stats->prc1522,
5750			"1024-1522 byte frames received");
5751	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5752			CTLFLAG_RD, &stats->gorc,
5753			"Good Octets Received");
5754
5755	/* Packet Transmission Stats */
5756	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5757			CTLFLAG_RD, &stats->gotc,
5758			"Good Octets Transmitted");
5759	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5760			CTLFLAG_RD, &stats->tpt,
5761			"Total Packets Transmitted");
5762	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5763			CTLFLAG_RD, &stats->gptc,
5764			"Good Packets Transmitted");
5765	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5766			CTLFLAG_RD, &stats->bptc,
5767			"Broadcast Packets Transmitted");
5768	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5769			CTLFLAG_RD, &stats->mptc,
5770			"Multicast Packets Transmitted");
5771	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5772			CTLFLAG_RD, &stats->ptc64,
5773			"64 byte frames transmitted");
5774	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5775			CTLFLAG_RD, &stats->ptc127,
5776			"65-127 byte frames transmitted");
5777	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5778			CTLFLAG_RD, &stats->ptc255,
5779			"128-255 byte frames transmitted");
5780	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5781			CTLFLAG_RD, &stats->ptc511,
5782			"256-511 byte frames transmitted");
5783	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5784			CTLFLAG_RD, &stats->ptc1023,
5785			"512-1023 byte frames transmitted");
5786	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5787			CTLFLAG_RD, &stats->ptc1522,
5788			"1024-1522 byte frames transmitted");
5789	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5790			CTLFLAG_RD, &stats->tsctc,
5791			"TSO Contexts Transmitted");
5792	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5793			CTLFLAG_RD, &stats->tsctfc,
5794			"TSO Contexts Failed");
5795
5796
5797	/* Interrupt Stats */
5798
5799	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5800				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5801	int_list = SYSCTL_CHILDREN(int_node);
5802
5803	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5804			CTLFLAG_RD, &stats->iac,
5805			"Interrupt Assertion Count");
5806
5807	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5808			CTLFLAG_RD, &stats->icrxptc,
5809			"Interrupt Cause Rx Pkt Timer Expire Count");
5810
5811	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5812			CTLFLAG_RD, &stats->icrxatc,
5813			"Interrupt Cause Rx Abs Timer Expire Count");
5814
5815	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5816			CTLFLAG_RD, &stats->ictxptc,
5817			"Interrupt Cause Tx Pkt Timer Expire Count");
5818
5819	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5820			CTLFLAG_RD, &stats->ictxatc,
5821			"Interrupt Cause Tx Abs Timer Expire Count");
5822
5823	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5824			CTLFLAG_RD, &stats->ictxqec,
5825			"Interrupt Cause Tx Queue Empty Count");
5826
5827	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5828			CTLFLAG_RD, &stats->ictxqmtc,
5829			"Interrupt Cause Tx Queue Min Thresh Count");
5830
5831	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5832			CTLFLAG_RD, &stats->icrxdmtc,
5833			"Interrupt Cause Rx Desc Min Thresh Count");
5834
5835	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5836			CTLFLAG_RD, &stats->icrxoc,
5837			"Interrupt Cause Receiver Overrun Count");
5838
5839	/* Host to Card Stats */
5840
5841	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5842				    CTLFLAG_RD, NULL,
5843				    "Host to Card Statistics");
5844
5845	host_list = SYSCTL_CHILDREN(host_node);
5846
5847	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5848			CTLFLAG_RD, &stats->cbtmpc,
5849			"Circuit Breaker Tx Packet Count");
5850
5851	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5852			CTLFLAG_RD, &stats->htdpmc,
5853			"Host Transmit Discarded Packets");
5854
5855	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5856			CTLFLAG_RD, &stats->rpthc,
5857			"Rx Packets To Host");
5858
5859	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5860			CTLFLAG_RD, &stats->cbrmpc,
5861			"Circuit Breaker Rx Packet Count");
5862
5863	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5864			CTLFLAG_RD, &stats->cbrdpc,
5865			"Circuit Breaker Rx Dropped Count");
5866
5867	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5868			CTLFLAG_RD, &stats->hgptc,
5869			"Host Good Packets Tx Count");
5870
5871	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5872			CTLFLAG_RD, &stats->htcbdpc,
5873			"Host Tx Circuit Breaker Dropped Count");
5874
5875	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5876			CTLFLAG_RD, &stats->hgorc,
5877			"Host Good Octets Received Count");
5878
5879	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5880			CTLFLAG_RD, &stats->hgotc,
5881			"Host Good Octets Transmit Count");
5882
5883	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5884			CTLFLAG_RD, &stats->lenerrs,
5885			"Length Errors");
5886
5887	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5888			CTLFLAG_RD, &stats->scvpc,
5889			"SerDes/SGMII Code Violation Pkt Count");
5890
5891	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5892			CTLFLAG_RD, &stats->hrmpc,
5893			"Header Redirection Missed Packet Count");
5894}
5895
5896
5897/**********************************************************************
5898 *
5899 *  This routine provides a way to dump out the adapter eeprom,
5900 *  often a useful debug/service tool. Only the first 32 words are
5901 *  dumped; the data that matters lives within that range.
5902 *
5903 **********************************************************************/
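/*
 * Triggered from userland by writing 1 to the sysctl node this handler
 * is attached to elsewhere in the driver, e.g. (node name assumed):
 *	sysctl dev.igb.0.nvm=1
 */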
5904static int
5905igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5906{
5907	struct adapter *adapter;
5908	int error;
5909	int result;
5910
5911	result = -1;
5912	error = sysctl_handle_int(oidp, &result, 0, req);
5913
5914	if (error || !req->newptr)
5915		return (error);
5916
5917	/*
5918	 * This value will cause a hex dump of the
5919	 * first 32 16-bit words of the EEPROM to
5920	 * the screen.
5921	 */
5922	if (result == 1) {
5923		adapter = (struct adapter *)arg1;
5924		igb_print_nvm_info(adapter);
5925	}
5926
5927	return (error);
5928}
5929
5930static void
5931igb_print_nvm_info(struct adapter *adapter)
5932{
5933	u16	eeprom_data;
5934	int	i, j, row = 0;
5935
5936	/* It's a bit crude, but it gets the job done */
5937	printf("\nInterface EEPROM Dump:\n");
5938	printf("Offset\n0x0000  ");
5939	for (i = 0, j = 0; i < 32; i++, j++) {
5940		if (j == 8) { /* Make the offset block */
5941			j = 0; ++row;
5942			printf("\n0x00%x0  ", row);
5943		}
5944		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5945		printf("%04x ", eeprom_data);
5946	}
5947	printf("\n");
5948}
5949
5950static void
5951igb_set_sysctl_value(struct adapter *adapter, const char *name,
5952	const char *description, int *limit, int value)
5953{
5954	*limit = value;
5955	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5956	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5957	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5958}
5959
5960/*
5961** Set flow control using sysctl:
5962** Flow control values:
5963** 	0 - off
5964**	1 - rx pause
5965**	2 - tx pause
5966**	3 - full
5967*/
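/*
 * Example usage (sysctl node name and unit assumed for illustration):
 *	sysctl dev.igb.0.fc=3
 * requests full flow control, which is then forced onto the MAC via
 * e1000_force_mac_fc().
 */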
5968static int
5969igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5970{
5971	int		error;
5972	static int	input = 3; /* default is full */
5973	struct adapter	*adapter = (struct adapter *) arg1;
5974
5975	error = sysctl_handle_int(oidp, &input, 0, req);
5976
5977	if ((error) || (req->newptr == NULL))
5978		return (error);
5979
5980	switch (input) {
5981		case e1000_fc_rx_pause:
5982		case e1000_fc_tx_pause:
5983		case e1000_fc_full:
5984		case e1000_fc_none:
5985			adapter->hw.fc.requested_mode = input;
5986			adapter->fc = input;
5987			break;
5988		default:
5989			/* Do nothing */
5990			return (error);
5991	}
5992
5993	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5994	e1000_force_mac_fc(&adapter->hw);
5995	return (error);
5996}
5997
5998/*
5999** Manage DMA Coalesce:
6000** Control values:
6001** 	0/1 - off/on
6002**	Legal timer values are:
6003**	250, 500, and 1000-10000 in steps of 1000
6004*/
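/*
 * Example usage (sysctl node name and unit assumed for illustration):
 *	sysctl dev.igb.0.dmac=1000
 * selects a timer value of 1000 and reinitializes the interface via
 * igb_init() so the new setting takes effect.
 */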
6005static int
6006igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6007{
6008	struct adapter *adapter = (struct adapter *) arg1;
6009	int		error;
6010
6011	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6012
6013	if ((error) || (req->newptr == NULL))
6014		return (error);
6015
6016	switch (adapter->dmac) {
6017		case 0:
6018			/* Disabling */
6019			break;
6020		case 1: /* Just enable and use default */
6021			adapter->dmac = 1000;
6022			break;
6023		case 250:
6024		case 500:
6025		case 1000:
6026		case 2000:
6027		case 3000:
6028		case 4000:
6029		case 5000:
6030		case 6000:
6031		case 7000:
6032		case 8000:
6033		case 9000:
6034		case 10000:
6035			/* Legal values - allow */
6036			break;
6037		default:
6038			/* Illegal value, disable DMA coalescing */
6039			adapter->dmac = 0;
6040			return (error);
6041	}
6042	/* Reinit the interface */
6043	igb_init(adapter);
6044	return (error);
6045}
6046
6047/*
6048** Manage Energy Efficient Ethernet:
6049** Control values:
6050**     0/1 - enabled/disabled
6051*/
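/*
 * The value stored is hw.dev_spec._82575.eee_disable, so writing 1
 * disables EEE and writing 0 enables it; the interface is then
 * reinitialized under the core lock for the change to take effect.
 */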
6052static int
6053igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6054{
6055	struct adapter	*adapter = (struct adapter *) arg1;
6056	int		error, value;
6057
6058	value = adapter->hw.dev_spec._82575.eee_disable;
6059	error = sysctl_handle_int(oidp, &value, 0, req);
6060	if (error || req->newptr == NULL)
6061		return (error);
6062	IGB_CORE_LOCK(adapter);
6063	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6064	igb_init_locked(adapter);
6065	IGB_CORE_UNLOCK(adapter);
6066	return (0);
6067}
6068