1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 218583 2011-02-11 19:49:07Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 2.1.4";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to select devices to load on
109 *  Last field stores an index into e1000_strings
110 *  Last entry must be all 0s
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
146						PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
152	/* required last entry */
153	{ 0, 0, 0, 0, 0}
154};
155
156/*********************************************************************
157 *  Table of branding strings for all supported NICs.
158 *********************************************************************/
159
160static char *igb_strings[] = {
161	"Intel(R) PRO/1000 Network Connection"
162};
163
164/*********************************************************************
165 *  Function prototypes
166 *********************************************************************/
167static int	igb_probe(device_t);
168static int	igb_attach(device_t);
169static int	igb_detach(device_t);
170static int	igb_shutdown(device_t);
171static int	igb_suspend(device_t);
172static int	igb_resume(device_t);
173static void	igb_start(struct ifnet *);
174static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
175#if __FreeBSD_version >= 800000
176static int	igb_mq_start(struct ifnet *, struct mbuf *);
177static int	igb_mq_start_locked(struct ifnet *,
178		    struct tx_ring *, struct mbuf *);
179static void	igb_qflush(struct ifnet *);
180#endif
181static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
182static void	igb_init(void *);
183static void	igb_init_locked(struct adapter *);
184static void	igb_stop(void *);
185static void	igb_media_status(struct ifnet *, struct ifmediareq *);
186static int	igb_media_change(struct ifnet *);
187static void	igb_identify_hardware(struct adapter *);
188static int	igb_allocate_pci_resources(struct adapter *);
189static int	igb_allocate_msix(struct adapter *);
190static int	igb_allocate_legacy(struct adapter *);
191static int	igb_setup_msix(struct adapter *);
192static void	igb_free_pci_resources(struct adapter *);
193static void	igb_local_timer(void *);
194static void	igb_reset(struct adapter *);
195static int	igb_setup_interface(device_t, struct adapter *);
196static int	igb_allocate_queues(struct adapter *);
197static void	igb_configure_queues(struct adapter *);
198
199static int	igb_allocate_transmit_buffers(struct tx_ring *);
200static void	igb_setup_transmit_structures(struct adapter *);
201static void	igb_setup_transmit_ring(struct tx_ring *);
202static void	igb_initialize_transmit_units(struct adapter *);
203static void	igb_free_transmit_structures(struct adapter *);
204static void	igb_free_transmit_buffers(struct tx_ring *);
205
206static int	igb_allocate_receive_buffers(struct rx_ring *);
207static int	igb_setup_receive_structures(struct adapter *);
208static int	igb_setup_receive_ring(struct rx_ring *);
209static void	igb_initialize_receive_units(struct adapter *);
210static void	igb_free_receive_structures(struct adapter *);
211static void	igb_free_receive_buffers(struct rx_ring *);
212static void	igb_free_receive_ring(struct rx_ring *);
213
214static void	igb_enable_intr(struct adapter *);
215static void	igb_disable_intr(struct adapter *);
216static void	igb_update_stats_counters(struct adapter *);
217static bool	igb_txeof(struct tx_ring *);
218
219static __inline	void igb_rx_discard(struct rx_ring *, int);
220static __inline void igb_rx_input(struct rx_ring *,
221		    struct ifnet *, struct mbuf *, u32);
222
223static bool	igb_rxeof(struct igb_queue *, int, int *);
224static void	igb_rx_checksum(u32, struct mbuf *, u32);
225static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
226static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
227static void	igb_set_promisc(struct adapter *);
228static void	igb_disable_promisc(struct adapter *);
229static void	igb_set_multi(struct adapter *);
230static void	igb_update_link_status(struct adapter *);
231static void	igb_refresh_mbufs(struct rx_ring *, int);
232
233static void	igb_register_vlan(void *, struct ifnet *, u16);
234static void	igb_unregister_vlan(void *, struct ifnet *, u16);
235static void	igb_setup_vlan_hw_support(struct adapter *);
236
237static int	igb_xmit(struct tx_ring *, struct mbuf **);
238static int	igb_dma_malloc(struct adapter *, bus_size_t,
239		    struct igb_dma_alloc *, int);
240static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
241static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
242static void	igb_print_nvm_info(struct adapter *);
243static int 	igb_is_valid_ether_addr(u8 *);
244static void     igb_add_hw_stats(struct adapter *);
245
246static void	igb_vf_init_stats(struct adapter *);
247static void	igb_update_vf_stats_counters(struct adapter *);
248
249/* Management and WOL Support */
250static void	igb_init_manageability(struct adapter *);
251static void	igb_release_manageability(struct adapter *);
252static void     igb_get_hw_control(struct adapter *);
253static void     igb_release_hw_control(struct adapter *);
254static void     igb_enable_wakeup(device_t);
255static void     igb_led_func(void *, int);
256
257static int	igb_irq_fast(void *);
258static void	igb_add_rx_process_limit(struct adapter *, const char *,
259		    const char *, int *, int);
260static void	igb_handle_que(void *context, int pending);
261static void	igb_handle_link(void *context, int pending);
262
263/* These are MSIX only irq handlers */
264static void	igb_msix_que(void *);
265static void	igb_msix_link(void *);
266
267#ifdef DEVICE_POLLING
268static poll_handler_t igb_poll;
269#endif /* DEVICE_POLLING */
270
271/*********************************************************************
272 *  FreeBSD Device Interface Entry Points
273 *********************************************************************/
274
275static device_method_t igb_methods[] = {
276	/* Device interface */
277	DEVMETHOD(device_probe, igb_probe),
278	DEVMETHOD(device_attach, igb_attach),
279	DEVMETHOD(device_detach, igb_detach),
280	DEVMETHOD(device_shutdown, igb_shutdown),
281	DEVMETHOD(device_suspend, igb_suspend),
282	DEVMETHOD(device_resume, igb_resume),
283	{0, 0}
284};
285
286static driver_t igb_driver = {
287	"igb", igb_methods, sizeof(struct adapter),
288};
289
290static devclass_t igb_devclass;
291DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
292MODULE_DEPEND(igb, pci, 1, 1, 1);
293MODULE_DEPEND(igb, ether, 1, 1, 1);
294
295/*********************************************************************
296 *  Tunable default values.
297 *********************************************************************/
298
299/* Descriptor defaults */
300static int igb_rxd = IGB_DEFAULT_RXD;
301static int igb_txd = IGB_DEFAULT_TXD;
302TUNABLE_INT("hw.igb.rxd", &igb_rxd);
303TUNABLE_INT("hw.igb.txd", &igb_txd);
304
305/*
306** AIM: Adaptive Interrupt Moderation
307** which means that the interrupt rate
308** is varied over time based on the
309** traffic for that interrupt vector
310*/
311static int igb_enable_aim = TRUE;
312TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
313
314/*
315 * MSIX should be the default for best performance,
316 * but this allows it to be forced off for testing.
317 */
318static int igb_enable_msix = 1;
319TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
320
321/*
322** Tunable interrupt rate
323*/
324static int igb_max_interrupt_rate = 8000;
325TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
326
327/*
328** be DMA'd to a separate mbuf from the payload.
329** This can have memory alignment benefits, and
330** another plus is that small packets often fit
331** entirely into the header mbuf and thus use no
332** cluster. It is a very workload-dependent
333** feature.
334*/
335static bool igb_header_split = FALSE;
336TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
337
338/*
339** This will autoconfigure based on
340** the number of CPUs if left at 0.
341*/
342static int igb_num_queues = 0;
343TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
344
345/* How many packets rxeof tries to clean at a time */
346static int igb_rx_process_limit = 100;
347TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
348
349/* Flow control setting - default to FULL */
350static int igb_fc_setting = e1000_fc_full;
351TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
352
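/*
** Illustrative note: TUNABLE_INT() values are read from the kernel
** environment at module load, so the hw.igb.* knobs above are normally
** set in /boot/loader.conf, e.g. (arbitrary example values, not
** recommendations):
**
**	hw.igb.rxd="2048"
**	hw.igb.txd="2048"
**	hw.igb.num_queues="4"
**	hw.igb.enable_aim="0"
*/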
353/*********************************************************************
354 *  Device identification routine
355 *
356 *  igb_probe determines if the driver should be loaded on an
357 *  adapter based on its PCI vendor/device ID.
358 *
359 *  return BUS_PROBE_DEFAULT on success, positive on failure
360 *********************************************************************/
361
362static int
363igb_probe(device_t dev)
364{
365	char		adapter_name[60];
366	uint16_t	pci_vendor_id = 0;
367	uint16_t	pci_device_id = 0;
368	uint16_t	pci_subvendor_id = 0;
369	uint16_t	pci_subdevice_id = 0;
370	igb_vendor_info_t *ent;
371
372	INIT_DEBUGOUT("igb_probe: begin");
373
374	pci_vendor_id = pci_get_vendor(dev);
375	if (pci_vendor_id != IGB_VENDOR_ID)
376		return (ENXIO);
377
378	pci_device_id = pci_get_device(dev);
379	pci_subvendor_id = pci_get_subvendor(dev);
380	pci_subdevice_id = pci_get_subdevice(dev);
381
382	ent = igb_vendor_info_array;
383	while (ent->vendor_id != 0) {
384		if ((pci_vendor_id == ent->vendor_id) &&
385		    (pci_device_id == ent->device_id) &&
386
387		    ((pci_subvendor_id == ent->subvendor_id) ||
388		    (ent->subvendor_id == PCI_ANY_ID)) &&
389
390		    ((pci_subdevice_id == ent->subdevice_id) ||
391		    (ent->subdevice_id == PCI_ANY_ID))) {
392			sprintf(adapter_name, "%s %s",
393				igb_strings[ent->index],
394				igb_driver_version);
395			device_set_desc_copy(dev, adapter_name);
396			return (BUS_PROBE_DEFAULT);
397		}
398		ent++;
399	}
400
401	return (ENXIO);
402}
403
404/*********************************************************************
405 *  Device initialization routine
406 *
407 *  The attach entry point is called when the driver is being loaded.
408 *  This routine identifies the type of hardware, allocates all resources
409 *  and initializes the hardware.
410 *
411 *  return 0 on success, positive on failure
412 *********************************************************************/
413
414static int
415igb_attach(device_t dev)
416{
417	struct adapter	*adapter;
418	int		error = 0;
419	u16		eeprom_data;
420
421	INIT_DEBUGOUT("igb_attach: begin");
422
423	adapter = device_get_softc(dev);
424	adapter->dev = adapter->osdep.dev = dev;
425	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
426
427	/* SYSCTL stuff */
428	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
429	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
431	    igb_sysctl_nvm_info, "I", "NVM Information");
432
433	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
434	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
435	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
436	    &igb_fc_setting, 0, "Flow Control");
437
438	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
439	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
440	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
441	    &igb_enable_aim, 1, "Interrupt Moderation");
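	/*
	 * Illustrative note: the nodes added above live under the
	 * per-device sysctl tree, e.g. "dev.igb.0.flow_control" and
	 * "dev.igb.0.enable_aim" (unit 0 is just an example), and can
	 * be changed at runtime with sysctl(8).
	 */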
442
443	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
444
445	/* Determine hardware and mac info */
446	igb_identify_hardware(adapter);
447
448	/* Setup PCI resources */
449	if (igb_allocate_pci_resources(adapter)) {
450		device_printf(dev, "Allocation of PCI resources failed\n");
451		error = ENXIO;
452		goto err_pci;
453	}
454
455	/* Do Shared Code initialization */
456	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
457		device_printf(dev, "Setup of Shared code failed\n");
458		error = ENXIO;
459		goto err_pci;
460	}
461
462	e1000_get_bus_info(&adapter->hw);
463
464	/* Sysctls for limiting the amount of work done in the taskqueue */
465	igb_add_rx_process_limit(adapter, "rx_processing_limit",
466	    "max number of rx packets to process", &adapter->rx_process_limit,
467	    igb_rx_process_limit);
468
469	/*
470	 * Validate the number of transmit and receive descriptors. It
471	 * must not exceed the hardware maximum, and the ring byte size
472	 * must be a multiple of IGB_DBA_ALIGN.
473	 */
474	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
475	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
476		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
477		    IGB_DEFAULT_TXD, igb_txd);
478		adapter->num_tx_desc = IGB_DEFAULT_TXD;
479	} else
480		adapter->num_tx_desc = igb_txd;
481	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
482	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
483		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
484		    IGB_DEFAULT_RXD, igb_rxd);
485		adapter->num_rx_desc = IGB_DEFAULT_RXD;
486	} else
487		adapter->num_rx_desc = igb_rxd;
488
489	adapter->hw.mac.autoneg = DO_AUTO_NEG;
490	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
491	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
492
493	/* Copper options */
494	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
495		adapter->hw.phy.mdix = AUTO_ALL_MODES;
496		adapter->hw.phy.disable_polarity_correction = FALSE;
497		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
498	}
499
500	/*
501	 * Set the frame limits assuming
502	 * standard Ethernet-sized frames.
503	 */
504	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
505	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
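	/* With standard Ethernet constants these work out to 1518 and 64 bytes. */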
506
507	/*
508	** Allocate and Setup Queues
509	*/
510	if (igb_allocate_queues(adapter)) {
511		error = ENOMEM;
512		goto err_pci;
513	}
514
515	/* Allocate the appropriate stats memory */
516	if (adapter->vf_ifp) {
517		adapter->stats =
518		    (struct e1000_vf_stats *)malloc(sizeof \
519		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
520		igb_vf_init_stats(adapter);
521	} else
522		adapter->stats =
523		    (struct e1000_hw_stats *)malloc(sizeof \
524		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
525	if (adapter->stats == NULL) {
526		device_printf(dev, "Can not allocate stats memory\n");
527		error = ENOMEM;
528		goto err_late;
529	}
530
531	/* Allocate multicast array memory. */
532	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
533	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
534	if (adapter->mta == NULL) {
535		device_printf(dev, "Can not allocate multicast setup array\n");
536		error = ENOMEM;
537		goto err_late;
538	}
539
540	/*
541	** Start from a known state; this is
542	** important for reading the NVM and
543	** MAC address from it.
544	*/
545	e1000_reset_hw(&adapter->hw);
546
547	/* Make sure we have a good EEPROM before we read from it */
548	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
549		/*
550		** Some PCI-E parts fail the first check due to
551		** the link being in a sleep state; call it again.
552		** If it fails a second time it is a real issue.
553		*/
554		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
555			device_printf(dev,
556			    "The EEPROM Checksum Is Not Valid\n");
557			error = EIO;
558			goto err_late;
559		}
560	}
561
562	/*
563	** Copy the permanent MAC address out of the EEPROM
564	*/
565	if (e1000_read_mac_addr(&adapter->hw) < 0) {
566		device_printf(dev, "EEPROM read error while reading MAC"
567		    " address\n");
568		error = EIO;
569		goto err_late;
570	}
571	/* Check its sanity */
572	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
573		device_printf(dev, "Invalid MAC address\n");
574		error = EIO;
575		goto err_late;
576	}
577
578	/*
579	** Configure Interrupts
580	*/
581	if ((adapter->msix > 1) && (igb_enable_msix))
582		error = igb_allocate_msix(adapter);
583	else /* MSI or Legacy */
584		error = igb_allocate_legacy(adapter);
585	if (error)
586		goto err_late;
587
588	/* Setup OS specific network interface */
589	if (igb_setup_interface(dev, adapter) != 0)
590		goto err_late;
591
592	/* Now get a good starting state */
593	igb_reset(adapter);
594
595	/* Initialize statistics */
596	igb_update_stats_counters(adapter);
597
598	adapter->hw.mac.get_link_status = 1;
599	igb_update_link_status(adapter);
600
601	/* Indicate SOL/IDER usage */
602	if (e1000_check_reset_block(&adapter->hw))
603		device_printf(dev,
604		    "PHY reset is blocked due to SOL/IDER session.\n");
605
606	/* Determine if we have to control management hardware */
607	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
608
609	/*
610	 * Set up Wake-on-LAN
611	 */
612	/* APME bit in EEPROM is mapped to WUC.APME */
613	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
614	if (eeprom_data)
615		adapter->wol = E1000_WUFC_MAG;
616
617	/* Register for VLAN events */
618	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
619	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
620	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
621	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
622
623	igb_add_hw_stats(adapter);
624
625	/* Tell the stack that the interface is not active */
626	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
627
628	adapter->led_dev = led_create(igb_led_func, adapter,
629	    device_get_nameunit(dev));
630
631	INIT_DEBUGOUT("igb_attach: end");
632
633	return (0);
634
635err_late:
636	igb_free_transmit_structures(adapter);
637	igb_free_receive_structures(adapter);
638	igb_release_hw_control(adapter);
639	if (adapter->ifp != NULL)
640		if_free(adapter->ifp);
641err_pci:
642	igb_free_pci_resources(adapter);
643	free(adapter->mta, M_DEVBUF);
644	IGB_CORE_LOCK_DESTROY(adapter);
645
646	return (error);
647}
648
649/*********************************************************************
650 *  Device removal routine
651 *
652 *  The detach entry point is called when the driver is being removed.
653 *  This routine stops the adapter and deallocates all the resources
654 *  that were allocated for driver operation.
655 *
656 *  return 0 on success, positive on failure
657 *********************************************************************/
658
659static int
660igb_detach(device_t dev)
661{
662	struct adapter	*adapter = device_get_softc(dev);
663	struct ifnet	*ifp = adapter->ifp;
664
665	INIT_DEBUGOUT("igb_detach: begin");
666
667	/* Make sure VLANs are not using the driver */
668	if (adapter->ifp->if_vlantrunk != NULL) {
669		device_printf(dev,"Vlan in use, detach first\n");
670		return (EBUSY);
671	}
672
673	if (adapter->led_dev != NULL)
674		led_destroy(adapter->led_dev);
675
676#ifdef DEVICE_POLLING
677	if (ifp->if_capenable & IFCAP_POLLING)
678		ether_poll_deregister(ifp);
679#endif
680
681	IGB_CORE_LOCK(adapter);
682	adapter->in_detach = 1;
683	igb_stop(adapter);
684	IGB_CORE_UNLOCK(adapter);
685
686	e1000_phy_hw_reset(&adapter->hw);
687
688	/* Give control back to firmware */
689	igb_release_manageability(adapter);
690	igb_release_hw_control(adapter);
691
692	if (adapter->wol) {
693		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
694		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
695		igb_enable_wakeup(dev);
696	}
697
698	/* Unregister VLAN events */
699	if (adapter->vlan_attach != NULL)
700		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
701	if (adapter->vlan_detach != NULL)
702		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
703
704	ether_ifdetach(adapter->ifp);
705
706	callout_drain(&adapter->timer);
707
708	igb_free_pci_resources(adapter);
709	bus_generic_detach(dev);
710	if_free(ifp);
711
712	igb_free_transmit_structures(adapter);
713	igb_free_receive_structures(adapter);
714	free(adapter->mta, M_DEVBUF);
715
716	IGB_CORE_LOCK_DESTROY(adapter);
717
718	return (0);
719}
720
721/*********************************************************************
722 *
723 *  Shutdown entry point
724 *
725 **********************************************************************/
726
727static int
728igb_shutdown(device_t dev)
729{
730	return igb_suspend(dev);
731}
732
733/*
734 * Suspend/resume device methods.
735 */
736static int
737igb_suspend(device_t dev)
738{
739	struct adapter *adapter = device_get_softc(dev);
740
741	IGB_CORE_LOCK(adapter);
742
743	igb_stop(adapter);
744
745        igb_release_manageability(adapter);
746	igb_release_hw_control(adapter);
747
748        if (adapter->wol) {
749                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
750                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
751                igb_enable_wakeup(dev);
752        }
753
754	IGB_CORE_UNLOCK(adapter);
755
756	return bus_generic_suspend(dev);
757}
758
759static int
760igb_resume(device_t dev)
761{
762	struct adapter *adapter = device_get_softc(dev);
763	struct ifnet *ifp = adapter->ifp;
764
765	IGB_CORE_LOCK(adapter);
766	igb_init_locked(adapter);
767	igb_init_manageability(adapter);
768
769	if ((ifp->if_flags & IFF_UP) &&
770	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
771		igb_start(ifp);
772
773	IGB_CORE_UNLOCK(adapter);
774
775	return bus_generic_resume(dev);
776}
777
778
779/*********************************************************************
780 *  Transmit entry point
781 *
782 *  igb_start is called by the stack to initiate a transmit.
783 *  The driver will remain in this routine as long as there are
784 *  packets to transmit and transmit resources are available.
785 *  In case resources are not available, the stack is notified
786 *  and the packet is requeued.
787 **********************************************************************/
788
789static void
790igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
791{
792	struct adapter	*adapter = ifp->if_softc;
793	struct mbuf	*m_head;
794
795	IGB_TX_LOCK_ASSERT(txr);
796
797	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
798	    IFF_DRV_RUNNING)
799		return;
800	if (!adapter->link_active)
801		return;
802
803	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
804		/* Cleanup if TX descriptors are low */
805		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
806			igb_txeof(txr);
807		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
808			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
809			break;
810		}
811		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
812		if (m_head == NULL)
813			break;
814		/*
815		 *  Encapsulation can modify our pointer, and/or make it
816		 *  NULL on failure.  In that event, we can't requeue.
817		 */
818		if (igb_xmit(txr, &m_head)) {
819			if (m_head == NULL)
820				break;
821			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
822			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
823			break;
824		}
825
826		/* Send a copy of the frame to the BPF listener */
827		ETHER_BPF_MTAP(ifp, m_head);
828
829		/* Set watchdog on */
830		txr->watchdog_time = ticks;
831		txr->queue_status = IGB_QUEUE_WORKING;
832	}
833}
834
835/*
836 * Legacy TX driver routine, called from the
837 * stack, always uses tx[0], and spins for it.
838 * Should not be used with multiqueue tx
839 */
840static void
841igb_start(struct ifnet *ifp)
842{
843	struct adapter	*adapter = ifp->if_softc;
844	struct tx_ring	*txr = adapter->tx_rings;
845
846	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
847		IGB_TX_LOCK(txr);
848		igb_start_locked(txr, ifp);
849		IGB_TX_UNLOCK(txr);
850	}
851	return;
852}
853
854#if __FreeBSD_version >= 800000
855/*
856** Multiqueue Transmit driver
857**
858*/
859static int
860igb_mq_start(struct ifnet *ifp, struct mbuf *m)
861{
862	struct adapter		*adapter = ifp->if_softc;
863	struct igb_queue	*que;
864	struct tx_ring		*txr;
865	int 			i = 0, err = 0;
866
867	/* Which queue to use */
868	if ((m->m_flags & M_FLOWID) != 0)
869		i = m->m_pkthdr.flowid % adapter->num_queues;
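		/* e.g. with 4 queues, a flowid of 29 selects ring 1 (29 % 4) */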
870
871	txr = &adapter->tx_rings[i];
872	que = &adapter->queues[i];
873
874	if (IGB_TX_TRYLOCK(txr)) {
875		err = igb_mq_start_locked(ifp, txr, m);
876		IGB_TX_UNLOCK(txr);
877	} else {
878		err = drbr_enqueue(ifp, txr->br, m);
879		taskqueue_enqueue(que->tq, &que->que_task);
880	}
881
882	return (err);
883}
884
885static int
886igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
887{
888	struct adapter  *adapter = txr->adapter;
889        struct mbuf     *next;
890        int             err = 0, enq;
891
892	IGB_TX_LOCK_ASSERT(txr);
893
894	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
895	    IFF_DRV_RUNNING || adapter->link_active == 0) {
896		if (m != NULL)
897			err = drbr_enqueue(ifp, txr->br, m);
898		return (err);
899	}
900
901	enq = 0;
902	if (m == NULL) {
903		next = drbr_dequeue(ifp, txr->br);
904	} else if (drbr_needs_enqueue(ifp, txr->br)) {
905		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
906			return (err);
907		next = drbr_dequeue(ifp, txr->br);
908	} else
909		next = m;
910
911	/* Process the queue */
912	while (next != NULL) {
913		/* Call cleanup if number of TX descriptors low */
914		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
915			igb_txeof(txr);
916		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
917			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
918			break;
919		}
920		if ((err = igb_xmit(txr, &next)) != 0) {
921			if (next != NULL)
922				err = drbr_enqueue(ifp, txr->br, next);
923			break;
924		}
925		enq++;
926		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
927		ETHER_BPF_MTAP(ifp, next);
928		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
929			break;
930		next = drbr_dequeue(ifp, txr->br);
931	}
932	if (enq > 0) {
933		/* Set the watchdog */
934		txr->queue_status = IGB_QUEUE_WORKING;
935		txr->watchdog_time = ticks;
936	}
937	return (err);
938}
939
940/*
941** Flush all ring buffers
942*/
943static void
944igb_qflush(struct ifnet *ifp)
945{
946	struct adapter	*adapter = ifp->if_softc;
947	struct tx_ring	*txr = adapter->tx_rings;
948	struct mbuf	*m;
949
950	for (int i = 0; i < adapter->num_queues; i++, txr++) {
951		IGB_TX_LOCK(txr);
952		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
953			m_freem(m);
954		IGB_TX_UNLOCK(txr);
955	}
956	if_qflush(ifp);
957}
958#endif /* __FreeBSD_version >= 800000 */
959
960/*********************************************************************
961 *  Ioctl entry point
962 *
963 *  igb_ioctl is called when the user wants to configure the
964 *  interface.
965 *
966 *  return 0 on success, positive on failure
967 **********************************************************************/
968
969static int
970igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
971{
972	struct adapter	*adapter = ifp->if_softc;
973	struct ifreq *ifr = (struct ifreq *)data;
974#ifdef INET
975	struct ifaddr *ifa = (struct ifaddr *)data;
976#endif
977	int error = 0;
978
979	if (adapter->in_detach)
980		return (error);
981
982	switch (command) {
983	case SIOCSIFADDR:
984#ifdef INET
985		if (ifa->ifa_addr->sa_family == AF_INET) {
986			/*
987			 * XXX
988			 * Since resetting hardware takes a very long time
989			 * and results in link renegotiation, we only
990			 * initialize the hardware when it is absolutely
991			 * required.
992			 */
993			ifp->if_flags |= IFF_UP;
994			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
995				IGB_CORE_LOCK(adapter);
996				igb_init_locked(adapter);
997				IGB_CORE_UNLOCK(adapter);
998			}
999			if (!(ifp->if_flags & IFF_NOARP))
1000				arp_ifinit(ifp, ifa);
1001		} else
1002#endif
1003			error = ether_ioctl(ifp, command, data);
1004		break;
1005	case SIOCSIFMTU:
1006	    {
1007		int max_frame_size;
1008
1009		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1010
1011		IGB_CORE_LOCK(adapter);
1012		max_frame_size = 9234;
1013		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1014		    ETHER_CRC_LEN) {
1015			IGB_CORE_UNLOCK(adapter);
1016			error = EINVAL;
1017			break;
1018		}
1019
1020		ifp->if_mtu = ifr->ifr_mtu;
1021		adapter->max_frame_size =
1022		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1023		igb_init_locked(adapter);
1024		IGB_CORE_UNLOCK(adapter);
1025		break;
1026	    }
1027	case SIOCSIFFLAGS:
1028		IOCTL_DEBUGOUT("ioctl rcv'd:\
1029		    SIOCSIFFLAGS (Set Interface Flags)");
1030		IGB_CORE_LOCK(adapter);
1031		if (ifp->if_flags & IFF_UP) {
1032			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1033				if ((ifp->if_flags ^ adapter->if_flags) &
1034				    (IFF_PROMISC | IFF_ALLMULTI)) {
1035					igb_disable_promisc(adapter);
1036					igb_set_promisc(adapter);
1037				}
1038			} else
1039				igb_init_locked(adapter);
1040		} else
1041			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1042				igb_stop(adapter);
1043		adapter->if_flags = ifp->if_flags;
1044		IGB_CORE_UNLOCK(adapter);
1045		break;
1046	case SIOCADDMULTI:
1047	case SIOCDELMULTI:
1048		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1049		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1050			IGB_CORE_LOCK(adapter);
1051			igb_disable_intr(adapter);
1052			igb_set_multi(adapter);
1053#ifdef DEVICE_POLLING
1054			if (!(ifp->if_capenable & IFCAP_POLLING))
1055#endif
1056				igb_enable_intr(adapter);
1057			IGB_CORE_UNLOCK(adapter);
1058		}
1059		break;
1060	case SIOCSIFMEDIA:
1061		/*
1062		** As the speed/duplex settings are being
1063		** changed, we need to reset the PHY.
1064		*/
1065		adapter->hw.phy.reset_disable = FALSE;
1066		/* Check SOL/IDER usage */
1067		IGB_CORE_LOCK(adapter);
1068		if (e1000_check_reset_block(&adapter->hw)) {
1069			IGB_CORE_UNLOCK(adapter);
1070			device_printf(adapter->dev, "Media change is"
1071			    " blocked due to SOL/IDER session.\n");
1072			break;
1073		}
1074		IGB_CORE_UNLOCK(adapter);
1075	case SIOCGIFMEDIA:
1076		IOCTL_DEBUGOUT("ioctl rcv'd: \
1077		    SIOCxIFMEDIA (Get/Set Interface Media)");
1078		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1079		break;
1080	case SIOCSIFCAP:
1081	    {
1082		int mask, reinit;
1083
1084		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1085		reinit = 0;
1086		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1087#ifdef DEVICE_POLLING
1088		if (mask & IFCAP_POLLING) {
1089			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1090				error = ether_poll_register(igb_poll, ifp);
1091				if (error)
1092					return (error);
1093				IGB_CORE_LOCK(adapter);
1094				igb_disable_intr(adapter);
1095				ifp->if_capenable |= IFCAP_POLLING;
1096				IGB_CORE_UNLOCK(adapter);
1097			} else {
1098				error = ether_poll_deregister(ifp);
1099				/* Enable interrupt even in error case */
1100				IGB_CORE_LOCK(adapter);
1101				igb_enable_intr(adapter);
1102				ifp->if_capenable &= ~IFCAP_POLLING;
1103				IGB_CORE_UNLOCK(adapter);
1104			}
1105		}
1106#endif
1107		if (mask & IFCAP_HWCSUM) {
1108			ifp->if_capenable ^= IFCAP_HWCSUM;
1109			reinit = 1;
1110		}
1111		if (mask & IFCAP_TSO4) {
1112			ifp->if_capenable ^= IFCAP_TSO4;
1113			reinit = 1;
1114		}
1115		if (mask & IFCAP_VLAN_HWTAGGING) {
1116			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1117			reinit = 1;
1118		}
1119		if (mask & IFCAP_VLAN_HWFILTER) {
1120			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1121			reinit = 1;
1122		}
1123		if (mask & IFCAP_LRO) {
1124			ifp->if_capenable ^= IFCAP_LRO;
1125			reinit = 1;
1126		}
1127		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1128			igb_init(adapter);
1129		VLAN_CAPABILITIES(ifp);
1130		break;
1131	    }
1132
1133	default:
1134		error = ether_ioctl(ifp, command, data);
1135		break;
1136	}
1137
1138	return (error);
1139}
1140
1141
1142/*********************************************************************
1143 *  Init entry point
1144 *
1145 *  This routine is used in two ways. It is used by the stack as
1146 *  the init entry point in the network interface structure. It is also used
1147 *  by the driver as a hw/sw initialization routine to get to a
1148 *  consistent state.
1149 *
1150 *  return 0 on success, positive on failure
1151 **********************************************************************/
1152
1153static void
1154igb_init_locked(struct adapter *adapter)
1155{
1156	struct ifnet	*ifp = adapter->ifp;
1157	device_t	dev = adapter->dev;
1158
1159	INIT_DEBUGOUT("igb_init: begin");
1160
1161	IGB_CORE_LOCK_ASSERT(adapter);
1162
1163	igb_disable_intr(adapter);
1164	callout_stop(&adapter->timer);
1165
1166	/* Get the latest mac address, User can use a LAA */
1167        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1168              ETHER_ADDR_LEN);
1169
1170	/* Put the address into the Receive Address Array */
1171	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1172
1173	igb_reset(adapter);
1174	igb_update_link_status(adapter);
1175
1176	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1177
1178	/* Set hardware offload abilities */
1179	ifp->if_hwassist = 0;
1180	if (ifp->if_capenable & IFCAP_TXCSUM) {
1181		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1182#if __FreeBSD_version >= 800000
1183		if (adapter->hw.mac.type == e1000_82576)
1184			ifp->if_hwassist |= CSUM_SCTP;
1185#endif
1186	}
1187
1188	if (ifp->if_capenable & IFCAP_TSO4)
1189		ifp->if_hwassist |= CSUM_TSO;
1190
1191	/* Configure for OS presence */
1192	igb_init_manageability(adapter);
1193
1194	/* Prepare transmit descriptors and buffers */
1195	igb_setup_transmit_structures(adapter);
1196	igb_initialize_transmit_units(adapter);
1197
1198	/* Setup Multicast table */
1199	igb_set_multi(adapter);
1200
1201	/*
1202	** Figure out the desired mbuf pool
1203	** for doing jumbo/packetsplit
1204	*/
1205	if (adapter->max_frame_size <= 2048)
1206		adapter->rx_mbuf_sz = MCLBYTES;
1207	else if (adapter->max_frame_size <= 4096)
1208		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1209	else
1210		adapter->rx_mbuf_sz = MJUM9BYTES;
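	/*
	** For example, a 9000-byte MTU gives a max_frame_size of 9018 and
	** selects 9KB (MJUM9BYTES) clusters, while the default 1500-byte
	** MTU (1518-byte frames) stays on standard 2KB (MCLBYTES) clusters.
	*/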
1211
1212	/* Prepare receive descriptors and buffers */
1213	if (igb_setup_receive_structures(adapter)) {
1214		device_printf(dev, "Could not setup receive structures\n");
1215		return;
1216	}
1217	igb_initialize_receive_units(adapter);
1218
1219        /* Enable VLAN support */
1220	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1221		igb_setup_vlan_hw_support(adapter);
1222
1223	/* Don't lose promiscuous settings */
1224	igb_set_promisc(adapter);
1225
1226	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1227	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1228
1229	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1230	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1231
1232	if (adapter->msix > 1) /* Set up queue routing */
1233		igb_configure_queues(adapter);
1234
1235	/* this clears any pending interrupts */
1236	E1000_READ_REG(&adapter->hw, E1000_ICR);
1237#ifdef DEVICE_POLLING
1238	/*
1239	 * Only enable interrupts if we are not polling, make sure
1240	 * they are off otherwise.
1241	 */
1242	if (ifp->if_capenable & IFCAP_POLLING)
1243		igb_disable_intr(adapter);
1244	else
1245#endif /* DEVICE_POLLING */
1246	{
1247	igb_enable_intr(adapter);
1248	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1249	}
1250
1251	/* Don't reset the phy next time init gets called */
1252	adapter->hw.phy.reset_disable = TRUE;
1253}
1254
1255static void
1256igb_init(void *arg)
1257{
1258	struct adapter *adapter = arg;
1259
1260	IGB_CORE_LOCK(adapter);
1261	igb_init_locked(adapter);
1262	IGB_CORE_UNLOCK(adapter);
1263}
1264
1265
1266static void
1267igb_handle_que(void *context, int pending)
1268{
1269	struct igb_queue *que = context;
1270	struct adapter *adapter = que->adapter;
1271	struct tx_ring *txr = que->txr;
1272	struct ifnet	*ifp = adapter->ifp;
1273
1274	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1275		bool	more;
1276
1277		more = igb_rxeof(que, -1, NULL);
1278
1279		IGB_TX_LOCK(txr);
1280		if (igb_txeof(txr))
1281			more = TRUE;
1282#if __FreeBSD_version >= 800000
1283		if (!drbr_empty(ifp, txr->br))
1284			igb_mq_start_locked(ifp, txr, NULL);
1285#else
1286		igb_start_locked(txr, ifp);
1287#endif
1288		IGB_TX_UNLOCK(txr);
1289		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1290			taskqueue_enqueue(que->tq, &que->que_task);
1291			return;
1292		}
1293	}
1294
1295#ifdef DEVICE_POLLING
1296	if (ifp->if_capenable & IFCAP_POLLING)
1297		return;
1298#endif
1299	/* Reenable this interrupt */
1300	if (que->eims)
1301		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1302	else
1303		igb_enable_intr(adapter);
1304}
1305
1306/* Deal with link in a sleepable context */
1307static void
1308igb_handle_link(void *context, int pending)
1309{
1310	struct adapter *adapter = context;
1311
1312	adapter->hw.mac.get_link_status = 1;
1313	igb_update_link_status(adapter);
1314}
1315
1316/*********************************************************************
1317 *
1318 *  MSI/Legacy Deferred
1319 *  Interrupt Service routine
1320 *
1321 *********************************************************************/
1322static int
1323igb_irq_fast(void *arg)
1324{
1325	struct adapter		*adapter = arg;
1326	struct igb_queue	*que = adapter->queues;
1327	u32			reg_icr;
1328
1329
1330	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1331
1332	/* Hot eject?  */
1333	if (reg_icr == 0xffffffff)
1334		return FILTER_STRAY;
1335
1336	/* Definitely not our interrupt.  */
1337	if (reg_icr == 0x0)
1338		return FILTER_STRAY;
1339
1340	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1341		return FILTER_STRAY;
1342
1343	/*
1344	 * Mask interrupts until the taskqueue is finished running.  This is
1345	 * cheap, just assume that it is needed.  This also works around the
1346	 * MSI message reordering errata on certain systems.
1347	 */
1348	igb_disable_intr(adapter);
1349	taskqueue_enqueue(que->tq, &que->que_task);
1350
1351	/* Link status change */
1352	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1353		taskqueue_enqueue(que->tq, &adapter->link_task);
1354
1355	if (reg_icr & E1000_ICR_RXO)
1356		adapter->rx_overruns++;
1357	return FILTER_HANDLED;
1358}
1359
1360#ifdef DEVICE_POLLING
1361/*********************************************************************
1362 *
1363 *  Legacy polling routine: if using this code you MUST be sure that
1364 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1365 *
1366 *********************************************************************/
1367#if __FreeBSD_version >= 800000
1368#define POLL_RETURN_COUNT(a) (a)
1369static int
1370#else
1371#define POLL_RETURN_COUNT(a)
1372static void
1373#endif
1374igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1375{
1376	struct adapter		*adapter = ifp->if_softc;
1377	struct igb_queue	*que = adapter->queues;
1378	struct tx_ring		*txr = adapter->tx_rings;
1379	u32			reg_icr, rx_done = 0;
1380	u32			loop = IGB_MAX_LOOP;
1381	bool			more;
1382
1383	IGB_CORE_LOCK(adapter);
1384	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1385		IGB_CORE_UNLOCK(adapter);
1386		return POLL_RETURN_COUNT(rx_done);
1387	}
1388
1389	if (cmd == POLL_AND_CHECK_STATUS) {
1390		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1391		/* Link status change */
1392		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1393			igb_handle_link(adapter, 0);
1394
1395		if (reg_icr & E1000_ICR_RXO)
1396			adapter->rx_overruns++;
1397	}
1398	IGB_CORE_UNLOCK(adapter);
1399
1400	igb_rxeof(que, count, &rx_done);
1401
1402	IGB_TX_LOCK(txr);
1403	do {
1404		more = igb_txeof(txr);
1405	} while (loop-- && more);
1406#if __FreeBSD_version >= 800000
1407	if (!drbr_empty(ifp, txr->br))
1408		igb_mq_start_locked(ifp, txr, NULL);
1409#else
1410	igb_start_locked(txr, ifp);
1411#endif
1412	IGB_TX_UNLOCK(txr);
1413	return POLL_RETURN_COUNT(rx_done);
1414}
1415#endif /* DEVICE_POLLING */
1416
1417/*********************************************************************
1418 *
1419 *  MSIX Queue Interrupt Service routine
1420 *
1421 **********************************************************************/
1422static void
1423igb_msix_que(void *arg)
1424{
1425	struct igb_queue *que = arg;
1426	struct adapter *adapter = que->adapter;
1427	struct tx_ring *txr = que->txr;
1428	struct rx_ring *rxr = que->rxr;
1429	u32		newitr = 0;
1430	bool		more_tx, more_rx;
1431
1432	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1433	++que->irqs;
1434
1435	IGB_TX_LOCK(txr);
1436	more_tx = igb_txeof(txr);
1437	IGB_TX_UNLOCK(txr);
1438
1439	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1440
1441	if (igb_enable_aim == FALSE)
1442		goto no_calc;
1443	/*
1444	** Do Adaptive Interrupt Moderation:
1445        **  - Write out last calculated setting
1446	**  - Calculate based on average size over
1447	**    the last interval.
1448	*/
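	/*
	** Illustrative arithmetic: at gigabit speed, an average frame of
	** 1500 bytes (e.g. 600000 bytes / 400 packets) gives newitr =
	** 1500 + 24 = 1524; that is above the 300-1200 mid range, so it
	** is halved to 762 before the low bits are masked off below.
	*/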
1449        if (que->eitr_setting)
1450                E1000_WRITE_REG(&adapter->hw,
1451                    E1000_EITR(que->msix), que->eitr_setting);
1452
1453        que->eitr_setting = 0;
1454
1455        /* Idle, do nothing */
1456        if ((txr->bytes == 0) && (rxr->bytes == 0))
1457                goto no_calc;
1458
1459        /* Use half the default if sub-gigabit */
1460        if (adapter->link_speed != 1000)
1461                newitr = IGB_DEFAULT_ITR / 2;
1462        else {
1463		if ((txr->bytes) && (txr->packets))
1464                	newitr = txr->bytes/txr->packets;
1465		if ((rxr->bytes) && (rxr->packets))
1466			newitr = max(newitr,
1467			    (rxr->bytes / rxr->packets));
1468                newitr += 24; /* account for hardware frame, crc */
1469		/* set an upper boundary */
1470		newitr = min(newitr, 3000);
1471		/* Be nice to the mid range */
1472                if ((newitr > 300) && (newitr < 1200))
1473                        newitr = (newitr / 3);
1474                else
1475                        newitr = (newitr / 2);
1476        }
1477        newitr &= 0x7FFC;  /* Mask invalid bits */
1478        if (adapter->hw.mac.type == e1000_82575)
1479                newitr |= newitr << 16;
1480        else
1481                newitr |= E1000_EITR_CNT_IGNR;
1482
1483        /* save for next interrupt */
1484        que->eitr_setting = newitr;
1485
1486        /* Reset state */
1487        txr->bytes = 0;
1488        txr->packets = 0;
1489        rxr->bytes = 0;
1490        rxr->packets = 0;
1491
1492no_calc:
1493	/* Schedule a clean task if needed*/
1494	if (more_tx || more_rx ||
1495	    (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
1496		taskqueue_enqueue(que->tq, &que->que_task);
1497	else
1498		/* Reenable this interrupt */
1499		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1500	return;
1501}
1502
1503
1504/*********************************************************************
1505 *
1506 *  MSIX Link Interrupt Service routine
1507 *
1508 **********************************************************************/
1509
1510static void
1511igb_msix_link(void *arg)
1512{
1513	struct adapter	*adapter = arg;
1514	u32       	icr;
1515
1516	++adapter->link_irq;
1517	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1518	if (!(icr & E1000_ICR_LSC))
1519		goto spurious;
1520	igb_handle_link(adapter, 0);
1521
1522spurious:
1523	/* Rearm */
1524	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1525	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1526	return;
1527}
1528
1529
1530/*********************************************************************
1531 *
1532 *  Media Ioctl callback
1533 *
1534 *  This routine is called whenever the user queries the status of
1535 *  the interface using ifconfig.
1536 *
1537 **********************************************************************/
1538static void
1539igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1540{
1541	struct adapter *adapter = ifp->if_softc;
1542	u_char fiber_type = IFM_1000_SX;
1543
1544	INIT_DEBUGOUT("igb_media_status: begin");
1545
1546	IGB_CORE_LOCK(adapter);
1547	igb_update_link_status(adapter);
1548
1549	ifmr->ifm_status = IFM_AVALID;
1550	ifmr->ifm_active = IFM_ETHER;
1551
1552	if (!adapter->link_active) {
1553		IGB_CORE_UNLOCK(adapter);
1554		return;
1555	}
1556
1557	ifmr->ifm_status |= IFM_ACTIVE;
1558
1559	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1560	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1561		ifmr->ifm_active |= fiber_type | IFM_FDX;
1562	else {
1563		switch (adapter->link_speed) {
1564		case 10:
1565			ifmr->ifm_active |= IFM_10_T;
1566			break;
1567		case 100:
1568			ifmr->ifm_active |= IFM_100_TX;
1569			break;
1570		case 1000:
1571			ifmr->ifm_active |= IFM_1000_T;
1572			break;
1573		}
1574		if (adapter->link_duplex == FULL_DUPLEX)
1575			ifmr->ifm_active |= IFM_FDX;
1576		else
1577			ifmr->ifm_active |= IFM_HDX;
1578	}
1579	IGB_CORE_UNLOCK(adapter);
1580}
1581
1582/*********************************************************************
1583 *
1584 *  Media Ioctl callback
1585 *
1586 *  This routine is called when the user changes speed/duplex using
1587 *  the media/mediaopt options with ifconfig.
1588 *
1589 **********************************************************************/
1590static int
1591igb_media_change(struct ifnet *ifp)
1592{
1593	struct adapter *adapter = ifp->if_softc;
1594	struct ifmedia  *ifm = &adapter->media;
1595
1596	INIT_DEBUGOUT("igb_media_change: begin");
1597
1598	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1599		return (EINVAL);
1600
1601	IGB_CORE_LOCK(adapter);
1602	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1603	case IFM_AUTO:
1604		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1605		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1606		break;
1607	case IFM_1000_LX:
1608	case IFM_1000_SX:
1609	case IFM_1000_T:
1610		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1611		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1612		break;
1613	case IFM_100_TX:
1614		adapter->hw.mac.autoneg = FALSE;
1615		adapter->hw.phy.autoneg_advertised = 0;
1616		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1617			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1618		else
1619			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1620		break;
1621	case IFM_10_T:
1622		adapter->hw.mac.autoneg = FALSE;
1623		adapter->hw.phy.autoneg_advertised = 0;
1624		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1625			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1626		else
1627			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1628		break;
1629	default:
1630		device_printf(adapter->dev, "Unsupported media type\n");
1631	}
1632
1633	igb_init_locked(adapter);
1634	IGB_CORE_UNLOCK(adapter);
1635
1636	return (0);
1637}
1638
1639
1640/*********************************************************************
1641 *
1642 *  This routine maps the mbufs to the Advanced TX descriptors
1643 *  used by the 82575 and later adapters.
1644 *
1645 **********************************************************************/
1646
1647static int
1648igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1649{
1650	struct adapter		*adapter = txr->adapter;
1651	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1652	bus_dmamap_t		map;
1653	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1654	union e1000_adv_tx_desc	*txd = NULL;
1655	struct mbuf		*m_head;
1656	u32			olinfo_status = 0, cmd_type_len = 0;
1657	int			nsegs, i, j, error, first, last = 0;
1658	u32			hdrlen = 0;
1659
1660	m_head = *m_headp;
1661
1662
1663	/* Set basic descriptor constants */
1664	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1665	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1666	if (m_head->m_flags & M_VLANTAG)
1667		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1668
1669	/*
1670         * Map the packet for DMA.
1671	 *
1672	 * Capture the first descriptor index,
1673	 * this descriptor will have the index
1674	 * of the EOP which is the only one that
1675	 * now gets a DONE bit writeback.
1676	 */
1677	first = txr->next_avail_desc;
1678	tx_buffer = &txr->tx_buffers[first];
1679	tx_buffer_mapped = tx_buffer;
1680	map = tx_buffer->map;
1681
1682	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1683	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1684
1685	if (error == EFBIG) {
1686		struct mbuf *m;
1687
1688		m = m_defrag(*m_headp, M_DONTWAIT);
1689		if (m == NULL) {
1690			adapter->mbuf_defrag_failed++;
1691			m_freem(*m_headp);
1692			*m_headp = NULL;
1693			return (ENOBUFS);
1694		}
1695		*m_headp = m;
1696
1697		/* Try it again */
1698		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1699		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1700
1701		if (error == ENOMEM) {
1702			adapter->no_tx_dma_setup++;
1703			return (error);
1704		} else if (error != 0) {
1705			adapter->no_tx_dma_setup++;
1706			m_freem(*m_headp);
1707			*m_headp = NULL;
1708			return (error);
1709		}
1710	} else if (error == ENOMEM) {
1711		adapter->no_tx_dma_setup++;
1712		return (error);
1713	} else if (error != 0) {
1714		adapter->no_tx_dma_setup++;
1715		m_freem(*m_headp);
1716		*m_headp = NULL;
1717		return (error);
1718	}
1719
1720	/* Check again to be sure we have enough descriptors */
1721        if (nsegs > (txr->tx_avail - 2)) {
1722                txr->no_desc_avail++;
1723		bus_dmamap_unload(txr->txtag, map);
1724		return (ENOBUFS);
1725        }
1726	m_head = *m_headp;
1727
1728        /*
1729         * Set up the context descriptor:
1730         * used when any hardware offload is done.
1731	 * This includes CSUM, VLAN, and TSO. It
1732	 * will use the first descriptor.
1733         */
1734        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1735		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1736			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1737			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1738			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1739		} else
1740			return (ENXIO);
1741	} else if (igb_tx_ctx_setup(txr, m_head))
1742		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1743
1744	/* Calculate payload length */
1745	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1746	    << E1000_ADVTXD_PAYLEN_SHIFT);
1747
1748	/* 82575 needs the queue index added */
1749	if (adapter->hw.mac.type == e1000_82575)
1750		olinfo_status |= txr->me << 4;
1751
1752	/* Set up our transmit descriptors */
1753	i = txr->next_avail_desc;
1754	for (j = 0; j < nsegs; j++) {
1755		bus_size_t seg_len;
1756		bus_addr_t seg_addr;
1757
1758		tx_buffer = &txr->tx_buffers[i];
1759		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1760		seg_addr = segs[j].ds_addr;
1761		seg_len  = segs[j].ds_len;
1762
1763		txd->read.buffer_addr = htole64(seg_addr);
1764		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1765		txd->read.olinfo_status = htole32(olinfo_status);
1766		last = i;
1767		if (++i == adapter->num_tx_desc)
1768			i = 0;
1769		tx_buffer->m_head = NULL;
1770		tx_buffer->next_eop = -1;
1771	}
1772
1773	txr->next_avail_desc = i;
1774	txr->tx_avail -= nsegs;
1775
1776        tx_buffer->m_head = m_head;
1777	tx_buffer_mapped->map = tx_buffer->map;
1778	tx_buffer->map = map;
1779        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1780
1781        /*
1782         * Last Descriptor of Packet
1783	 * needs End Of Packet (EOP)
1784	 * and Report Status (RS)
1785         */
1786        txd->read.cmd_type_len |=
1787	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1788	/*
1789	 * Keep track in the first buffer which
1790	 * descriptor will be written back
1791	 */
1792	tx_buffer = &txr->tx_buffers[first];
1793	tx_buffer->next_eop = last;
1794	txr->watchdog_time = ticks;
1795
1796	/*
1797	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1798	 * that this frame is available to transmit.
1799	 */
1800	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1801	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1802	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1803	++txr->tx_packets;
1804
1805	return (0);
1806
1807}
1808
1809static void
1810igb_set_promisc(struct adapter *adapter)
1811{
1812	struct ifnet	*ifp = adapter->ifp;
1813	struct e1000_hw *hw = &adapter->hw;
1814	u32		reg;
1815
1816	if (adapter->vf_ifp) {
1817		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1818		return;
1819	}
1820
1821	reg = E1000_READ_REG(hw, E1000_RCTL);
1822	if (ifp->if_flags & IFF_PROMISC) {
1823		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1824		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1825	} else if (ifp->if_flags & IFF_ALLMULTI) {
1826		reg |= E1000_RCTL_MPE;
1827		reg &= ~E1000_RCTL_UPE;
1828		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1829	}
1830}
1831
1832static void
1833igb_disable_promisc(struct adapter *adapter)
1834{
1835	struct e1000_hw *hw = &adapter->hw;
1836	u32		reg;
1837
1838	if (adapter->vf_ifp) {
1839		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1840		return;
1841	}
1842	reg = E1000_READ_REG(hw, E1000_RCTL);
1843	reg &=  (~E1000_RCTL_UPE);
1844	reg &=  (~E1000_RCTL_MPE);
1845	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1846}
1847
1848
1849/*********************************************************************
1850 *  Multicast Update
1851 *
1852 *  This routine is called whenever the multicast address list is updated.
1853 *
1854 **********************************************************************/
1855
1856static void
1857igb_set_multi(struct adapter *adapter)
1858{
1859	struct ifnet	*ifp = adapter->ifp;
1860	struct ifmultiaddr *ifma;
1861	u32 reg_rctl = 0;
1862	u8  *mta;
1863
1864	int mcnt = 0;
1865
1866	IOCTL_DEBUGOUT("igb_set_multi: begin");
1867
1868	mta = adapter->mta;
1869	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1870	    MAX_NUM_MULTICAST_ADDRESSES);
1871
1872#if __FreeBSD_version < 800000
1873	IF_ADDR_LOCK(ifp);
1874#else
1875	if_maddr_rlock(ifp);
1876#endif
1877	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1878		if (ifma->ifma_addr->sa_family != AF_LINK)
1879			continue;
1880
1881		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1882			break;
1883
1884		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1885		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1886		mcnt++;
1887	}
1888#if __FreeBSD_version < 800000
1889	IF_ADDR_UNLOCK(ifp);
1890#else
1891	if_maddr_runlock(ifp);
1892#endif
1893
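	/*
	 * If there are more groups than the exact multicast filter
	 * can hold, fall back to accepting all multicast (MPE)
	 * instead of programming a partial list.
	 */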
1894	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1895		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1896		reg_rctl |= E1000_RCTL_MPE;
1897		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1898	} else
1899		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1900}
1901
1902
1903/*********************************************************************
1904 *  Timer routine:
1905 *  	This routine checks for link status,
1906 *	updates statistics, and does the watchdog.
1907 *
1908 **********************************************************************/
1909
1910static void
1911igb_local_timer(void *arg)
1912{
1913	struct adapter		*adapter = arg;
1914	device_t		dev = adapter->dev;
1915	struct tx_ring		*txr = adapter->tx_rings;
1916
1917
1918	IGB_CORE_LOCK_ASSERT(adapter);
1919
1920	igb_update_link_status(adapter);
1921	igb_update_stats_counters(adapter);
1922
1923	/*
1924	** If flow control has paused us since the last check,
1925	** it invalidates the watchdog timing, so don't run it.
1926	*/
1927	if (adapter->pause_frames) {
1928		adapter->pause_frames = 0;
1929		goto out;
1930	}
1931
1932        /*
1933        ** Watchdog: check for time since any descriptor was cleaned
1934        */
1935	for (int i = 0; i < adapter->num_queues; i++, txr++)
1936		if (txr->queue_status == IGB_QUEUE_HUNG)
1937			goto timeout;
1938out:
1939	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1940#ifndef DEVICE_POLLING
1941	/* Fire off all queue interrupts - deadlock protection */
1942	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
1943#endif
1944	return;
1945
1946timeout:
1947	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1948	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1949            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1950            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1951	device_printf(dev,"TX(%d) desc avail = %d, "
1952            "Next TX to Clean = %d\n",
1953            txr->me, txr->tx_avail, txr->next_to_clean);
1954	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1955	adapter->watchdog_events++;
1956	igb_init_locked(adapter);
1957}
1958
1959static void
1960igb_update_link_status(struct adapter *adapter)
1961{
1962	struct e1000_hw *hw = &adapter->hw;
1963	struct ifnet *ifp = adapter->ifp;
1964	device_t dev = adapter->dev;
1965	struct tx_ring *txr = adapter->tx_rings;
1966	u32 link_check = 0;
1967
1968	/* Get the cached link value or read for real */
1969        switch (hw->phy.media_type) {
1970        case e1000_media_type_copper:
1971                if (hw->mac.get_link_status) {
1972			/* Do the work to read phy */
1973                        e1000_check_for_link(hw);
1974                        link_check = !hw->mac.get_link_status;
1975                } else
1976                        link_check = TRUE;
1977                break;
1978        case e1000_media_type_fiber:
1979                e1000_check_for_link(hw);
1980                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1981                                 E1000_STATUS_LU);
1982                break;
1983        case e1000_media_type_internal_serdes:
1984                e1000_check_for_link(hw);
1985                link_check = adapter->hw.mac.serdes_has_link;
1986                break;
1987	/* VF device is type_unknown */
1988        case e1000_media_type_unknown:
1989                e1000_check_for_link(hw);
1990		link_check = !hw->mac.get_link_status;
1991		/* Fall thru */
1992        default:
1993                break;
1994        }
1995
1996	/* Now we check if a transition has happened */
1997	if (link_check && (adapter->link_active == 0)) {
1998		e1000_get_speed_and_duplex(&adapter->hw,
1999		    &adapter->link_speed, &adapter->link_duplex);
2000		if (bootverbose)
2001			device_printf(dev, "Link is up %d Mbps %s\n",
2002			    adapter->link_speed,
2003			    ((adapter->link_duplex == FULL_DUPLEX) ?
2004			    "Full Duplex" : "Half Duplex"));
2005		adapter->link_active = 1;
2006		ifp->if_baudrate = adapter->link_speed * 1000000;
2007		/* This can sleep */
2008		if_link_state_change(ifp, LINK_STATE_UP);
2009	} else if (!link_check && (adapter->link_active == 1)) {
2010		ifp->if_baudrate = adapter->link_speed = 0;
2011		adapter->link_duplex = 0;
2012		if (bootverbose)
2013			device_printf(dev, "Link is Down\n");
2014		adapter->link_active = 0;
2015		/* This can sleep */
2016		if_link_state_change(ifp, LINK_STATE_DOWN);
2017		/* Turn off watchdogs */
2018		for (int i = 0; i < adapter->num_queues; i++, txr++)
2019			txr->queue_status = IGB_QUEUE_IDLE;
2020	}
2021}
2022
2023/*********************************************************************
2024 *
2025 *  This routine disables all traffic on the adapter by issuing a
2026 *  global reset on the MAC and deallocates TX/RX buffers.
2027 *
2028 **********************************************************************/
2029
2030static void
2031igb_stop(void *arg)
2032{
2033	struct adapter	*adapter = arg;
2034	struct ifnet	*ifp = adapter->ifp;
2035	struct tx_ring *txr = adapter->tx_rings;
2036
2037	IGB_CORE_LOCK_ASSERT(adapter);
2038
2039	INIT_DEBUGOUT("igb_stop: begin");
2040
2041	igb_disable_intr(adapter);
2042
2043	callout_stop(&adapter->timer);
2044
2045	/* Tell the stack that the interface is no longer active */
2046	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2047
2048	/* Unarm watchdog timer. */
2049	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2050		IGB_TX_LOCK(txr);
2051		txr->queue_status = IGB_QUEUE_IDLE;
2052		IGB_TX_UNLOCK(txr);
2053	}
2054
2055	e1000_reset_hw(&adapter->hw);
2056	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2057
2058	e1000_led_off(&adapter->hw);
2059	e1000_cleanup_led(&adapter->hw);
2060}
2061
2062
2063/*********************************************************************
2064 *
2065 *  Determine hardware revision.
2066 *
2067 **********************************************************************/
2068static void
2069igb_identify_hardware(struct adapter *adapter)
2070{
2071	device_t dev = adapter->dev;
2072
2073	/* Make sure our PCI config space has the necessary stuff set */
2074	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2075	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2076	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2077		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2078		    "bits were not set!\n");
2079		adapter->hw.bus.pci_cmd_word |=
2080		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2081		pci_write_config(dev, PCIR_COMMAND,
2082		    adapter->hw.bus.pci_cmd_word, 2);
2083	}
2084
2085	/* Save off the information about this board */
2086	adapter->hw.vendor_id = pci_get_vendor(dev);
2087	adapter->hw.device_id = pci_get_device(dev);
2088	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2089	adapter->hw.subsystem_vendor_id =
2090	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2091	adapter->hw.subsystem_device_id =
2092	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2093
2094	/* Set MAC type early for PCI setup */
2095	e1000_set_mac_type(&adapter->hw);
2096
2097	/* Are we a VF device? */
2098	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2099	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2100		adapter->vf_ifp = 1;
2101	else
2102		adapter->vf_ifp = 0;
2103}
2104
2105static int
2106igb_allocate_pci_resources(struct adapter *adapter)
2107{
2108	device_t	dev = adapter->dev;
2109	int		rid;
2110
2111	rid = PCIR_BAR(0);
2112	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2113	    &rid, RF_ACTIVE);
2114	if (adapter->pci_mem == NULL) {
2115		device_printf(dev, "Unable to allocate bus resource: memory\n");
2116		return (ENXIO);
2117	}
2118	adapter->osdep.mem_bus_space_tag =
2119	    rman_get_bustag(adapter->pci_mem);
2120	adapter->osdep.mem_bus_space_handle =
2121	    rman_get_bushandle(adapter->pci_mem);
2122	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2123
2124	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2125
2126	/* This will setup either MSI/X or MSI */
2127	adapter->msix = igb_setup_msix(adapter);
2128	adapter->hw.back = &adapter->osdep;
2129
2130	return (0);
2131}
2132
2133/*********************************************************************
2134 *
2135 *  Setup the Legacy or MSI Interrupt handler
2136 *
2137 **********************************************************************/
2138static int
2139igb_allocate_legacy(struct adapter *adapter)
2140{
2141	device_t		dev = adapter->dev;
2142	struct igb_queue	*que = adapter->queues;
2143	int			error, rid = 0;
2144
2145	/* Turn off all interrupts */
2146	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2147
2148	/* MSI RID is 1 */
2149	if (adapter->msix == 1)
2150		rid = 1;
2151
2152	/* We allocate a single interrupt resource */
2153	adapter->res = bus_alloc_resource_any(dev,
2154	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2155	if (adapter->res == NULL) {
2156		device_printf(dev, "Unable to allocate bus resource: "
2157		    "interrupt\n");
2158		return (ENXIO);
2159	}
2160
2161	/*
2162	 * Try allocating a fast interrupt and the associated deferred
2163	 * processing contexts.
2164	 */
2165	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2166	/* Make tasklet for deferred link handling */
2167	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2168	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2169	    taskqueue_thread_enqueue, &que->tq);
2170	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2171	    device_get_nameunit(adapter->dev));
2172	if ((error = bus_setup_intr(dev, adapter->res,
2173	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2174	    adapter, &adapter->tag)) != 0) {
2175		device_printf(dev, "Failed to register fast interrupt "
2176			    "handler: %d\n", error);
2177		taskqueue_free(que->tq);
2178		que->tq = NULL;
2179		return (error);
2180	}
2181
2182	return (0);
2183}
2184
2185
2186/*********************************************************************
2187 *
2188 *  Setup the MSIX Queue Interrupt handlers:
2189 *
2190 **********************************************************************/
2191static int
2192igb_allocate_msix(struct adapter *adapter)
2193{
2194	device_t		dev = adapter->dev;
2195	struct igb_queue	*que = adapter->queues;
2196	int			error, rid, vector = 0;
2197
2198
2199	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2200		rid = vector + 1;
2201		que->res = bus_alloc_resource_any(dev,
2202		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2203		if (que->res == NULL) {
2204			device_printf(dev,
2205			    "Unable to allocate bus resource: "
2206			    "MSIX Queue Interrupt\n");
2207			return (ENXIO);
2208		}
2209		error = bus_setup_intr(dev, que->res,
2210	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2211		    igb_msix_que, que, &que->tag);
2212		if (error) {
2213			que->res = NULL;
2214			device_printf(dev, "Failed to register Queue handler\n");
2215			return (error);
2216		}
2217#if __FreeBSD_version >= 800504
2218		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2219#endif
2220		que->msix = vector;
2221		if (adapter->hw.mac.type == e1000_82575)
2222			que->eims = E1000_EICR_TX_QUEUE0 << i;
2223		else
2224			que->eims = 1 << vector;
2225		/*
2226		** Bind the msix vector, and thus the
2227		** rings to the corresponding cpu.
2228		*/
2229		if (adapter->num_queues > 1)
2230			bus_bind_intr(dev, que->res, i);
2231		/* Make tasklet for deferred handling */
2232		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2233		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2234		    taskqueue_thread_enqueue, &que->tq);
2235		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2236		    device_get_nameunit(adapter->dev));
2237	}
2238
2239	/* And Link */
2240	rid = vector + 1;
2241	adapter->res = bus_alloc_resource_any(dev,
2242	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2243	if (adapter->res == NULL) {
2244		device_printf(dev,
2245		    "Unable to allocate bus resource: "
2246		    "MSIX Link Interrupt\n");
2247		return (ENXIO);
2248	}
2249	if ((error = bus_setup_intr(dev, adapter->res,
2250	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2251	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2252		device_printf(dev, "Failed to register Link handler\n");
2253		return (error);
2254	}
2255#if __FreeBSD_version >= 800504
2256	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2257#endif
2258	adapter->linkvec = vector;
2259
2260	return (0);
2261}
2262
2263
2264static void
2265igb_configure_queues(struct adapter *adapter)
2266{
2267	struct	e1000_hw	*hw = &adapter->hw;
2268	struct	igb_queue	*que;
2269	u32			tmp, ivar = 0, newitr = 0;
2270
2271	/* First turn on RSS capability */
2272	if (adapter->hw.mac.type != e1000_82575)
2273		E1000_WRITE_REG(hw, E1000_GPIE,
2274		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2275		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2276
2277	/* Turn on MSIX */
2278	switch (adapter->hw.mac.type) {
2279	case e1000_82580:
2280	case e1000_i350:
2281	case e1000_vfadapt:
2282	case e1000_vfadapt_i350:
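		/*
		 * On these MACs each 32-bit IVAR register holds the
		 * entries for two queues: even queues use byte 0 (RX)
		 * and byte 1 (TX), odd queues use byte 2 (RX) and
		 * byte 3 (TX).  Each entry is the MSI-X vector number
		 * with E1000_IVAR_VALID set.
		 */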
2283		/* RX entries */
2284		for (int i = 0; i < adapter->num_queues; i++) {
2285			u32 index = i >> 1;
2286			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2287			que = &adapter->queues[i];
2288			if (i & 1) {
2289				ivar &= 0xFF00FFFF;
2290				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2291			} else {
2292				ivar &= 0xFFFFFF00;
2293				ivar |= que->msix | E1000_IVAR_VALID;
2294			}
2295			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2296		}
2297		/* TX entries */
2298		for (int i = 0; i < adapter->num_queues; i++) {
2299			u32 index = i >> 1;
2300			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2301			que = &adapter->queues[i];
2302			if (i & 1) {
2303				ivar &= 0x00FFFFFF;
2304				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2305			} else {
2306				ivar &= 0xFFFF00FF;
2307				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2308			}
2309			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2310			adapter->que_mask |= que->eims;
2311		}
2312
2313		/* And for the link interrupt */
2314		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2315		adapter->link_mask = 1 << adapter->linkvec;
2316		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2317		break;
2318	case e1000_82576:
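		/*
		 * The 82576 indexes its IVAR registers by (queue & 7):
		 * queues 0-7 use the low half (RX in byte 0, TX in
		 * byte 1) and queues 8-15 the high half (RX in byte 2,
		 * TX in byte 3).
		 */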
2319		/* RX entries */
2320		for (int i = 0; i < adapter->num_queues; i++) {
2321			u32 index = i & 0x7; /* Each IVAR has two entries */
2322			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2323			que = &adapter->queues[i];
2324			if (i < 8) {
2325				ivar &= 0xFFFFFF00;
2326				ivar |= que->msix | E1000_IVAR_VALID;
2327			} else {
2328				ivar &= 0xFF00FFFF;
2329				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2330			}
2331			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2332			adapter->que_mask |= que->eims;
2333		}
2334		/* TX entries */
2335		for (int i = 0; i < adapter->num_queues; i++) {
2336			u32 index = i & 0x7; /* Each IVAR has two entries */
2337			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2338			que = &adapter->queues[i];
2339			if (i < 8) {
2340				ivar &= 0xFFFF00FF;
2341				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2342			} else {
2343				ivar &= 0x00FFFFFF;
2344				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2345			}
2346			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2347			adapter->que_mask |= que->eims;
2348		}
2349
2350		/* And for the link interrupt */
2351		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2352		adapter->link_mask = 1 << adapter->linkvec;
2353		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2354		break;
2355
2356	case e1000_82575:
2357                /* Enable MSI-X support */
2358		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2359                tmp |= E1000_CTRL_EXT_PBA_CLR;
2360                /* Auto-Mask interrupts upon ICR read. */
2361                tmp |= E1000_CTRL_EXT_EIAME;
2362                tmp |= E1000_CTRL_EXT_IRCA;
2363                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2364
2365		/* Queues */
2366		for (int i = 0; i < adapter->num_queues; i++) {
2367			que = &adapter->queues[i];
2368			tmp = E1000_EICR_RX_QUEUE0 << i;
2369			tmp |= E1000_EICR_TX_QUEUE0 << i;
2370			que->eims = tmp;
2371			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2372			    i, que->eims);
2373			adapter->que_mask |= que->eims;
2374		}
2375
2376		/* Link */
2377		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2378		    E1000_EIMS_OTHER);
2379		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
2380	default:
2381		break;
2382	}
2383
2384	/* Set the starting interrupt rate */
2385	if (igb_max_interrupt_rate > 0)
2386		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2387
2388        if (hw->mac.type == e1000_82575)
2389                newitr |= newitr << 16;
2390        else
2391                newitr |= E1000_EITR_CNT_IGNR;
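	/*
	 * For example, with igb_max_interrupt_rate = 8000 the value
	 * programmed is 4000000 / 8000 = 500; the 0x7FFC mask above
	 * clears the two low bits and limits the value to the EITR
	 * interval field.  The 82575 mirrors the value into the
	 * upper half of the register; newer MACs instead set
	 * CNT_IGNR.
	 */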
2392
2393	for (int i = 0; i < adapter->num_queues; i++) {
2394		que = &adapter->queues[i];
2395		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2396	}
2397
2398	return;
2399}
2400
2401
2402static void
2403igb_free_pci_resources(struct adapter *adapter)
2404{
2405	struct		igb_queue *que = adapter->queues;
2406	device_t	dev = adapter->dev;
2407	int		rid;
2408
2409	/*
2410	** There is a slight possibility of a failure mode
2411	** in attach that will result in entering this function
2412	** before interrupt resources have been initialized, and
2413	** in that case we do not want to execute the loops below.
2414	** We can detect this reliably by the state of the adapter
2415	** res pointer.
2416	*/
2417	if (adapter->res == NULL)
2418		goto mem;
2419
2420	/*
2421	 * First release all the interrupt resources:
2422	 */
2423	for (int i = 0; i < adapter->num_queues; i++, que++) {
2424		rid = que->msix + 1;
2425		if (que->tag != NULL) {
2426			bus_teardown_intr(dev, que->res, que->tag);
2427			que->tag = NULL;
2428		}
2429		if (que->res != NULL)
2430			bus_release_resource(dev,
2431			    SYS_RES_IRQ, rid, que->res);
2432	}
2433
2434	/* Clean the Legacy or Link interrupt last */
2435	if (adapter->linkvec) /* we are doing MSIX */
2436		rid = adapter->linkvec + 1;
2437	else
2438		rid = (adapter->msix != 0) ? 1 : 0;
2439
2440	if (adapter->tag != NULL) {
2441		bus_teardown_intr(dev, adapter->res, adapter->tag);
2442		adapter->tag = NULL;
2443	}
2444	if (adapter->res != NULL)
2445		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2446
2447mem:
2448	if (adapter->msix)
2449		pci_release_msi(dev);
2450
2451	if (adapter->msix_mem != NULL)
2452		bus_release_resource(dev, SYS_RES_MEMORY,
2453		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2454
2455	if (adapter->pci_mem != NULL)
2456		bus_release_resource(dev, SYS_RES_MEMORY,
2457		    PCIR_BAR(0), adapter->pci_mem);
2458
2459}
2460
2461/*
2462 * Setup Either MSI/X or MSI
2463 */
2464static int
2465igb_setup_msix(struct adapter *adapter)
2466{
2467	device_t dev = adapter->dev;
2468	int rid, want, queues, msgs;
2469
2470	/* tuneable override */
2471	if (igb_enable_msix == 0)
2472		goto msi;
2473
2474	/* First try MSI/X */
2475	rid = PCIR_BAR(IGB_MSIX_BAR);
2476	adapter->msix_mem = bus_alloc_resource_any(dev,
2477	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2478       	if (!adapter->msix_mem) {
2479		/* May not be enabled */
2480		device_printf(adapter->dev,
2481		    "Unable to map MSIX table\n");
2482		goto msi;
2483	}
2484
2485	msgs = pci_msix_count(dev);
2486	if (msgs == 0) { /* system has msix disabled */
2487		bus_release_resource(dev, SYS_RES_MEMORY,
2488		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2489		adapter->msix_mem = NULL;
2490		goto msi;
2491	}
2492
2493	/* Figure out a reasonable auto config value */
2494	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
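	/* One message is held back for the link interrupt, hence msgs - 1. */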
2495
2496	/* Manual override */
2497	if (igb_num_queues != 0)
2498		queues = igb_num_queues;
2499	if (queues > 8)  /* max queues */
2500		queues = 8;
2501
2502	/* Can have max of 4 queues on 82575 */
2503	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2504		queues = 4;
2505
2506	/* Limit the VF devices to one queue */
2507	if (adapter->vf_ifp)
2508		queues = 1;
2509
2510	/*
2511	** One vector (RX/TX pair) per queue
2512	** plus an additional for Link interrupt
2513	*/
2514	want = queues + 1;
2515	if (msgs >= want)
2516		msgs = want;
2517	else {
2518               	device_printf(adapter->dev,
2519		    "MSIX Configuration Problem, "
2520		    "%d vectors configured, but %d queues wanted!\n",
2521		    msgs, want);
2522		return (ENXIO);
2523	}
2524	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2525               	device_printf(adapter->dev,
2526		    "Using MSIX interrupts with %d vectors\n", msgs);
2527		adapter->num_queues = queues;
2528		return (msgs);
2529	}
2530msi:
2531       	msgs = pci_msi_count(dev);
2532       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2533               	device_printf(adapter->dev,"Using MSI interrupt\n");
2534	return (msgs);
2535}
2536
2537/*********************************************************************
2538 *
2539 *  Set up a fresh starting state
2540 *
2541 **********************************************************************/
2542static void
2543igb_reset(struct adapter *adapter)
2544{
2545	device_t	dev = adapter->dev;
2546	struct e1000_hw *hw = &adapter->hw;
2547	struct e1000_fc_info *fc = &hw->fc;
2548	struct ifnet	*ifp = adapter->ifp;
2549	u32		pba = 0;
2550	u16		hwm;
2551
2552	INIT_DEBUGOUT("igb_reset: begin");
2553
2554	/* Let the firmware know the OS is in control */
2555	igb_get_hw_control(adapter);
2556
2557	/*
2558	 * Packet Buffer Allocation (PBA)
2559	 * Writing PBA sets the receive portion of the buffer
2560	 * the remainder is used for the transmit buffer.
2561	 */
2562	switch (hw->mac.type) {
2563	case e1000_82575:
2564		pba = E1000_PBA_32K;
2565		break;
2566	case e1000_82576:
2567	case e1000_vfadapt:
2568		pba = E1000_READ_REG(hw, E1000_RXPBS);
2569		pba &= E1000_RXPBS_SIZE_MASK_82576;
2570		break;
2571	case e1000_82580:
2572	case e1000_i350:
2573	case e1000_vfadapt_i350:
2574		pba = E1000_READ_REG(hw, E1000_RXPBS);
2575		pba = e1000_rxpbs_adjust_82580(pba);
2576		break;
2577	default:
2578		pba = E1000_PBA_35K;
2579		break;
2580	}
2581
2582	/* Special needs in case of Jumbo frames */
2583	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2584		u32 tx_space, min_tx, min_rx;
2585		pba = E1000_READ_REG(hw, E1000_PBA);
2586		tx_space = pba >> 16;
2587		pba &= 0xffff;
2588		min_tx = (adapter->max_frame_size +
2589		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2590		min_tx = roundup2(min_tx, 1024);
2591		min_tx >>= 10;
2592                min_rx = adapter->max_frame_size;
2593                min_rx = roundup2(min_rx, 1024);
2594                min_rx >>= 10;
2595		if (tx_space < min_tx &&
2596		    ((min_tx - tx_space) < pba)) {
2597			pba = pba - (min_tx - tx_space);
2598			/*
2599                         * if short on rx space, rx wins
2600                         * and must trump tx adjustment
2601			 */
2602                        if (pba < min_rx)
2603                                pba = min_rx;
2604		}
2605		E1000_WRITE_REG(hw, E1000_PBA, pba);
2606	}
2607
2608	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2609
2610	/*
2611	 * These parameters control the automatic generation (Tx) and
2612	 * response (Rx) to Ethernet PAUSE frames.
2613	 * - High water mark should allow for at least two frames to be
2614	 *   received after sending an XOFF.
2615	 * - Low water mark works best when it is very near the high water mark.
2616	 *   This allows the receiver to restart by sending XON when it has
2617	 *   drained a bit.
2618	 */
2619	hwm = min(((pba << 10) * 9 / 10),
2620	    ((pba << 10) - 2 * adapter->max_frame_size));
2621
2622	if (hw->mac.type < e1000_82576) {
2623		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2624		fc->low_water = fc->high_water - 8;
2625	} else {
2626		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2627		fc->low_water = fc->high_water - 16;
2628	}
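	/*
	 * Example (illustrative): with pba = 32 (KB) and a 1522-byte
	 * max frame, (32768 * 9) / 10 = 29491 and 32768 - 2 * 1522 =
	 * 29724, so hwm = 29491; on an 82575 that yields high_water =
	 * 29488 and low_water = 29480 after the 8-byte rounding.
	 */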
2629
2630	fc->pause_time = IGB_FC_PAUSE_TIME;
2631	fc->send_xon = TRUE;
2632
2633	/* Set Flow control, use the tunable location if sane */
2634	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2635		fc->requested_mode = igb_fc_setting;
2636	else
2637		fc->requested_mode = e1000_fc_none;
2638
2639	fc->current_mode = fc->requested_mode;
2640
2641	/* Issue a global reset */
2642	e1000_reset_hw(hw);
2643	E1000_WRITE_REG(hw, E1000_WUC, 0);
2644
2645	if (e1000_init_hw(hw) < 0)
2646		device_printf(dev, "Hardware Initialization Failed\n");
2647
2648	/* Setup DMA Coalescing */
2649	if (hw->mac.type == e1000_i350) {
2650		u32 reg;
2651
2652		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2653		/*
2654		 * 0x80000000 - enable DMA COAL
2655		 * 0x10000000 - use L0s as low power
2656		 * 0x20000000 - use L1 as low power
2657		 * X << 16 - exit dma coal when rx data exceeds X kB
2658		 * Y - upper limit to stay in dma coal in units of 32usecs
2659		 */
2660		E1000_WRITE_REG(hw, E1000_DMACR,
2661		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
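		/*
		 * 0xA0000006 decodes per the comment above: enable
		 * (0x80000000) | L1 low power (0x20000000) | Y = 6
		 * (stay in dma coal for at most 6 * 32 usecs), and
		 * ((hwm << 6) & 0x00FF0000) places hwm, scaled down
		 * to kB, into the X exit-threshold field.
		 */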
2662
2663		/* set hwm to PBA -  2 * max frame size */
2664		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2665		/*
2666		 * This sets the time to wait before requesting transition to
2667		 * low power state to number of usecs needed to receive 1 512
2668		 * byte frame at gigabit line rate
2669		 */
2670		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2671
2672		/* free space in tx packet buffer to wake from DMA coal */
2673		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2674		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2675
2676		/* make low power state decision controlled by DMA coal */
2677		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2678		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2679		    reg | E1000_PCIEMISC_LX_DECISION);
2680	}
2681
2682	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2683	e1000_get_phy_info(hw);
2684	e1000_check_for_link(hw);
2685	return;
2686}
2687
2688/*********************************************************************
2689 *
2690 *  Setup networking device structure and register an interface.
2691 *
2692 **********************************************************************/
2693static int
2694igb_setup_interface(device_t dev, struct adapter *adapter)
2695{
2696	struct ifnet   *ifp;
2697
2698	INIT_DEBUGOUT("igb_setup_interface: begin");
2699
2700	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2701	if (ifp == NULL) {
2702		device_printf(dev, "can not allocate ifnet structure\n");
2703		return (-1);
2704	}
2705	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2706	ifp->if_mtu = ETHERMTU;
2707	ifp->if_init =  igb_init;
2708	ifp->if_softc = adapter;
2709	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2710	ifp->if_ioctl = igb_ioctl;
2711	ifp->if_start = igb_start;
2712#if __FreeBSD_version >= 800000
2713	ifp->if_transmit = igb_mq_start;
2714	ifp->if_qflush = igb_qflush;
2715#endif
2716	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2717	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2718	IFQ_SET_READY(&ifp->if_snd);
2719
2720	ether_ifattach(ifp, adapter->hw.mac.addr);
2721
2722	ifp->if_capabilities = ifp->if_capenable = 0;
2723
2724	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2725	ifp->if_capabilities |= IFCAP_TSO4;
2726	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2727	ifp->if_capenable = ifp->if_capabilities;
2728
2729	/* Don't enable LRO by default */
2730	ifp->if_capabilities |= IFCAP_LRO;
2731
2732#ifdef DEVICE_POLLING
2733	ifp->if_capabilities |= IFCAP_POLLING;
2734#endif
2735
2736	/*
2737	 * Tell the upper layer(s) we
2738	 * support full VLAN capability.
2739	 */
2740	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2741	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2742	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2743
2744	/*
2745	** Don't turn this on by default: if vlans are
2746	** created on another pseudo device (eg. lagg)
2747	** then vlan events are not passed thru, breaking
2748	** operation, but with HW FILTER off it works. If
2749	** you are using vlans directly on this driver you
2750	** can enable this and get full hardware tag filtering.
2751	*/
2752	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2753
2754	/*
2755	 * Specify the media types supported by this adapter and register
2756	 * callbacks to update media and link information
2757	 */
2758	ifmedia_init(&adapter->media, IFM_IMASK,
2759	    igb_media_change, igb_media_status);
2760	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2761	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2762		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2763			    0, NULL);
2764		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2765	} else {
2766		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2767		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2768			    0, NULL);
2769		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2770			    0, NULL);
2771		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2772			    0, NULL);
2773		if (adapter->hw.phy.type != e1000_phy_ife) {
2774			ifmedia_add(&adapter->media,
2775				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2776			ifmedia_add(&adapter->media,
2777				IFM_ETHER | IFM_1000_T, 0, NULL);
2778		}
2779	}
2780	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2781	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2782	return (0);
2783}
2784
2785
2786/*
2787 * Manage DMA'able memory.
2788 */
2789static void
2790igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2791{
2792	if (error)
2793		return;
2794	*(bus_addr_t *) arg = segs[0].ds_addr;
2795}
2796
2797static int
2798igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2799        struct igb_dma_alloc *dma, int mapflags)
2800{
2801	int error;
2802
2803	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2804				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2805				BUS_SPACE_MAXADDR,	/* lowaddr */
2806				BUS_SPACE_MAXADDR,	/* highaddr */
2807				NULL, NULL,		/* filter, filterarg */
2808				size,			/* maxsize */
2809				1,			/* nsegments */
2810				size,			/* maxsegsize */
2811				0,			/* flags */
2812				NULL,			/* lockfunc */
2813				NULL,			/* lockarg */
2814				&dma->dma_tag);
2815	if (error) {
2816		device_printf(adapter->dev,
2817		    "%s: bus_dma_tag_create failed: %d\n",
2818		    __func__, error);
2819		goto fail_0;
2820	}
2821
2822	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2823	    BUS_DMA_NOWAIT, &dma->dma_map);
2824	if (error) {
2825		device_printf(adapter->dev,
2826		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2827		    __func__, (uintmax_t)size, error);
2828		goto fail_2;
2829	}
2830
2831	dma->dma_paddr = 0;
2832	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2833	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2834	if (error || dma->dma_paddr == 0) {
2835		device_printf(adapter->dev,
2836		    "%s: bus_dmamap_load failed: %d\n",
2837		    __func__, error);
2838		goto fail_3;
2839	}
2840
2841	return (0);
2842
2843fail_3:
2844	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2845fail_2:
2846	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2847	bus_dma_tag_destroy(dma->dma_tag);
2848fail_0:
2849	dma->dma_map = NULL;
2850	dma->dma_tag = NULL;
2851
2852	return (error);
2853}
2854
2855static void
2856igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2857{
2858	if (dma->dma_tag == NULL)
2859		return;
2860	if (dma->dma_map != NULL) {
2861		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2862		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2863		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2864		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2865		dma->dma_map = NULL;
2866	}
2867	bus_dma_tag_destroy(dma->dma_tag);
2868	dma->dma_tag = NULL;
2869}
2870
2871
2872/*********************************************************************
2873 *
2874 *  Allocate memory for the transmit and receive rings, and then
2875 *  the descriptors associated with each, called only once at attach.
2876 *
2877 **********************************************************************/
2878static int
2879igb_allocate_queues(struct adapter *adapter)
2880{
2881	device_t dev = adapter->dev;
2882	struct igb_queue	*que = NULL;
2883	struct tx_ring		*txr = NULL;
2884	struct rx_ring		*rxr = NULL;
2885	int rsize, tsize, error = E1000_SUCCESS;
2886	int txconf = 0, rxconf = 0;
2887
2888	/* First allocate the top level queue structs */
2889	if (!(adapter->queues =
2890	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2891	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2892		device_printf(dev, "Unable to allocate queue memory\n");
2893		error = ENOMEM;
2894		goto fail;
2895	}
2896
2897	/* Next allocate the TX ring struct memory */
2898	if (!(adapter->tx_rings =
2899	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2900	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2901		device_printf(dev, "Unable to allocate TX ring memory\n");
2902		error = ENOMEM;
2903		goto tx_fail;
2904	}
2905
2906	/* Now allocate the RX */
2907	if (!(adapter->rx_rings =
2908	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2909	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2910		device_printf(dev, "Unable to allocate RX ring memory\n");
2911		error = ENOMEM;
2912		goto rx_fail;
2913	}
2914
2915	tsize = roundup2(adapter->num_tx_desc *
2916	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2917	/*
2918	 * Now set up the TX queues, txconf is needed to handle the
2919	 * possibility that things fail midcourse and we need to
2920	 * undo memory gracefully
2921	 */
2922	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2923		/* Set up some basics */
2924		txr = &adapter->tx_rings[i];
2925		txr->adapter = adapter;
2926		txr->me = i;
2927
2928		/* Initialize the TX lock */
2929		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2930		    device_get_nameunit(dev), txr->me);
2931		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2932
2933		if (igb_dma_malloc(adapter, tsize,
2934			&txr->txdma, BUS_DMA_NOWAIT)) {
2935			device_printf(dev,
2936			    "Unable to allocate TX Descriptor memory\n");
2937			error = ENOMEM;
2938			goto err_tx_desc;
2939		}
2940		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2941		bzero((void *)txr->tx_base, tsize);
2942
2943        	/* Now allocate transmit buffers for the ring */
2944        	if (igb_allocate_transmit_buffers(txr)) {
2945			device_printf(dev,
2946			    "Critical Failure setting up transmit buffers\n");
2947			error = ENOMEM;
2948			goto err_tx_desc;
2949        	}
2950#if __FreeBSD_version >= 800000
2951		/* Allocate a buf ring */
2952		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2953		    M_WAITOK, &txr->tx_mtx);
2954#endif
2955	}
2956
2957	/*
2958	 * Next the RX queues...
2959	 */
2960	rsize = roundup2(adapter->num_rx_desc *
2961	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2962	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2963		rxr = &adapter->rx_rings[i];
2964		rxr->adapter = adapter;
2965		rxr->me = i;
2966
2967		/* Initialize the RX lock */
2968		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2969		    device_get_nameunit(dev), rxr->me);
2970		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2971
2972		if (igb_dma_malloc(adapter, rsize,
2973			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2974			device_printf(dev,
2975			    "Unable to allocate RX Descriptor memory\n");
2976			error = ENOMEM;
2977			goto err_rx_desc;
2978		}
2979		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2980		bzero((void *)rxr->rx_base, rsize);
2981
2982        	/* Allocate receive buffers for the ring*/
2983		if (igb_allocate_receive_buffers(rxr)) {
2984			device_printf(dev,
2985			    "Critical Failure setting up receive buffers\n");
2986			error = ENOMEM;
2987			goto err_rx_desc;
2988		}
2989	}
2990
2991	/*
2992	** Finally set up the queue holding structs
2993	*/
2994	for (int i = 0; i < adapter->num_queues; i++) {
2995		que = &adapter->queues[i];
2996		que->adapter = adapter;
2997		que->txr = &adapter->tx_rings[i];
2998		que->rxr = &adapter->rx_rings[i];
2999	}
3000
3001	return (0);
3002
3003err_rx_desc:
3004	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3005		igb_dma_free(adapter, &rxr->rxdma);
3006err_tx_desc:
3007	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3008		igb_dma_free(adapter, &txr->txdma);
3009	free(adapter->rx_rings, M_DEVBUF);
3010rx_fail:
3011#if __FreeBSD_version >= 800000
3012	buf_ring_free(txr->br, M_DEVBUF);
3013#endif
3014	free(adapter->tx_rings, M_DEVBUF);
3015tx_fail:
3016	free(adapter->queues, M_DEVBUF);
3017fail:
3018	return (error);
3019}
3020
3021/*********************************************************************
3022 *
3023 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3024 *  the information needed to transmit a packet on the wire. This is
3025 *  called only once at attach; setup is done on every reset.
3026 *
3027 **********************************************************************/
3028static int
3029igb_allocate_transmit_buffers(struct tx_ring *txr)
3030{
3031	struct adapter *adapter = txr->adapter;
3032	device_t dev = adapter->dev;
3033	struct igb_tx_buffer *txbuf;
3034	int error, i;
3035
3036	/*
3037	 * Setup DMA descriptor areas.
3038	 */
3039	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3040			       1, 0,			/* alignment, bounds */
3041			       BUS_SPACE_MAXADDR,	/* lowaddr */
3042			       BUS_SPACE_MAXADDR,	/* highaddr */
3043			       NULL, NULL,		/* filter, filterarg */
3044			       IGB_TSO_SIZE,		/* maxsize */
3045			       IGB_MAX_SCATTER,		/* nsegments */
3046			       PAGE_SIZE,		/* maxsegsize */
3047			       0,			/* flags */
3048			       NULL,			/* lockfunc */
3049			       NULL,			/* lockfuncarg */
3050			       &txr->txtag))) {
3051		device_printf(dev,"Unable to allocate TX DMA tag\n");
3052		goto fail;
3053	}
3054
3055	if (!(txr->tx_buffers =
3056	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3057	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3058		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3059		error = ENOMEM;
3060		goto fail;
3061	}
3062
3063        /* Create the descriptor buffer dma maps */
3064	txbuf = txr->tx_buffers;
3065	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3066		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3067		if (error != 0) {
3068			device_printf(dev, "Unable to create TX DMA map\n");
3069			goto fail;
3070		}
3071	}
3072
3073	return 0;
3074fail:
3075	/* We free all, it handles case where we are in the middle */
3076	igb_free_transmit_structures(adapter);
3077	return (error);
3078}
3079
3080/*********************************************************************
3081 *
3082 *  Initialize a transmit ring.
3083 *
3084 **********************************************************************/
3085static void
3086igb_setup_transmit_ring(struct tx_ring *txr)
3087{
3088	struct adapter *adapter = txr->adapter;
3089	struct igb_tx_buffer *txbuf;
3090	int i;
3091
3092	/* Clear the old descriptor contents */
3093	IGB_TX_LOCK(txr);
3094	bzero((void *)txr->tx_base,
3095	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3096	/* Reset indices */
3097	txr->next_avail_desc = 0;
3098	txr->next_to_clean = 0;
3099
3100	/* Free any existing tx buffers. */
3101        txbuf = txr->tx_buffers;
3102	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3103		if (txbuf->m_head != NULL) {
3104			bus_dmamap_sync(txr->txtag, txbuf->map,
3105			    BUS_DMASYNC_POSTWRITE);
3106			bus_dmamap_unload(txr->txtag, txbuf->map);
3107			m_freem(txbuf->m_head);
3108			txbuf->m_head = NULL;
3109		}
3110		/* clear the watch index */
3111		txbuf->next_eop = -1;
3112        }
3113
3114	/* Set number of descriptors available */
3115	txr->tx_avail = adapter->num_tx_desc;
3116
3117	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3118	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3119	IGB_TX_UNLOCK(txr);
3120}
3121
3122/*********************************************************************
3123 *
3124 *  Initialize all transmit rings.
3125 *
3126 **********************************************************************/
3127static void
3128igb_setup_transmit_structures(struct adapter *adapter)
3129{
3130	struct tx_ring *txr = adapter->tx_rings;
3131
3132	for (int i = 0; i < adapter->num_queues; i++, txr++)
3133		igb_setup_transmit_ring(txr);
3134
3135	return;
3136}
3137
3138/*********************************************************************
3139 *
3140 *  Enable transmit unit.
3141 *
3142 **********************************************************************/
3143static void
3144igb_initialize_transmit_units(struct adapter *adapter)
3145{
3146	struct tx_ring	*txr = adapter->tx_rings;
3147	struct e1000_hw *hw = &adapter->hw;
3148	u32		tctl, txdctl;
3149
3150	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3151	tctl = txdctl = 0;
3152
3153	/* Setup the Tx Descriptor Rings */
3154	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3155		u64 bus_addr = txr->txdma.dma_paddr;
3156
3157		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3158		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3159		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3160		    (uint32_t)(bus_addr >> 32));
3161		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3162		    (uint32_t)bus_addr);
3163
3164		/* Setup the HW Tx Head and Tail descriptor pointers */
3165		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3166		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3167
3168		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3169		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3170		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3171
3172		txr->queue_status = IGB_QUEUE_IDLE;
3173
3174		txdctl |= IGB_TX_PTHRESH;
3175		txdctl |= IGB_TX_HTHRESH << 8;
3176		txdctl |= IGB_TX_WTHRESH << 16;
3177		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3178		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3179	}
3180
3181	if (adapter->vf_ifp)
3182		return;
3183
3184	e1000_config_collision_dist(hw);
3185
3186	/* Program the Transmit Control Register */
3187	tctl = E1000_READ_REG(hw, E1000_TCTL);
3188	tctl &= ~E1000_TCTL_CT;
3189	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3190		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3191
3192	/* This write will effectively turn on the transmit unit. */
3193	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3194}
3195
3196/*********************************************************************
3197 *
3198 *  Free all transmit rings.
3199 *
3200 **********************************************************************/
3201static void
3202igb_free_transmit_structures(struct adapter *adapter)
3203{
3204	struct tx_ring *txr = adapter->tx_rings;
3205
3206	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3207		IGB_TX_LOCK(txr);
3208		igb_free_transmit_buffers(txr);
3209		igb_dma_free(adapter, &txr->txdma);
3210		IGB_TX_UNLOCK(txr);
3211		IGB_TX_LOCK_DESTROY(txr);
3212	}
3213	free(adapter->tx_rings, M_DEVBUF);
3214}
3215
3216/*********************************************************************
3217 *
3218 *  Free transmit ring related data structures.
3219 *
3220 **********************************************************************/
3221static void
3222igb_free_transmit_buffers(struct tx_ring *txr)
3223{
3224	struct adapter *adapter = txr->adapter;
3225	struct igb_tx_buffer *tx_buffer;
3226	int             i;
3227
3228	INIT_DEBUGOUT("free_transmit_ring: begin");
3229
3230	if (txr->tx_buffers == NULL)
3231		return;
3232
3233	tx_buffer = txr->tx_buffers;
3234	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3235		if (tx_buffer->m_head != NULL) {
3236			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3237			    BUS_DMASYNC_POSTWRITE);
3238			bus_dmamap_unload(txr->txtag,
3239			    tx_buffer->map);
3240			m_freem(tx_buffer->m_head);
3241			tx_buffer->m_head = NULL;
3242			if (tx_buffer->map != NULL) {
3243				bus_dmamap_destroy(txr->txtag,
3244				    tx_buffer->map);
3245				tx_buffer->map = NULL;
3246			}
3247		} else if (tx_buffer->map != NULL) {
3248			bus_dmamap_unload(txr->txtag,
3249			    tx_buffer->map);
3250			bus_dmamap_destroy(txr->txtag,
3251			    tx_buffer->map);
3252			tx_buffer->map = NULL;
3253		}
3254	}
3255#if __FreeBSD_version >= 800000
3256	if (txr->br != NULL)
3257		buf_ring_free(txr->br, M_DEVBUF);
3258#endif
3259	if (txr->tx_buffers != NULL) {
3260		free(txr->tx_buffers, M_DEVBUF);
3261		txr->tx_buffers = NULL;
3262	}
3263	if (txr->txtag != NULL) {
3264		bus_dma_tag_destroy(txr->txtag);
3265		txr->txtag = NULL;
3266	}
3267	return;
3268}
3269
3270/**********************************************************************
3271 *
3272 *  Setup work for hardware segmentation offload (TSO)
3273 *
3274 **********************************************************************/
3275static boolean_t
3276igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3277{
3278	struct adapter *adapter = txr->adapter;
3279	struct e1000_adv_tx_context_desc *TXD;
3280	struct igb_tx_buffer        *tx_buffer;
3281	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3282	u32 mss_l4len_idx = 0;
3283	u16 vtag = 0;
3284	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3285	struct ether_vlan_header *eh;
3286	struct ip *ip;
3287	struct tcphdr *th;
3288
3289
3290	/*
3291	 * Determine where frame payload starts.
3292	 * Jump over vlan headers if already present
3293	 */
3294	eh = mtod(mp, struct ether_vlan_header *);
3295	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3296		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3297	else
3298		ehdrlen = ETHER_HDR_LEN;
3299
3300	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3301	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3302		return FALSE;
3303
3304	/* Only supports IPV4 for now */
3305	ctxd = txr->next_avail_desc;
3306	tx_buffer = &txr->tx_buffers[ctxd];
3307	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3308
3309	ip = (struct ip *)(mp->m_data + ehdrlen);
3310	if (ip->ip_p != IPPROTO_TCP)
3311		return FALSE;
3312	ip->ip_sum = 0;
3313	ip_hlen = ip->ip_hl << 2;
3314	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3315	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3316	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
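	/*
	 * Seed the TCP checksum field with the pseudo-header sum
	 * (addresses and protocol, no length) so the hardware can
	 * finish the checksum for each segment it generates.
	 */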
3317	tcp_hlen = th->th_off << 2;
3318	/*
3319	 * Calculate header length, this is used
3320	 * in the transmit desc in igb_xmit
3321	 */
3322	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3323
3324	/* VLAN MACLEN IPLEN */
3325	if (mp->m_flags & M_VLANTAG) {
3326		vtag = htole16(mp->m_pkthdr.ether_vtag);
3327		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3328	}
3329
3330	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3331	vlan_macip_lens |= ip_hlen;
3332	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3333
3334	/* ADV DTYPE TUCMD */
3335	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3336	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3337	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3338	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3339
3340	/* MSS L4LEN IDX */
3341	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3342	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3343	/* 82575 needs the queue index added */
3344	if (adapter->hw.mac.type == e1000_82575)
3345		mss_l4len_idx |= txr->me << 4;
3346	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3347
3348	TXD->seqnum_seed = htole32(0);
3349	tx_buffer->m_head = NULL;
3350	tx_buffer->next_eop = -1;
3351
3352	if (++ctxd == adapter->num_tx_desc)
3353		ctxd = 0;
3354
3355	txr->tx_avail--;
3356	txr->next_avail_desc = ctxd;
3357	return TRUE;
3358}
3359
3360
3361/*********************************************************************
3362 *
3363 *  Context Descriptor setup for VLAN or CSUM
3364 *
3365 **********************************************************************/
3366
3367static bool
3368igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3369{
3370	struct adapter *adapter = txr->adapter;
3371	struct e1000_adv_tx_context_desc *TXD;
3372	struct igb_tx_buffer        *tx_buffer;
3373	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3374	struct ether_vlan_header *eh;
3375	struct ip *ip = NULL;
3376	struct ip6_hdr *ip6;
3377	int  ehdrlen, ctxd, ip_hlen = 0;
3378	u16	etype, vtag = 0;
3379	u8	ipproto = 0;
3380	bool	offload = TRUE;
3381
3382	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3383		offload = FALSE;
3384
3385	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3386	ctxd = txr->next_avail_desc;
3387	tx_buffer = &txr->tx_buffers[ctxd];
3388	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3389
3390	/*
3391	** In advanced descriptors the vlan tag must
3392	** be placed into the context descriptor, thus
3393	** we need to be here just for that setup.
3394	*/
3395	if (mp->m_flags & M_VLANTAG) {
3396		vtag = htole16(mp->m_pkthdr.ether_vtag);
3397		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3398	} else if (offload == FALSE)
3399		return FALSE;
3400
3401	/*
3402	 * Determine where frame payload starts.
3403	 * Jump over vlan headers if already present,
3404	 * helpful for QinQ too.
3405	 */
3406	eh = mtod(mp, struct ether_vlan_header *);
3407	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3408		etype = ntohs(eh->evl_proto);
3409		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3410	} else {
3411		etype = ntohs(eh->evl_encap_proto);
3412		ehdrlen = ETHER_HDR_LEN;
3413	}
3414
3415	/* Set the ether header length */
3416	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3417
3418	switch (etype) {
3419		case ETHERTYPE_IP:
3420			ip = (struct ip *)(mp->m_data + ehdrlen);
3421			ip_hlen = ip->ip_hl << 2;
3422			if (mp->m_len < ehdrlen + ip_hlen) {
3423				offload = FALSE;
3424				break;
3425			}
3426			ipproto = ip->ip_p;
3427			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3428			break;
3429		case ETHERTYPE_IPV6:
3430			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3431			ip_hlen = sizeof(struct ip6_hdr);
3432			ipproto = ip6->ip6_nxt;
3433			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3434			break;
3435		default:
3436			offload = FALSE;
3437			break;
3438	}
3439
3440	vlan_macip_lens |= ip_hlen;
3441	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3442
3443	switch (ipproto) {
3444		case IPPROTO_TCP:
3445			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3446				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3447			break;
3448		case IPPROTO_UDP:
3449			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3450				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3451			break;
3452#if __FreeBSD_version >= 800000
3453		case IPPROTO_SCTP:
3454			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3455				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3456			break;
3457#endif
3458		default:
3459			offload = FALSE;
3460			break;
3461	}
3462
3463	/* 82575 needs the queue index added */
3464	if (adapter->hw.mac.type == e1000_82575)
3465		mss_l4len_idx = txr->me << 4;
3466
3467	/* Now copy bits into descriptor */
3468	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3469	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3470	TXD->seqnum_seed = htole32(0);
3471	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3472
3473	tx_buffer->m_head = NULL;
3474	tx_buffer->next_eop = -1;
3475
3476	/* We've consumed the first desc, adjust counters */
3477	if (++ctxd == adapter->num_tx_desc)
3478		ctxd = 0;
3479	txr->next_avail_desc = ctxd;
3480	--txr->tx_avail;
3481
3482        return (offload);
3483}
3484
3485
3486/**********************************************************************
3487 *
3488 *  Examine each tx_buffer in the used queue. If the hardware is done
3489 *  processing the packet then free associated resources. The
3490 *  tx_buffer is put back on the free queue.
3491 *
3492 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3493 **********************************************************************/
3494static bool
3495igb_txeof(struct tx_ring *txr)
3496{
3497	struct adapter	*adapter = txr->adapter;
3498        int first, last, done, processed;
3499        struct igb_tx_buffer *tx_buffer;
3500        struct e1000_tx_desc   *tx_desc, *eop_desc;
3501	struct ifnet   *ifp = adapter->ifp;
3502
3503	IGB_TX_LOCK_ASSERT(txr);
3504
3505        if (txr->tx_avail == adapter->num_tx_desc) {
3506		txr->queue_status = IGB_QUEUE_IDLE;
3507                return FALSE;
3508	}
3509
3510	processed = 0;
3511        first = txr->next_to_clean;
3512        tx_desc = &txr->tx_base[first];
3513        tx_buffer = &txr->tx_buffers[first];
3514	last = tx_buffer->next_eop;
3515        eop_desc = &txr->tx_base[last];
3516
3517	/*
3518	 * What this does is get the index of the
3519	 * first descriptor AFTER the EOP of the
3520	 * first packet, that way we can do the
3521	 * simple comparison on the inner while loop.
3522	 */
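	/*
	 * For example, if the first packet's EOP sits in descriptor
	 * 10, done becomes 11 and the inner loop below frees
	 * descriptors first..10, stopping when first == done.
	 */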
3523	if (++last == adapter->num_tx_desc)
3524 		last = 0;
3525	done = last;
3526
3527        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3528            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3529
3530        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3531		/* We clean the range of the packet */
3532		while (first != done) {
3533                	tx_desc->upper.data = 0;
3534                	tx_desc->lower.data = 0;
3535                	tx_desc->buffer_addr = 0;
3536                	++txr->tx_avail;
3537			++processed;
3538
3539			if (tx_buffer->m_head) {
3540				txr->bytes +=
3541				    tx_buffer->m_head->m_pkthdr.len;
3542				bus_dmamap_sync(txr->txtag,
3543				    tx_buffer->map,
3544				    BUS_DMASYNC_POSTWRITE);
3545				bus_dmamap_unload(txr->txtag,
3546				    tx_buffer->map);
3547
3548                        	m_freem(tx_buffer->m_head);
3549                        	tx_buffer->m_head = NULL;
3550                	}
3551			tx_buffer->next_eop = -1;
3552			txr->watchdog_time = ticks;
3553
3554	                if (++first == adapter->num_tx_desc)
3555				first = 0;
3556
3557	                tx_buffer = &txr->tx_buffers[first];
3558			tx_desc = &txr->tx_base[first];
3559		}
3560		++txr->packets;
3561		++ifp->if_opackets;
3562		/* See if we can continue to the next packet */
3563		last = tx_buffer->next_eop;
3564		if (last != -1) {
3565        		eop_desc = &txr->tx_base[last];
3566			/* Get new done point */
3567			if (++last == adapter->num_tx_desc) last = 0;
3568			done = last;
3569		} else
3570			break;
3571        }
3572        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3573            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3574
3575        txr->next_to_clean = first;
3576
3577	/*
3578	** Watchdog calculation: we know work is outstanding
3579	** or the early return above would have been taken,
3580	** so if nothing was processed for too long we
3581	** consider the queue hung.
3582	*/
3583	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3584		txr->queue_status = IGB_QUEUE_HUNG;
3585
3586        /*
3587         * If we have a minimum free, clear IFF_DRV_OACTIVE
3588         * to tell the stack that it is OK to send packets.
3589         */
3590        if (txr->tx_avail > IGB_TX_OP_THRESHOLD)
3591                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3592
3593	/* All clean, turn off the watchdog */
3594	if (txr->tx_avail == adapter->num_tx_desc) {
3595		txr->queue_status = IGB_QUEUE_IDLE;
3596		return (FALSE);
3597	}
3598
3599	return (TRUE);
3600}
3601
3602
3603/*********************************************************************
3604 *
3605 *  Refresh mbuf buffers for RX descriptor rings
3606 *   - now keeps its own state, so discards due to resource
3607 *     exhaustion are unnecessary: if an mbuf cannot be obtained
3608 *     it simply returns, keeping its placeholder, and can be
3609 *     called again later to retry.
3610 *
3611 **********************************************************************/
3612static void
3613igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3614{
3615	struct adapter		*adapter = rxr->adapter;
3616	bus_dma_segment_t	hseg[1];
3617	bus_dma_segment_t	pseg[1];
3618	struct igb_rx_buf	*rxbuf;
3619	struct mbuf		*mh, *mp;
3620	int			i, nsegs, error, cleaned;
3621
3622	i = rxr->next_to_refresh;
3623	rxr->needs_refresh = FALSE;
3624	cleaned = -1; /* Signify no completions */
3625	while (i != limit) {
3626		rxbuf = &rxr->rx_buffers[i];
3627		/* No hdr mbuf used with header split off */
3628		if (rxr->hdr_split == FALSE)
3629			goto no_split;
3630		if (rxbuf->m_head == NULL) {
3631			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3632			if (mh == NULL) {
3633				rxr->needs_refresh = TRUE;
3634				goto update;
3635			}
3636		} else
3637			mh = rxbuf->m_head;
3638
3639		mh->m_pkthdr.len = mh->m_len = MHLEN;
3641		mh->m_flags |= M_PKTHDR;
3642		/* Get the memory mapping */
3643		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3644		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3645		if (error != 0) {
3646			printf("Refresh mbufs: hdr dmamap load"
3647			    " failure - %d\n", error);
3648			m_free(mh);
3649			rxbuf->m_head = NULL;
3650			goto update;
3651		}
3652		rxbuf->m_head = mh;
3653		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3654		    BUS_DMASYNC_PREREAD);
3655		rxr->rx_base[i].read.hdr_addr =
3656		    htole64(hseg[0].ds_addr);
3657no_split:
3658		if (rxbuf->m_pack == NULL) {
3659			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3660			    M_PKTHDR, adapter->rx_mbuf_sz);
3661			if (mp == NULL) {
3662				rxr->needs_refresh = TRUE;
3663				goto update;
3664			}
3665		} else
3666			mp = rxbuf->m_pack;
3667
3668		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3669		/* Get the memory mapping */
3670		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3671		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3672		if (error != 0) {
3673			printf("Refresh mbufs: payload dmamap load"
3674			    " failure - %d\n", error);
3675			m_free(mp);
3676			rxbuf->m_pack = NULL;
3677			goto update;
3678		}
3679		rxbuf->m_pack = mp;
3680		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3681		    BUS_DMASYNC_PREREAD);
3682		rxr->rx_base[i].read.pkt_addr =
3683		    htole64(pseg[0].ds_addr);
3684
3685		cleaned = i;
3686		/* Calculate next index */
3687		if (++i == adapter->num_rx_desc)
3688			i = 0;
3689		/* This is the work marker for refresh */
3690		rxr->next_to_refresh = i;
3691	}
3692update:
3693	if (cleaned != -1) /* If we refreshed some, bump tail */
3694		E1000_WRITE_REG(&adapter->hw,
3695		    E1000_RDT(rxr->me), cleaned);
3696	return;
3697}
3698
3699
3700/*********************************************************************
3701 *
3702 *  Allocate memory for rx_buffer structures. Since we use one
3703 *  rx_buffer per received packet, the maximum number of rx_buffers
3704 *  that we'll need is equal to the number of receive descriptors
3705 *  that we've allocated.
3706 *
3707 **********************************************************************/
3708static int
3709igb_allocate_receive_buffers(struct rx_ring *rxr)
3710{
3711	struct	adapter 	*adapter = rxr->adapter;
3712	device_t 		dev = adapter->dev;
3713	struct igb_rx_buf	*rxbuf;
3714	int             	i, bsize, error;
3715
3716	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3717	if (!(rxr->rx_buffers =
3718	    (struct igb_rx_buf *) malloc(bsize,
3719	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3720		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3721		error = ENOMEM;
3722		goto fail;
3723	}
3724
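	/*
	** Two DMA tags are created below: a small one (MSIZE) for
	** the header buffers and a large one (up to MJUM9BYTES)
	** for the payload clusters.
	*/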
3725	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3726				   1, 0,		/* alignment, bounds */
3727				   BUS_SPACE_MAXADDR,	/* lowaddr */
3728				   BUS_SPACE_MAXADDR,	/* highaddr */
3729				   NULL, NULL,		/* filter, filterarg */
3730				   MSIZE,		/* maxsize */
3731				   1,			/* nsegments */
3732				   MSIZE,		/* maxsegsize */
3733				   0,			/* flags */
3734				   NULL,		/* lockfunc */
3735				   NULL,		/* lockfuncarg */
3736				   &rxr->htag))) {
3737		device_printf(dev, "Unable to create RX DMA tag\n");
3738		goto fail;
3739	}
3740
3741	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3742				   1, 0,		/* alignment, bounds */
3743				   BUS_SPACE_MAXADDR,	/* lowaddr */
3744				   BUS_SPACE_MAXADDR,	/* highaddr */
3745				   NULL, NULL,		/* filter, filterarg */
3746				   MJUM9BYTES,		/* maxsize */
3747				   1,			/* nsegments */
3748				   MJUM9BYTES,		/* maxsegsize */
3749				   0,			/* flags */
3750				   NULL,		/* lockfunc */
3751				   NULL,		/* lockfuncarg */
3752				   &rxr->ptag))) {
3753		device_printf(dev, "Unable to create RX payload DMA tag\n");
3754		goto fail;
3755	}
3756
3757	for (i = 0; i < adapter->num_rx_desc; i++) {
3758		rxbuf = &rxr->rx_buffers[i];
3759		error = bus_dmamap_create(rxr->htag,
3760		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3761		if (error) {
3762			device_printf(dev,
3763			    "Unable to create RX head DMA maps\n");
3764			goto fail;
3765		}
3766		error = bus_dmamap_create(rxr->ptag,
3767		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3768		if (error) {
3769			device_printf(dev,
3770			    "Unable to create RX packet DMA maps\n");
3771			goto fail;
3772		}
3773	}
3774
3775	return (0);
3776
3777fail:
3778	/* Frees all, but can handle partial completion */
3779	igb_free_receive_structures(adapter);
3780	return (error);
3781}
3782
3783
3784static void
3785igb_free_receive_ring(struct rx_ring *rxr)
3786{
3787	struct	adapter		*adapter;
3788	struct igb_rx_buf	*rxbuf;
3789	int i;
3790
3791	adapter = rxr->adapter;
3792	for (i = 0; i < adapter->num_rx_desc; i++) {
3793		rxbuf = &rxr->rx_buffers[i];
3794		if (rxbuf->m_head != NULL) {
3795			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3796			    BUS_DMASYNC_POSTREAD);
3797			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3798			rxbuf->m_head->m_flags |= M_PKTHDR;
3799			m_freem(rxbuf->m_head);
3800		}
3801		if (rxbuf->m_pack != NULL) {
3802			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3803			    BUS_DMASYNC_POSTREAD);
3804			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3805			rxbuf->m_pack->m_flags |= M_PKTHDR;
3806			m_freem(rxbuf->m_pack);
3807		}
3808		rxbuf->m_head = NULL;
3809		rxbuf->m_pack = NULL;
3810	}
3811}
3812
3813
3814/*********************************************************************
3815 *
3816 *  Initialize a receive ring and its buffers.
3817 *
3818 **********************************************************************/
3819static int
3820igb_setup_receive_ring(struct rx_ring *rxr)
3821{
3822	struct	adapter		*adapter;
3823	struct  ifnet		*ifp;
3824	device_t		dev;
3825	struct igb_rx_buf	*rxbuf;
3826	bus_dma_segment_t	pseg[1], hseg[1];
3827	struct lro_ctrl		*lro = &rxr->lro;
3828	int			rsize, nsegs, error = 0;
3829
3830	adapter = rxr->adapter;
3831	dev = adapter->dev;
3832	ifp = adapter->ifp;
3833
3834	/* Clear the ring contents */
3835	IGB_RX_LOCK(rxr);
3836	rsize = roundup2(adapter->num_rx_desc *
3837	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3838	bzero((void *)rxr->rx_base, rsize);
3839
3840	/*
3841	** Free current RX buffer structures and their mbufs
3842	*/
3843	igb_free_receive_ring(rxr);
3844
3845	/* Configure for header split? */
3846	if (igb_header_split)
3847		rxr->hdr_split = TRUE;
3848
3849        /* Now replenish the ring mbufs */
3850	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3851		struct mbuf	*mh, *mp;
3852
3853		rxbuf = &rxr->rx_buffers[j];
3854		if (rxr->hdr_split == FALSE)
3855			goto skip_head;
3856
3857		/* First the header */
3858		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3859		if (rxbuf->m_head == NULL) {
3860			error = ENOBUFS;
3861                        goto fail;
3862		}
3863		m_adj(rxbuf->m_head, ETHER_ALIGN);
3864		mh = rxbuf->m_head;
3865		mh->m_len = mh->m_pkthdr.len = MHLEN;
3866		mh->m_flags |= M_PKTHDR;
3867		/* Get the memory mapping */
3868		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3869		    rxbuf->hmap, rxbuf->m_head, hseg,
3870		    &nsegs, BUS_DMA_NOWAIT);
3871		if (error != 0) /* Nothing elegant to do here */
3872                        goto fail;
3873		bus_dmamap_sync(rxr->htag,
3874		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3875		/* Update descriptor */
3876		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3877
3878skip_head:
3879		/* Now the payload cluster */
3880		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3881		    M_PKTHDR, adapter->rx_mbuf_sz);
3882		if (rxbuf->m_pack == NULL) {
3883			error = ENOBUFS;
3884                        goto fail;
3885		}
3886		mp = rxbuf->m_pack;
3887		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3888		/* Get the memory mapping */
3889		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3890		    rxbuf->pmap, mp, pseg,
3891		    &nsegs, BUS_DMA_NOWAIT);
3892		if (error != 0)
3893                        goto fail;
3894		bus_dmamap_sync(rxr->ptag,
3895		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3896		/* Update descriptor */
3897		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3898        }
3899
3900	/* Setup our descriptor indices */
3901	rxr->next_to_check = 0;
3902	rxr->next_to_refresh = 0;
3903	rxr->lro_enabled = FALSE;
3904	rxr->rx_split_packets = 0;
3905	rxr->rx_bytes = 0;
3906
3907	rxr->fmp = NULL;
3908	rxr->lmp = NULL;
3909	rxr->discard = FALSE;
3910
3911	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3912	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3913
3914	/*
3915	** Now set up the LRO interface; we also
3916	** only do header split when LRO is enabled,
3917	** since without it the split is often
3918	** undesirable.
3919	*/
3920	if (ifp->if_capenable & IFCAP_LRO) {
3921		error = tcp_lro_init(lro);
3922		if (error) {
3923			device_printf(dev, "LRO Initialization failed!\n");
3924			goto fail;
3925		}
3926		INIT_DEBUGOUT("RX LRO Initialized\n");
3927		rxr->lro_enabled = TRUE;
3928		lro->ifp = adapter->ifp;
3929	}
3930
3931	IGB_RX_UNLOCK(rxr);
3932	return (0);
3933
3934fail:
3935	igb_free_receive_ring(rxr);
3936	IGB_RX_UNLOCK(rxr);
3937	return (error);
3938}
3939
3940/*********************************************************************
3941 *
3942 *  Initialize all receive rings.
3943 *
3944 **********************************************************************/
3945static int
3946igb_setup_receive_structures(struct adapter *adapter)
3947{
3948	struct rx_ring *rxr = adapter->rx_rings;
3949	int i;
3950
3951	for (i = 0; i < adapter->num_queues; i++, rxr++)
3952		if (igb_setup_receive_ring(rxr))
3953			goto fail;
3954
3955	return (0);
3956fail:
3957	/*
3958	 * Free RX buffers allocated so far, we will only handle
3959	 * the rings that completed, the failing case will have
3960	 * cleaned up for itself. 'i' is the endpoint.
3961	 */
3962	for (int j = 0; j < i; ++j) {
3963		rxr = &adapter->rx_rings[j];
3964		IGB_RX_LOCK(rxr);
3965		igb_free_receive_ring(rxr);
3966		IGB_RX_UNLOCK(rxr);
3967	}
3968
3969	return (ENOBUFS);
3970}
3971
3972/*********************************************************************
3973 *
3974 *  Enable receive unit.
3975 *
3976 **********************************************************************/
3977static void
3978igb_initialize_receive_units(struct adapter *adapter)
3979{
3980	struct rx_ring	*rxr = adapter->rx_rings;
3981	struct ifnet	*ifp = adapter->ifp;
3982	struct e1000_hw *hw = &adapter->hw;
3983	u32		rctl, rxcsum, psize, srrctl = 0;
3984
3985	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3986
3987	/*
3988	 * Make sure receives are disabled while setting
3989	 * up the descriptor ring
3990	 */
3991	rctl = E1000_READ_REG(hw, E1000_RCTL);
3992	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3993
3994	/*
3995	** Set up for header split
3996	*/
3997	if (rxr->hdr_split) {
3998		/* Use a standard mbuf for the header */
3999		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4000		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4001	} else
4002		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4003
4004	/*
4005	** Set up for jumbo frames
4006	*/
4007	if (ifp->if_mtu > ETHERMTU) {
4008		rctl |= E1000_RCTL_LPE;
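		/*
		** SRRCTL's packet buffer size field is in 1KB units,
		** hence the BSIZEPKT_SHIFT on the sizes below.
		*/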
4009		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4010			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4011			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4012		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4013			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4014			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4015		}
4016		/* Set maximum packet len */
4017		psize = adapter->max_frame_size;
4018		/* are we on a vlan? */
4019		if (adapter->ifp->if_vlantrunk != NULL)
4020			psize += VLAN_TAG_SIZE;
4021		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4022	} else {
4023		rctl &= ~E1000_RCTL_LPE;
4024		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4025		rctl |= E1000_RCTL_SZ_2048;
4026	}
4027
4028	/* Setup the Base and Length of the Rx Descriptor Rings */
4029	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4030		u64 bus_addr = rxr->rxdma.dma_paddr;
4031		u32 rxdctl;
4032
4033		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4034		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4035		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4036		    (uint32_t)(bus_addr >> 32));
4037		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4038		    (uint32_t)bus_addr);
4039		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4040		/* Enable this Queue */
4041		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4042		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4043		rxdctl &= 0xFFF00000;
4044		rxdctl |= IGB_RX_PTHRESH;
4045		rxdctl |= IGB_RX_HTHRESH << 8;
4046		rxdctl |= IGB_RX_WTHRESH << 16;
4047		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4048	}
4049
4050	/*
4051	** Setup for RX MultiQueue
4052	*/
4053	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4054	if (adapter->num_queues > 1) {
4055		u32 random[10], mrqc, shift = 0;
4056		union igb_reta {
4057			u32 dword;
4058			u8  bytes[4];
4059		} reta;
4060
4061		arc4rand(&random, sizeof(random), 0);
4062		if (adapter->hw.mac.type == e1000_82575)
4063			shift = 6;
4064		/* Warning FM follows */
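		/*
		** Fill the 128-entry redirection table, four one-byte
		** entries per register write, assigning queues round
		** robin; the 82575 wants the queue index shifted into
		** the upper bits of each entry (hence 'shift' above).
		*/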
4065		for (int i = 0; i < 128; i++) {
4066			reta.bytes[i & 3] =
4067			    (i % adapter->num_queues) << shift;
4068			if ((i & 3) == 3)
4069				E1000_WRITE_REG(hw,
4070				    E1000_RETA(i >> 2), reta.dword);
4071		}
4072		/* Now program the random hash key and enable 4-queue RSS */
4073		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4074		for (int i = 0; i < 10; i++)
4075			E1000_WRITE_REG_ARRAY(hw,
4076			    E1000_RSSRK(0), i, random[i]);
4077
4078		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4079		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4080		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4081		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4082		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4083		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4084		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4085		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4086
4087		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4088
4089		/*
4090		** NOTE: Receive Full-Packet Checksum Offload
4091		** is mutually exclusive with Multiqueue. However
4092		** this is not the same as TCP/IP checksums which
4093		** still work.
4094		*/
4095		rxcsum |= E1000_RXCSUM_PCSD;
4096#if __FreeBSD_version >= 800000
4097		/* For SCTP Offload */
4098		if ((hw->mac.type == e1000_82576)
4099		    && (ifp->if_capenable & IFCAP_RXCSUM))
4100			rxcsum |= E1000_RXCSUM_CRCOFL;
4101#endif
4102	} else {
4103		/* Non RSS setup */
4104		if (ifp->if_capenable & IFCAP_RXCSUM) {
4105			rxcsum |= E1000_RXCSUM_IPPCSE;
4106#if __FreeBSD_version >= 800000
4107			if (adapter->hw.mac.type == e1000_82576)
4108				rxcsum |= E1000_RXCSUM_CRCOFL;
4109#endif
4110		} else
4111			rxcsum &= ~E1000_RXCSUM_TUOFL;
4112	}
4113	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4114
4115	/* Setup the Receive Control Register */
4116	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4117	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4118		   E1000_RCTL_RDMTS_HALF |
4119		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4120	/* Strip CRC bytes. */
4121	rctl |= E1000_RCTL_SECRC;
4122	/* Make sure VLAN Filters are off */
4123	rctl &= ~E1000_RCTL_VFE;
4124	/* Don't store bad packets */
4125	rctl &= ~E1000_RCTL_SBP;
4126
4127	/* Enable Receives */
4128	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4129
4130	/*
4131	 * Setup the HW Rx Head and Tail Descriptor Pointers
4132	 *   - needs to be after enable
4133	 */
4134	for (int i = 0; i < adapter->num_queues; i++) {
4135		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4136		E1000_WRITE_REG(hw, E1000_RDT(i),
4137		     adapter->num_rx_desc - 1);
4138	}
4139	return;
4140}
4141
4142/*********************************************************************
4143 *
4144 *  Free receive rings.
4145 *
4146 **********************************************************************/
4147static void
4148igb_free_receive_structures(struct adapter *adapter)
4149{
4150	struct rx_ring *rxr = adapter->rx_rings;
4151
4152	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4153		struct lro_ctrl	*lro = &rxr->lro;
4154		igb_free_receive_buffers(rxr);
4155		tcp_lro_free(lro);
4156		igb_dma_free(adapter, &rxr->rxdma);
4157	}
4158
4159	free(adapter->rx_rings, M_DEVBUF);
4160}
4161
4162/*********************************************************************
4163 *
4164 *  Free receive ring data structures.
4165 *
4166 **********************************************************************/
4167static void
4168igb_free_receive_buffers(struct rx_ring *rxr)
4169{
4170	struct adapter		*adapter = rxr->adapter;
4171	struct igb_rx_buf	*rxbuf;
4172	int i;
4173
4174	INIT_DEBUGOUT("free_receive_structures: begin");
4175
4176	/* Cleanup any existing buffers */
4177	if (rxr->rx_buffers != NULL) {
4178		for (i = 0; i < adapter->num_rx_desc; i++) {
4179			rxbuf = &rxr->rx_buffers[i];
4180			if (rxbuf->m_head != NULL) {
4181				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4182				    BUS_DMASYNC_POSTREAD);
4183				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4184				rxbuf->m_head->m_flags |= M_PKTHDR;
4185				m_freem(rxbuf->m_head);
4186			}
4187			if (rxbuf->m_pack != NULL) {
4188				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4189				    BUS_DMASYNC_POSTREAD);
4190				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4191				rxbuf->m_pack->m_flags |= M_PKTHDR;
4192				m_freem(rxbuf->m_pack);
4193			}
4194			rxbuf->m_head = NULL;
4195			rxbuf->m_pack = NULL;
4196			if (rxbuf->hmap != NULL) {
4197				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4198				rxbuf->hmap = NULL;
4199			}
4200			if (rxbuf->pmap != NULL) {
4201				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4202				rxbuf->pmap = NULL;
4203			}
4204		}
4205		if (rxr->rx_buffers != NULL) {
4206			free(rxr->rx_buffers, M_DEVBUF);
4207			rxr->rx_buffers = NULL;
4208		}
4209	}
4210
4211	if (rxr->htag != NULL) {
4212		bus_dma_tag_destroy(rxr->htag);
4213		rxr->htag = NULL;
4214	}
4215	if (rxr->ptag != NULL) {
4216		bus_dma_tag_destroy(rxr->ptag);
4217		rxr->ptag = NULL;
4218	}
4219}
4220
4221static __inline void
4222igb_rx_discard(struct rx_ring *rxr, int i)
4223{
4224	struct igb_rx_buf	*rbuf;
4225
4226	rbuf = &rxr->rx_buffers[i];
4227
4228	/* Partially received? Free the chain */
4229	if (rxr->fmp != NULL) {
4230		rxr->fmp->m_flags |= M_PKTHDR;
4231		m_freem(rxr->fmp);
4232		rxr->fmp = NULL;
4233		rxr->lmp = NULL;
4234	}
4235
4236	/*
4237	** With advanced descriptors the writeback
4238	** clobbers the buffer addresses, so it's easier
4239	** to just free the existing mbufs and take
4240	** the normal refresh path to get new buffers
4241	** and mappings.
4242	*/
4243	if (rbuf->m_head) {
4244		m_free(rbuf->m_head);
4245		rbuf->m_head = NULL;
4246	}
4247
4248	if (rbuf->m_pack) {
4249		m_free(rbuf->m_pack);
4250		rbuf->m_pack = NULL;
4251	}
4252
4253	return;
4254}
4255
4256static __inline void
4257igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4258{
4259
4260	/*
4261	 * At the moment LRO is only used for IPv4/TCP packets whose TCP
4262	 * checksum has been verified by hardware, and which carry no VLAN
4263	 * tag in the Ethernet header.
4264	 */
4265	if (rxr->lro_enabled &&
4266	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4267	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4268	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4269	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4270	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4271	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4272		/*
4273		 * Send to the stack if:
4274		 *  - LRO not enabled, or
4275		 *  - no LRO resources, or
4276		 *  - LRO enqueue fails
4277		 */
4278		if (rxr->lro.lro_cnt != 0)
4279			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4280				return;
4281	}
4282	IGB_RX_UNLOCK(rxr);
4283	(*ifp->if_input)(ifp, m);
4284	IGB_RX_LOCK(rxr);
4285}
4286
4287/*********************************************************************
4288 *
4289 *  This routine executes in interrupt context. It replenishes
4290 *  the mbufs in the descriptor ring and passes data that has been
4291 *  DMA'd into host memory up to the upper layers.
4292 *
4293 *  We loop at most count times if count is > 0, or until done if
4294 *  count < 0.
4295 *
4296 *  Return TRUE if more to clean, FALSE otherwise
4297 *********************************************************************/
4298static bool
4299igb_rxeof(struct igb_queue *que, int count, int *done)
4300{
4301	struct adapter		*adapter = que->adapter;
4302	struct rx_ring		*rxr = que->rxr;
4303	struct ifnet		*ifp = adapter->ifp;
4304	struct lro_ctrl		*lro = &rxr->lro;
4305	struct lro_entry	*queued;
4306	int			i, processed = 0, rxdone = 0;
4307	u32			ptype, staterr = 0;
4308	union e1000_adv_rx_desc	*cur;
4309
4310	IGB_RX_LOCK(rxr);
4311	/* Sync the ring. */
4312	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4313	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4314
4315	/* Try outstanding refresh first */
4316	if (rxr->needs_refresh == TRUE)
4317		igb_refresh_mbufs(rxr, rxr->next_to_check);
4318
4319	/* Main clean loop */
4320	for (i = rxr->next_to_check; count != 0;) {
4321		struct mbuf		*sendmp, *mh, *mp;
4322		struct igb_rx_buf	*rxbuf;
4323		u16			hlen, plen, hdr, vtag;
4324		bool			eop = FALSE;
4325
4326		cur = &rxr->rx_base[i];
4327		staterr = le32toh(cur->wb.upper.status_error);
4328		if ((staterr & E1000_RXD_STAT_DD) == 0)
4329			break;
4330		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4331			break;
4332		count--;
4333		sendmp = mh = mp = NULL;
4334		cur->wb.upper.status_error = 0;
4335		rxbuf = &rxr->rx_buffers[i];
4336		plen = le16toh(cur->wb.upper.length);
4337		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4338		vtag = le16toh(cur->wb.upper.vlan);
4339		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4340		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4341
4342		/* Make sure all segments of a bad packet are discarded */
4343		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4344		    (rxr->discard)) {
4345			ifp->if_ierrors++;
4346			++rxr->rx_discarded;
4347			if (!eop) /* Catch subsequent segs */
4348				rxr->discard = TRUE;
4349			else
4350				rxr->discard = FALSE;
4351			igb_rx_discard(rxr, i);
4352			goto next_desc;
4353		}
4354
4355		/*
4356		** The way the hardware is configured to
4357		** split, it will ONLY use the header buffer
4358		** when header split is enabled; otherwise we
4359		** get normal behavior, i.e., both header and
4360		** payload are DMA'd into the payload buffer.
4361		**
4362		** The fmp test catches the case where a
4363		** packet spans multiple descriptors; in that
4364		** case only the first header is valid.
4365		*/
4366		if (rxr->hdr_split && rxr->fmp == NULL) {
4367			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4368			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4369			if (hlen > IGB_HDR_BUF)
4370				hlen = IGB_HDR_BUF;
4371			mh = rxr->rx_buffers[i].m_head;
4372			mh->m_len = hlen;
4373			/* clear buf pointer for refresh */
4374			rxbuf->m_head = NULL;
4375			/*
4376			** Get the payload length; this
4377			** could be zero if it's a small
4378			** packet.
4379			*/
4380			if (plen > 0) {
4381				mp = rxr->rx_buffers[i].m_pack;
4382				mp->m_len = plen;
4383				mh->m_next = mp;
4384				/* clear buf pointer */
4385				rxbuf->m_pack = NULL;
4386				rxr->rx_split_packets++;
4387			}
4388		} else {
4389			/*
4390			** Either no header split, or a
4391			** secondary piece of a fragmented
4392			** split packet.
4393			*/
4394			mh = rxr->rx_buffers[i].m_pack;
4395			mh->m_len = plen;
4396			/* clear buf info for refresh */
4397			rxbuf->m_pack = NULL;
4398		}
4399
4400		++processed; /* So we know when to refresh */
4401
4402		/* Initial frame - setup */
4403		if (rxr->fmp == NULL) {
4404			mh->m_pkthdr.len = mh->m_len;
4405			/* Save the head of the chain */
4406			rxr->fmp = mh;
4407			rxr->lmp = mh;
4408			if (mp != NULL) {
4409				/* Add payload if split */
4410				mh->m_pkthdr.len += mp->m_len;
4411				rxr->lmp = mh->m_next;
4412			}
4413		} else {
4414			/* Chain mbuf's together */
4415			rxr->lmp->m_next = mh;
4416			rxr->lmp = rxr->lmp->m_next;
4417			rxr->fmp->m_pkthdr.len += mh->m_len;
4418		}
4419
4420		if (eop) {
4421			rxr->fmp->m_pkthdr.rcvif = ifp;
4422			ifp->if_ipackets++;
4423			rxr->rx_packets++;
4424			/* capture data for AIM */
4425			rxr->packets++;
4426			rxr->bytes += rxr->fmp->m_pkthdr.len;
4427			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4428
4429			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4430				igb_rx_checksum(staterr, rxr->fmp, ptype);
4431
4432			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4433			    (staterr & E1000_RXD_STAT_VP) != 0) {
4434				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4435				rxr->fmp->m_flags |= M_VLANTAG;
4436			}
4437#if __FreeBSD_version >= 800000
4438			rxr->fmp->m_pkthdr.flowid = que->msix;
4439			rxr->fmp->m_flags |= M_FLOWID;
4440#endif
4441			sendmp = rxr->fmp;
4442			/* Make sure to set M_PKTHDR. */
4443			sendmp->m_flags |= M_PKTHDR;
4444			rxr->fmp = NULL;
4445			rxr->lmp = NULL;
4446		}
4447
4448next_desc:
4449		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4450		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4451
4452		/* Advance our pointers to the next descriptor. */
4453		if (++i == adapter->num_rx_desc)
4454			i = 0;
4455		/*
4456		** Send to the stack or LRO
4457		*/
4458		if (sendmp != NULL) {
4459			rxr->next_to_check = i;
4460			igb_rx_input(rxr, ifp, sendmp, ptype);
4461			i = rxr->next_to_check;
4462			rxdone++;
4463		}
4464
4465		/* Every 8 descriptors we go to refresh mbufs */
4466		if (processed == 8) {
4467                        igb_refresh_mbufs(rxr, i);
4468                        processed = 0;
4469		}
4470	}
4471
4472	/* Catch any remainders */
4473	if (processed != 0) {
4474		igb_refresh_mbufs(rxr, i);
4475		processed = 0;
4476	}
4477
4478	rxr->next_to_check = i;
4479
4480	/*
4481	 * Flush any outstanding LRO work
4482	 */
4483	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4484		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4485		tcp_lro_flush(lro, queued);
4486	}
4487
4488	IGB_RX_UNLOCK(rxr);
4489
4490	if (done != NULL)
4491		*done = rxdone;
4492
4493	/*
4494	** We still have cleaning to do?
4495	** Schedule another interrupt if so.
4496	*/
4497	if ((staterr & E1000_RXD_STAT_DD) != 0)
4498		return (TRUE);
4499
4500	return (FALSE);
4501}
4502
4503/*********************************************************************
4504 *
4505 *  Verify that the hardware indicated that the checksum is valid.
4506 *  Inform the stack of the checksum status so that it doesn't
4507 *  spend time verifying the checksum again.
4508 *
4509 *********************************************************************/
4510static void
4511igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4512{
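	/*
	** The status bits live in the low half of the status/error
	** word and the error bits of interest in its top byte.
	*/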
4513	u16 status = (u16)staterr;
4514	u8  errors = (u8) (staterr >> 24);
4515	int sctp;
4516
4517	/* Ignore Checksum bit is set */
4518	if (status & E1000_RXD_STAT_IXSM) {
4519		mp->m_pkthdr.csum_flags = 0;
4520		return;
4521	}
4522
4523	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4524	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4525		sctp = 1;
4526	else
4527		sctp = 0;
4528	if (status & E1000_RXD_STAT_IPCS) {
4529		/* Did it pass? */
4530		if (!(errors & E1000_RXD_ERR_IPE)) {
4531			/* IP Checksum Good */
4532			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4533			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4534		} else
4535			mp->m_pkthdr.csum_flags = 0;
4536	}
4537
4538	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4539		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4540#if __FreeBSD_version >= 800000
4541		if (sctp) /* reassign */
4542			type = CSUM_SCTP_VALID;
4543#endif
4544		/* Did it pass? */
4545		if (!(errors & E1000_RXD_ERR_TCPE)) {
4546			mp->m_pkthdr.csum_flags |= type;
4547			if (sctp == 0)
4548				mp->m_pkthdr.csum_data = htons(0xffff);
4549		}
4550	}
4551	return;
4552}
4553
4554/*
4555 * This routine is run via a VLAN
4556 * config EVENT
4557 */
4558static void
4559igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4560{
4561	struct adapter	*adapter = ifp->if_softc;
4562	u32		index, bit;
4563
4564	if (ifp->if_softc !=  arg)   /* Not our event */
4565		return;
4566
4567	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4568                return;
4569
4570	IGB_CORE_LOCK(adapter);
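	/*
	** The VFTA is an array of 128 32-bit words; bits 11:5 of
	** the VLAN id select the word and bits 4:0 the bit in it.
	*/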
4571	index = (vtag >> 5) & 0x7F;
4572	bit = vtag & 0x1F;
4573	adapter->shadow_vfta[index] |= (1 << bit);
4574	++adapter->num_vlans;
4575	/* Change hw filter setting */
4576	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4577		igb_setup_vlan_hw_support(adapter);
4578	IGB_CORE_UNLOCK(adapter);
4579}
4580
4581/*
4582 * This routine is run via a VLAN
4583 * unconfig EVENT
4584 */
4585static void
4586igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4587{
4588	struct adapter	*adapter = ifp->if_softc;
4589	u32		index, bit;
4590
4591	if (ifp->if_softc !=  arg)
4592		return;
4593
4594	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4595                return;
4596
4597	IGB_CORE_LOCK(adapter);
4598	index = (vtag >> 5) & 0x7F;
4599	bit = vtag & 0x1F;
4600	adapter->shadow_vfta[index] &= ~(1 << bit);
4601	--adapter->num_vlans;
4602	/* Change hw filter setting */
4603	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4604		igb_setup_vlan_hw_support(adapter);
4605	IGB_CORE_UNLOCK(adapter);
4606}
4607
4608static void
4609igb_setup_vlan_hw_support(struct adapter *adapter)
4610{
4611	struct e1000_hw *hw = &adapter->hw;
4612	struct ifnet	*ifp = adapter->ifp;
4613	u32             reg;
4614
4615	if (adapter->vf_ifp) {
4616		e1000_rlpml_set_vf(hw,
4617		    adapter->max_frame_size + VLAN_TAG_SIZE);
4618		return;
4619	}
4620
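	/* Enable VLAN Mode (VME) so the hardware handles VLAN tags */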
4621	reg = E1000_READ_REG(hw, E1000_CTRL);
4622	reg |= E1000_CTRL_VME;
4623	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4624
4625	/* Enable the Filter Table */
4626	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4627		reg = E1000_READ_REG(hw, E1000_RCTL);
4628		reg &= ~E1000_RCTL_CFIEN;
4629		reg |= E1000_RCTL_VFE;
4630		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4631	}
4632
4633	/* Update the frame size */
4634	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4635	    adapter->max_frame_size + VLAN_TAG_SIZE);
4636
4637	/* Don't bother with table if no vlans */
4638	if ((adapter->num_vlans == 0) ||
4639	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4640                return;
4641	/*
4642	** A soft reset zeroes out the VFTA, so
4643	** we need to repopulate it now.
4644	*/
4645	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4646                if (adapter->shadow_vfta[i] != 0) {
4647			if (adapter->vf_ifp)
4648				e1000_vfta_set_vf(hw,
4649				    adapter->shadow_vfta[i], TRUE);
4650			else
4651				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4652                           	 i, adapter->shadow_vfta[i]);
4653		}
4654}
4655
4656static void
4657igb_enable_intr(struct adapter *adapter)
4658{
4659	/* With MSIX/RSS, set up which causes auto-clear and enable them */
4660	if (adapter->msix_mem) {
4661		u32 mask = (adapter->que_mask | adapter->link_mask);
4662		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4663		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4664		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4665		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4666		    E1000_IMS_LSC);
4667	} else {
4668		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4669		    IMS_ENABLE_MASK);
4670	}
4671	E1000_WRITE_FLUSH(&adapter->hw);
4672
4673	return;
4674}
4675
4676static void
4677igb_disable_intr(struct adapter *adapter)
4678{
4679	if (adapter->msix_mem) {
4680		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4681		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4682	}
4683	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4684	E1000_WRITE_FLUSH(&adapter->hw);
4685	return;
4686}
4687
4688/*
4689 * Bit of a misnomer: what this really means is
4690 * to enable OS management of the system, i.e.
4691 * to disable special hardware management features.
4692 */
4693static void
4694igb_init_manageability(struct adapter *adapter)
4695{
4696	if (adapter->has_manage) {
4697		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4698		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4699
4700		/* disable hardware interception of ARP */
4701		manc &= ~(E1000_MANC_ARP_EN);
4702
4703                /* enable receiving management packets to the host */
4704		manc |= E1000_MANC_EN_MNG2HOST;
4705		manc2h |= 1 << 5;  /* Mng Port 623 */
4706		manc2h |= 1 << 6;  /* Mng Port 664 */
4707		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4708		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4709	}
4710}
4711
4712/*
4713 * Give control back to hardware management
4714 * controller if there is one.
4715 */
4716static void
4717igb_release_manageability(struct adapter *adapter)
4718{
4719	if (adapter->has_manage) {
4720		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4721
4722		/* re-enable hardware interception of ARP */
4723		manc |= E1000_MANC_ARP_EN;
4724		manc &= ~E1000_MANC_EN_MNG2HOST;
4725
4726		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4727	}
4728}
4729
4730/*
4731 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4732 * For ASF and Pass Through versions of f/w this means that
4733 * the driver is loaded.
4734 *
4735 */
4736static void
4737igb_get_hw_control(struct adapter *adapter)
4738{
4739	u32 ctrl_ext;
4740
4741	if (adapter->vf_ifp)
4742		return;
4743
4744	/* Let firmware know the driver has taken over */
4745	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4746	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4747	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4748}
4749
4750/*
4751 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4752 * For ASF and Pass Through versions of f/w this means that the
4753 * driver is no longer loaded.
4754 *
4755 */
4756static void
4757igb_release_hw_control(struct adapter *adapter)
4758{
4759	u32 ctrl_ext;
4760
4761	if (adapter->vf_ifp)
4762		return;
4763
4764	/* Let firmware take over control of h/w */
4765	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4766	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4767	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4768}
4769
4770static int
4771igb_is_valid_ether_addr(uint8_t *addr)
4772{
4773	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4774
4775	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4776		return (FALSE);
4777	}
4778
4779	return (TRUE);
4780}
4781
4782
4783/*
4784 * Enable PCI Wake On Lan capability
4785 */
4786static void
4787igb_enable_wakeup(device_t dev)
4788{
4789	u16     cap, status;
4790	u8      id;
4791
4792	/* First find the capabilities pointer */
4793	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4794	/* Read the PM Capabilities */
4795	id = pci_read_config(dev, cap, 1);
4796	if (id != PCIY_PMG)     /* Something wrong */
4797		return;
4798	/* OK, we have the power capabilities, so
4799	   now get the status register */
4800	cap += PCIR_POWER_STATUS;
4801	status = pci_read_config(dev, cap, 2);
4802	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4803	pci_write_config(dev, cap, status, 2);
4804	return;
4805}
4806
4807static void
4808igb_led_func(void *arg, int onoff)
4809{
4810	struct adapter	*adapter = arg;
4811
4812	IGB_CORE_LOCK(adapter);
4813	if (onoff) {
4814		e1000_setup_led(&adapter->hw);
4815		e1000_led_on(&adapter->hw);
4816	} else {
4817		e1000_led_off(&adapter->hw);
4818		e1000_cleanup_led(&adapter->hw);
4819	}
4820	IGB_CORE_UNLOCK(adapter);
4821}
4822
4823/**********************************************************************
4824 *
4825 *  Update the board statistics counters.
4826 *
4827 **********************************************************************/
4828static void
4829igb_update_stats_counters(struct adapter *adapter)
4830{
4831	struct ifnet		*ifp;
4832        struct e1000_hw		*hw = &adapter->hw;
4833	struct e1000_hw_stats	*stats;
4834
4835	/*
4836	** The virtual function adapter has only a
4837	** small controlled set of stats, so do only
4838	** those and return.
4839	*/
4840	if (adapter->vf_ifp) {
4841		igb_update_vf_stats_counters(adapter);
4842		return;
4843	}
4844
4845	stats = (struct e1000_hw_stats	*)adapter->stats;
4846
4847	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4848	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4849		stats->symerrs +=
4850		    E1000_READ_REG(hw,E1000_SYMERRS);
4851		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4852	}
4853
4854	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4855	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4856	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4857	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4858
4859	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4860	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4861	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4862	stats->dc += E1000_READ_REG(hw, E1000_DC);
4863	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4864	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4865	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4866	/*
4867	** For watchdog management we need to know if we have been
4868	** paused during the last interval, so capture that here.
4869	*/
4870        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4871        stats->xoffrxc += adapter->pause_frames;
4872	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4873	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4874	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4875	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4876	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4877	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4878	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4879	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4880	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4881	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4882	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4883	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4884
4885	/* For the 64-bit byte counters the low dword must be read first. */
4886	/* Both registers clear on the read of the high dword */
4887
4888	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4889	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4890	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4891	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4892
4893	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4894	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4895	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4896	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4897	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4898
4899	stats->tor += E1000_READ_REG(hw, E1000_TORH);
4900	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
4901
4902	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4903	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4904	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4905	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4906	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4907	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4908	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4909	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4910	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4911	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4912
4913	/* Interrupt Counts */
4914
4915	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4916	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4917	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4918	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4919	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4920	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4921	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4922	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4923	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4924
4925	/* Host to Card Statistics */
4926
4927	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4928	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4929	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4930	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4931	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4932	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4933	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4934	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4935	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4936	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4937	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4938	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4939	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4940	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4941
4942	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4943	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4944	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4945	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4946	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4947	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4948
4949	ifp = adapter->ifp;
4950	ifp->if_collisions = stats->colc;
4951
4952	/* Rx Errors */
4953	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4954	    stats->crcerrs + stats->algnerrc +
4955	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4956
4957	/* Tx Errors */
4958	ifp->if_oerrors = stats->ecol +
4959	    stats->latecol + adapter->watchdog_events;
4960
4961	/* Driver specific counters */
4962	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4963	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4964	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4965	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4966	adapter->packet_buf_alloc_tx =
4967	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4968	adapter->packet_buf_alloc_rx =
4969	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4970}
4971
4972
4973/**********************************************************************
4974 *
4975 *  Initialize the VF board statistics counters.
4976 *
4977 **********************************************************************/
4978static void
4979igb_vf_init_stats(struct adapter *adapter)
4980{
4981        struct e1000_hw *hw = &adapter->hw;
4982	struct e1000_vf_stats	*stats;
4983
4984	stats = (struct e1000_vf_stats	*)adapter->stats;
4985	if (stats == NULL)
4986		return;
4987        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
4988        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
4989        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
4990        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
4991        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
4992}
4993
4994/**********************************************************************
4995 *
4996 *  Update the VF board statistics counters.
4997 *
4998 **********************************************************************/
4999static void
5000igb_update_vf_stats_counters(struct adapter *adapter)
5001{
5002	struct e1000_hw *hw = &adapter->hw;
5003	struct e1000_vf_stats	*stats;
5004
5005	if (adapter->link_speed == 0)
5006		return;
5007
5008	stats = (struct e1000_vf_stats	*)adapter->stats;
5009
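	/*
	** UPDATE_VF_REG folds the change since the previous reading
	** of each VF counter register into the running total.
	*/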
5010	UPDATE_VF_REG(E1000_VFGPRC,
5011	    stats->last_gprc, stats->gprc);
5012	UPDATE_VF_REG(E1000_VFGORC,
5013	    stats->last_gorc, stats->gorc);
5014	UPDATE_VF_REG(E1000_VFGPTC,
5015	    stats->last_gptc, stats->gptc);
5016	UPDATE_VF_REG(E1000_VFGOTC,
5017	    stats->last_gotc, stats->gotc);
5018	UPDATE_VF_REG(E1000_VFMPRC,
5019	    stats->last_mprc, stats->mprc);
5020}
5021
5022/* Export a single 32-bit register via a read-only sysctl. */
5023static int
5024igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5025{
5026	struct adapter *adapter;
5027	u_int val;
5028
5029	adapter = oidp->oid_arg1;
5030	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5031	return (sysctl_handle_int(oidp, &val, 0, req));
5032}
5033
5034/*
5035**  Tuneable interrupt rate handler
5036*/
5037static int
5038igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5039{
5040	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5041	int			error;
5042	u32			reg, usec, rate;
5043
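	/*
	** The interval lives in bits 14:2 of EITR; it is treated
	** as microseconds and converted to an interrupt rate.
	*/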
5044	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5045	usec = ((reg & 0x7FFC) >> 2);
5046	if (usec > 0)
5047		rate = 1000000 / usec;
5048	else
5049		rate = 0;
5050	error = sysctl_handle_int(oidp, &rate, 0, req);
5051	if (error || !req->newptr)
5052		return error;
5053	return 0;
5054}
5055
5056/*
5057 * Add sysctl variables, one per statistic, to the system.
5058 */
5059static void
5060igb_add_hw_stats(struct adapter *adapter)
5061{
5062	device_t dev = adapter->dev;
5063
5064	struct tx_ring *txr = adapter->tx_rings;
5065	struct rx_ring *rxr = adapter->rx_rings;
5066
5067	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5068	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5069	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5070	struct e1000_hw_stats *stats = adapter->stats;
5071
5072	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5073	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5074
5075#define QUEUE_NAME_LEN 32
5076	char namebuf[QUEUE_NAME_LEN];
5077
5078	/* Driver Statistics */
5079	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "link_irq",
5080			CTLFLAG_RD, &adapter->link_irq, 0,
5081			"Link MSIX IRQ Handled");
5082	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5083			CTLFLAG_RD, &adapter->dropped_pkts,
5084			"Driver dropped packets");
5085	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5086			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5087			"Driver tx dma failure in xmit");
5088	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5089			CTLFLAG_RD, &adapter->rx_overruns,
5090			"RX overruns");
5091	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5092			CTLFLAG_RD, &adapter->watchdog_events,
5093			"Watchdog timeouts");
5094
5095	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5096			CTLFLAG_RD, &adapter->device_control,
5097			"Device Control Register");
5098	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5099			CTLFLAG_RD, &adapter->rx_control,
5100			"Receiver Control Register");
5101	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5102			CTLFLAG_RD, &adapter->int_mask,
5103			"Interrupt Mask");
5104	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5105			CTLFLAG_RD, &adapter->eint_mask,
5106			"Extended Interrupt Mask");
5107	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5108			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5109			"Transmit Buffer Packet Allocation");
5110	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5111			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5112			"Receive Buffer Packet Allocation");
5113	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5114			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5115			"Flow Control High Watermark");
5116	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5117			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5118			"Flow Control Low Watermark");
5119
5120	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5121		struct lro_ctrl *lro = &rxr->lro;
5122
5123		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5124		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5125					    CTLFLAG_RD, NULL, "Queue Name");
5126		queue_list = SYSCTL_CHILDREN(queue_node);
5127
5128		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5129				CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5130				sizeof(&adapter->queues[i]),
5131				igb_sysctl_interrupt_rate_handler,
5132				"IU", "Interrupt Rate");
5133
5134		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5135				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5136				E1000_TDH(txr->me),
5137				igb_sysctl_reg_handler, "IU",
5138 				"Transmit Descriptor Head");
5139		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5140				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5141				E1000_TDT(txr->me),
5142				igb_sysctl_reg_handler, "IU",
5143 				"Transmit Descriptor Tail");
5144		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5145				CTLFLAG_RD, &txr->no_desc_avail,
5146				"Queue No Descriptor Available");
5147		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5148				CTLFLAG_RD, &txr->tx_packets,
5149				"Queue Packets Transmitted");
5150
5151		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5152				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5153				E1000_RDH(rxr->me),
5154				igb_sysctl_reg_handler, "IU",
5155				"Receive Descriptor Head");
5156		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5157				CTLTYPE_UINT | CTLFLAG_RD, adapter,
5158				E1000_RDT(rxr->me),
5159				igb_sysctl_reg_handler, "IU",
5160				"Receive Descriptor Tail");
5161		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5162				CTLFLAG_RD, &rxr->rx_packets,
5163				"Queue Packets Received");
5164		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5165				CTLFLAG_RD, &rxr->rx_bytes,
5166				"Queue Bytes Received");
5167		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5168				CTLFLAG_RD, &lro->lro_queued, 0,
5169				"LRO Queued");
5170		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5171				CTLFLAG_RD, &lro->lro_flushed, 0,
5172				"LRO Flushed");
5173	}
5174
5175	/* MAC stats get their own sub node */
5176
5177	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5178				    CTLFLAG_RD, NULL, "MAC Statistics");
5179	stat_list = SYSCTL_CHILDREN(stat_node);
5180
5181	/*
5182	** VF adapter has a very limited set of stats
5183	** since it's not managing the metal, so to speak.
5184	*/
5185	if (adapter->vf_ifp) {
5186		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5187				CTLFLAG_RD, &stats->gprc,
5188				"Good Packets Received");
5189		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5190				CTLFLAG_RD, &stats->gptc,
5191				"Good Packets Transmitted");
5192		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5193				CTLFLAG_RD, &stats->gorc,
5194				"Good Octets Received");
5195		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5196				CTLFLAG_RD, &stats->gotc,
5197				"Good Octets Transmitted");
5198		SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5199				CTLFLAG_RD, &stats->mprc,
5200				"Multicast Packets Received");
5201		return;
5202	}
5203
5204	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5205			CTLFLAG_RD, &stats->ecol,
5206			"Excessive collisions");
5207	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
5208			CTLFLAG_RD, &stats->scc,
5209			"Single collisions");
5210	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5211			CTLFLAG_RD, &stats->mcc,
5212			"Multiple collisions");
5213	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
5214			CTLFLAG_RD, &stats->latecol,
5215			"Late collisions");
5216	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
5217			CTLFLAG_RD, &stats->colc,
5218			"Collision Count");
5219	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5220			CTLFLAG_RD, &stats->symerrs,
5221			"Symbol Errors");
5222	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5223			CTLFLAG_RD, &stats->sec,
5224			"Sequence Errors");
5225	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
5226			CTLFLAG_RD, &stats->dc,
5227			"Defer Count");
5228	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5229			CTLFLAG_RD, &stats->mpc,
5230			"Missed Packets");
5231	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5232			CTLFLAG_RD, &stats->rnbc,
5233			"Receive No Buffers");
5234	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5235			CTLFLAG_RD, &stats->ruc,
5236			"Receive Undersize");
5237	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5238			CTLFLAG_RD, &stats->rfc,
5239			"Fragmented Packets Received ");
5240	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5241			CTLFLAG_RD, &stats->roc,
5242			"Oversized Packets Received");
5243	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5244			CTLFLAG_RD, &stats->rjc,
5245			"Received Jabber");
5246	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5247			CTLFLAG_RD, &stats->rxerrc,
5248			"Receive Errors");
5249	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5250			CTLFLAG_RD, &stats->crcerrs,
5251			"CRC errors");
5252	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5253			CTLFLAG_RD, &stats->algnerrc,
5254			"Alignment Errors");
5255	/* On 82575 these are collision counts */
5256	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5257			CTLFLAG_RD, &stats->cexterr,
5258			"Collision/Carrier extension errors");
5259	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5260			CTLFLAG_RD, &stats->xonrxc,
5261			"XON Received");
5262	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5263			CTLFLAG_RD, &stats->xontxc,
5264			"XON Transmitted");
5265	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5266			CTLFLAG_RD, &stats->xoffrxc,
5267			"XOFF Received");
5268	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5269			CTLFLAG_RD, &stats->xofftxc,
5270			"XOFF Transmitted");
5271	/* Packet Reception Stats */
5272	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5273			CTLFLAG_RD, &stats->tpr,
5274			"Total Packets Received ");
5275	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5276			CTLFLAG_RD, &stats->gprc,
5277			"Good Packets Received");
5278	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5279			CTLFLAG_RD, &stats->bprc,
5280			"Broadcast Packets Received");
5281	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5282			CTLFLAG_RD, &stats->mprc,
5283			"Multicast Packets Received");
5284	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5285			CTLFLAG_RD, &stats->prc64,
5286			"64 byte frames received ");
5287	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5288			CTLFLAG_RD, &stats->prc127,
5289			"65-127 byte frames received");
5290	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5291			CTLFLAG_RD, &stats->prc255,
5292			"128-255 byte frames received");
5293	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5294			CTLFLAG_RD, &stats->prc511,
5295			"256-511 byte frames received");
5296	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5297			CTLFLAG_RD, &stats->prc1023,
5298			"512-1023 byte frames received");
5299	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5300			CTLFLAG_RD, &stats->prc1522,
5301			"1024-1522 byte frames received");
5302 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5303 			CTLFLAG_RD, &stats->gorc,
5304 			"Good Octets Received");
5305
5306	/* Packet Transmission Stats */
5307	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5308			CTLFLAG_RD, &stats->gotc,
5309			"Good Octets Transmitted");
5310	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5311			CTLFLAG_RD, &stats->tpt,
5312			"Total Packets Transmitted");
5313	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5314			CTLFLAG_RD, &stats->gptc,
5315			"Good Packets Transmitted");
5316	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5317			CTLFLAG_RD, &stats->bptc,
5318			"Broadcast Packets Transmitted");
5319	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5320			CTLFLAG_RD, &stats->mptc,
5321			"Multicast Packets Transmitted");
5322	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5323			CTLFLAG_RD, &stats->ptc64,
5324			"64 byte frames transmitted");
5325	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5326			CTLFLAG_RD, &stats->ptc127,
5327			"65-127 byte frames transmitted");
5328	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5329			CTLFLAG_RD, &stats->ptc255,
5330			"128-255 byte frames transmitted");
5331	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5332			CTLFLAG_RD, &stats->ptc511,
5333			"256-511 byte frames transmitted");
5334	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5335			CTLFLAG_RD, &stats->ptc1023,
5336			"512-1023 byte frames transmitted");
5337	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5338			CTLFLAG_RD, &stats->ptc1522,
5339			"1024-1522 byte frames transmitted");
5340	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5341			CTLFLAG_RD, &stats->tsctc,
5342			"TSO Contexts Transmitted");
5343	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5344			CTLFLAG_RD, &stats->tsctfc,
5345			"TSO Contexts Failed");
5346
5347
5348	/* Interrupt Stats */
5349
5350	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5351				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5352	int_list = SYSCTL_CHILDREN(int_node);
5353
5354	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "asserts",
5355			CTLFLAG_RD, &stats->iac,
5356			"Interrupt Assertion Count");
5357
5358	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5359			CTLFLAG_RD, &stats->icrxptc,
5360			"Interrupt Cause Rx Pkt Timer Expire Count");
5361
5362	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5363			CTLFLAG_RD, &stats->icrxatc,
5364			"Interrupt Cause Rx Abs Timer Expire Count");
5365
5366	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5367			CTLFLAG_RD, &stats->ictxptc,
5368			"Interrupt Cause Tx Pkt Timer Expire Count");
5369
5370	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5371			CTLFLAG_RD, &stats->ictxatc,
5372			"Interrupt Cause Tx Abs Timer Expire Count");
5373
5374	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5375			CTLFLAG_RD, &stats->ictxqec,
5376			"Interrupt Cause Tx Queue Empty Count");
5377
5378	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5379			CTLFLAG_RD, &stats->ictxqmtc,
5380			"Interrupt Cause Tx Queue Min Thresh Count");
5381
5382	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5383			CTLFLAG_RD, &stats->icrxdmtc,
5384			"Interrupt Cause Rx Desc Min Thresh Count");
5385
5386	SYSCTL_ADD_UQUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5387			CTLFLAG_RD, &stats->icrxoc,
5388			"Interrupt Cause Receiver Overrun Count");
5389
5390	/* Host to Card Stats */
5391
5392	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5393				    CTLFLAG_RD, NULL,
5394				    "Host to Card Statistics");
5395
5396	host_list = SYSCTL_CHILDREN(host_node);
5397
5398	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5399			CTLFLAG_RD, &stats->cbtmpc,
5400			"Circuit Breaker Tx Packet Count");
5401
5402	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5403			CTLFLAG_RD, &stats->htdpmc,
5404			"Host Transmit Discarded Packets");
5405
5406	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5407			CTLFLAG_RD, &stats->rpthc,
5408			"Rx Packets To Host");
5409
5410	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5411			CTLFLAG_RD, &stats->cbrmpc,
5412			"Circuit Breaker Rx Packet Count");
5413
5414	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5415			CTLFLAG_RD, &stats->cbrdpc,
5416			"Circuit Breaker Rx Dropped Count");
5417
5418	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5419			CTLFLAG_RD, &stats->hgptc,
5420			"Host Good Packets Tx Count");
5421
5422	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5423			CTLFLAG_RD, &stats->htcbdpc,
5424			"Host Tx Circuit Breaker Dropped Count");
5425
5426	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5427			CTLFLAG_RD, &stats->hgorc,
5428			"Host Good Octets Received Count");
5429
5430	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5431			CTLFLAG_RD, &stats->hgotc,
5432			"Host Good Octets Transmit Count");
5433
5434	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "length_errors",
5435			CTLFLAG_RD, &stats->lenerrs,
5436			"Length Errors");
5437
5438	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5439			CTLFLAG_RD, &stats->scvpc,
5440			"SerDes/SGMII Code Violation Pkt Count");
5441
5442	SYSCTL_ADD_UQUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5443			CTLFLAG_RD, &stats->hrmpc,
5444			"Header Redirection Missed Packet Count");
5445}
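
/*
 * Usage sketch (illustrative, not part of the driver): once the adapter is
 * attached, the counters registered above can be read from userland with
 * sysctl(8).  Assuming unit 0 and that the MAC statistics node created
 * earlier in this routine is named "mac_stats", for example:
 *
 *   sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *   sysctl dev.igb.0.interrupts.asserts
 *   sysctl dev.igb.0.host.rx_good_bytes
 *
 * All of these leaves are read-only (CTLFLAG_RD) and mirror the hardware
 * counters collected into the stats structure referenced above.
 */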
5446
5447
5448/**********************************************************************
5449 *
5450 *  This routine provides a way to dump out the adapter eeprom,
5451 *  This routine provides a way to dump out the adapter EEPROM,
5452 *  often a useful debug/service tool. Only the first 32 words are
5453 *  dumped; the data that matters lies within that range.
5454 **********************************************************************/
5455static int
5456igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5457{
5458	struct adapter *adapter;
5459	int error;
5460	int result;
5461
5462	result = -1;
5463	error = sysctl_handle_int(oidp, &result, 0, req);
5464
5465	if (error || !req->newptr)
5466		return (error);
5467
5468	/*
5469	 * This value will cause a hex dump of the
5470	 * first 32 16-bit words of the EEPROM to
5471	 * the screen.
5472	 */
5473	if (result == 1) {
5474		adapter = (struct adapter *)arg1;
5475		igb_print_nvm_info(adapter);
5476	}
5477
5478	return (error);
5479}
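
/*
 * Example (a sketch under assumptions, not verified here): if this handler
 * is registered during attach under a per-device node such as "nvm", the
 * dump can be triggered from userland with, e.g.:
 *
 *   sysctl dev.igb.0.nvm=1
 *
 * Writing any value other than 1 is accepted but ignored; a plain read
 * simply returns -1.
 */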
5480
5481static void
5482igb_print_nvm_info(struct adapter *adapter)
5483{
5484	u16	eeprom_data;
5485	int	i, j, row = 0;
5486
5487	/* It's a bit crude, but it gets the job done */
5488	printf("\nInterface EEPROM Dump:\n");
5489	printf("Offset\n0x0000  ");
5490	for (i = 0, j = 0; i < 32; i++, j++) {
5491		if (j == 8) { /* Make the offset block */
5492			j = 0; ++row;
5493			printf("\n0x00%x0  ", row);
5494		}
5495		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5496		printf("%04x ", eeprom_data);
5497	}
5498	printf("\n");
5499}
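
/*
 * Output sketch (derived from the loop above, values illustrative): four
 * rows of eight 16-bit words, with the left-hand label giving the byte
 * offset of the first word in each row:
 *
 *   Offset
 *   0x0000  1234 5678 .... .... .... .... .... ....
 *   0x0010  .... .... .... .... .... .... .... ....
 *   0x0020  .... .... .... .... .... .... .... ....
 *   0x0030  .... .... .... .... .... .... .... ....
 */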
5500
5501static void
5502igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5503	const char *description, int *limit, int value)
5504{
5505	*limit = value;
5506	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5507	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5508	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5509}
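
/*
 * Call sketch (hypothetical names and default value; the actual caller
 * lives in the attach path and is not shown here):
 *
 *   igb_add_rx_process_limit(adapter, "rx_processing_limit",
 *       "max number of rx packets to process",
 *       &adapter->rx_process_limit, 100);
 *
 * This seeds *limit with the default value and exposes it as a read/write
 * integer under the device's sysctl tree.
 */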
5510