if_igb.c revision 209611
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 209611 2010-06-30 17:26:47Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 2.0.1";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to decide which devices to attach to
109 *  Last field stores an index into igb_strings
110 *  Last entry must be all 0s
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	/* required last entry */
139	{ 0, 0, 0, 0, 0}
140};
141
142/*********************************************************************
143 *  Table of branding strings for all supported NICs.
144 *********************************************************************/
145
146static char *igb_strings[] = {
147	"Intel(R) PRO/1000 Network Connection"
148};
149
150/*********************************************************************
151 *  Function prototypes
152 *********************************************************************/
153static int	igb_probe(device_t);
154static int	igb_attach(device_t);
155static int	igb_detach(device_t);
156static int	igb_shutdown(device_t);
157static int	igb_suspend(device_t);
158static int	igb_resume(device_t);
159static void	igb_start(struct ifnet *);
160static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
161#if __FreeBSD_version >= 800000
162static int	igb_mq_start(struct ifnet *, struct mbuf *);
163static int	igb_mq_start_locked(struct ifnet *,
164		    struct tx_ring *, struct mbuf *);
165static void	igb_qflush(struct ifnet *);
166#endif
167static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
168static void	igb_init(void *);
169static void	igb_init_locked(struct adapter *);
170static void	igb_stop(void *);
171static void	igb_media_status(struct ifnet *, struct ifmediareq *);
172static int	igb_media_change(struct ifnet *);
173static void	igb_identify_hardware(struct adapter *);
174static int	igb_allocate_pci_resources(struct adapter *);
175static int	igb_allocate_msix(struct adapter *);
176static int	igb_allocate_legacy(struct adapter *);
177static int	igb_setup_msix(struct adapter *);
178static void	igb_free_pci_resources(struct adapter *);
179static void	igb_local_timer(void *);
180static void	igb_reset(struct adapter *);
181static void	igb_setup_interface(device_t, struct adapter *);
182static int	igb_allocate_queues(struct adapter *);
183static void	igb_configure_queues(struct adapter *);
184
185static int	igb_allocate_transmit_buffers(struct tx_ring *);
186static void	igb_setup_transmit_structures(struct adapter *);
187static void	igb_setup_transmit_ring(struct tx_ring *);
188static void	igb_initialize_transmit_units(struct adapter *);
189static void	igb_free_transmit_structures(struct adapter *);
190static void	igb_free_transmit_buffers(struct tx_ring *);
191
192static int	igb_allocate_receive_buffers(struct rx_ring *);
193static int	igb_setup_receive_structures(struct adapter *);
194static int	igb_setup_receive_ring(struct rx_ring *);
195static void	igb_initialize_receive_units(struct adapter *);
196static void	igb_free_receive_structures(struct adapter *);
197static void	igb_free_receive_buffers(struct rx_ring *);
198static void	igb_free_receive_ring(struct rx_ring *);
199
200static void	igb_enable_intr(struct adapter *);
201static void	igb_disable_intr(struct adapter *);
202static void	igb_update_stats_counters(struct adapter *);
203static bool	igb_txeof(struct tx_ring *);
204
205static __inline	void igb_rx_discard(struct rx_ring *, int);
206static __inline void igb_rx_input(struct rx_ring *,
207		    struct ifnet *, struct mbuf *, u32);
208
209static bool	igb_rxeof(struct igb_queue *, int, int *);
210static void	igb_rx_checksum(u32, struct mbuf *, u32);
211static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
212static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
213static void	igb_set_promisc(struct adapter *);
214static void	igb_disable_promisc(struct adapter *);
215static void	igb_set_multi(struct adapter *);
216static void	igb_update_link_status(struct adapter *);
217static void	igb_refresh_mbufs(struct rx_ring *, int);
218
219static void	igb_register_vlan(void *, struct ifnet *, u16);
220static void	igb_unregister_vlan(void *, struct ifnet *, u16);
221static void	igb_setup_vlan_hw_support(struct adapter *);
222
223static int	igb_xmit(struct tx_ring *, struct mbuf **);
224static int	igb_dma_malloc(struct adapter *, bus_size_t,
225		    struct igb_dma_alloc *, int);
226static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
228static void	igb_print_nvm_info(struct adapter *);
229static int 	igb_is_valid_ether_addr(u8 *);
230static void     igb_add_hw_stats(struct adapter *);
231
232static void	igb_vf_init_stats(struct adapter *);
233static void	igb_update_vf_stats_counters(struct adapter *);
234
235/* Management and WOL Support */
236static void	igb_init_manageability(struct adapter *);
237static void	igb_release_manageability(struct adapter *);
238static void     igb_get_hw_control(struct adapter *);
239static void     igb_release_hw_control(struct adapter *);
240static void     igb_enable_wakeup(device_t);
241static void     igb_led_func(void *, int);
242
243static int	igb_irq_fast(void *);
244static void	igb_add_rx_process_limit(struct adapter *, const char *,
245		    const char *, int *, int);
246static void	igb_handle_que(void *context, int pending);
247static void	igb_handle_link(void *context, int pending);
248
249/* These are MSIX only irq handlers */
250static void	igb_msix_que(void *);
251static void	igb_msix_link(void *);
252
253#ifdef DEVICE_POLLING
254static poll_handler_t igb_poll;
255#endif /* DEVICE_POLLING */
256
257/*********************************************************************
258 *  FreeBSD Device Interface Entry Points
259 *********************************************************************/
260
261static device_method_t igb_methods[] = {
262	/* Device interface */
263	DEVMETHOD(device_probe, igb_probe),
264	DEVMETHOD(device_attach, igb_attach),
265	DEVMETHOD(device_detach, igb_detach),
266	DEVMETHOD(device_shutdown, igb_shutdown),
267	DEVMETHOD(device_suspend, igb_suspend),
268	DEVMETHOD(device_resume, igb_resume),
269	{0, 0}
270};
271
272static driver_t igb_driver = {
273	"igb", igb_methods, sizeof(struct adapter),
274};
275
276static devclass_t igb_devclass;
277DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
278MODULE_DEPEND(igb, pci, 1, 1, 1);
279MODULE_DEPEND(igb, ether, 1, 1, 1);
280
281/*********************************************************************
282 *  Tunable default values.
283 *********************************************************************/
284
285/* Descriptor defaults */
286static int igb_rxd = IGB_DEFAULT_RXD;
287static int igb_txd = IGB_DEFAULT_TXD;
288TUNABLE_INT("hw.igb.rxd", &igb_rxd);
289TUNABLE_INT("hw.igb.txd", &igb_txd);
290
291/*
292** AIM: Adaptive Interrupt Moderation.
293** When enabled, the interrupt rate is
294** varied over time based on the traffic
295** observed on that interrupt vector.
296*/
297static int igb_enable_aim = TRUE;
298TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
299
300/*
301 * MSIX should be the default for best performance,
302 * but this allows it to be forced off for testing.
303 */
304static int igb_enable_msix = 1;
305TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
306
307/*
308 * Header split has generally been beneficial in
309 * the circumstances tested; however, there have
310 * been some stability issues, so the default is
311 * off.
312 */
313static bool igb_header_split = FALSE;
314TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
315
316/*
317** This will autoconfigure based on
318** the number of CPUs if left at 0.
319*/
320static int igb_num_queues = 0;
321TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
322
323/* How many packets rxeof tries to clean at a time */
324static int igb_rx_process_limit = 100;
325TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
326
327/* Flow control setting - default to FULL */
328static int igb_fc_setting = e1000_fc_full;
329TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
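/*
** Example (added for illustration, not part of the original source):
** the knobs above are boot-time tunables read from the kernel
** environment, so they can be set in /boot/loader.conf, e.g.:
**
**	hw.igb.rxd="4096"
**	hw.igb.txd="4096"
**	hw.igb.enable_msix="0"
**	hw.igb.num_queues="1"
**	hw.igb.rx_process_limit="200"
**
** The values shown are illustrative only; valid ranges depend on the
** corresponding IGB_MIN/IGB_MAX limits in if_igb.h and on the hardware.
*/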
330
331/*
332** Shadow VFTA table; this is needed because
333** the real filter table gets cleared during
334** a soft reset and the driver needs to be able
335** to repopulate it.
336*/
337static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
338
339
340/*********************************************************************
341 *  Device identification routine
342 *
343 *  igb_probe determines if the driver should be loaded on the
344 *  adapter, based on the adapter's PCI vendor/device ID.
345 *
346 *  return BUS_PROBE_DEFAULT on success, positive on failure
347 *********************************************************************/
348
349static int
350igb_probe(device_t dev)
351{
352	char		adapter_name[60];
353	uint16_t	pci_vendor_id = 0;
354	uint16_t	pci_device_id = 0;
355	uint16_t	pci_subvendor_id = 0;
356	uint16_t	pci_subdevice_id = 0;
357	igb_vendor_info_t *ent;
358
359	INIT_DEBUGOUT("igb_probe: begin");
360
361	pci_vendor_id = pci_get_vendor(dev);
362	if (pci_vendor_id != IGB_VENDOR_ID)
363		return (ENXIO);
364
365	pci_device_id = pci_get_device(dev);
366	pci_subvendor_id = pci_get_subvendor(dev);
367	pci_subdevice_id = pci_get_subdevice(dev);
368
369	ent = igb_vendor_info_array;
370	while (ent->vendor_id != 0) {
371		if ((pci_vendor_id == ent->vendor_id) &&
372		    (pci_device_id == ent->device_id) &&
373
374		    ((pci_subvendor_id == ent->subvendor_id) ||
375		    (ent->subvendor_id == PCI_ANY_ID)) &&
376
377		    ((pci_subdevice_id == ent->subdevice_id) ||
378		    (ent->subdevice_id == PCI_ANY_ID))) {
379			sprintf(adapter_name, "%s %s",
380				igb_strings[ent->index],
381				igb_driver_version);
382			device_set_desc_copy(dev, adapter_name);
383			return (BUS_PROBE_DEFAULT);
384		}
385		ent++;
386	}
387
388	return (ENXIO);
389}
390
391/*********************************************************************
392 *  Device initialization routine
393 *
394 *  The attach entry point is called when the driver is being loaded.
395 *  This routine identifies the type of hardware, allocates all resources
396 *  and initializes the hardware.
397 *
398 *  return 0 on success, positive on failure
399 *********************************************************************/
400
401static int
402igb_attach(device_t dev)
403{
404	struct adapter	*adapter;
405	int		error = 0;
406	u16		eeprom_data;
407
408	INIT_DEBUGOUT("igb_attach: begin");
409
410	adapter = device_get_softc(dev);
411	adapter->dev = adapter->osdep.dev = dev;
412	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
413
414	/* SYSCTL stuff */
415	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
416	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
417	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
418	    igb_sysctl_nvm_info, "I", "NVM Information");
419
420	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
421	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
422	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
423	    &igb_fc_setting, 0, "Flow Control");
424
425	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
426	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
427	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
428	    &igb_enable_aim, 1, "Interrupt Moderation");
429
430	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
431
432	/* Determine hardware and mac info */
433	igb_identify_hardware(adapter);
434
435	/* Setup PCI resources */
436	if (igb_allocate_pci_resources(adapter)) {
437		device_printf(dev, "Allocation of PCI resources failed\n");
438		error = ENXIO;
439		goto err_pci;
440	}
441
442	/* Do Shared Code initialization */
443	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
444		device_printf(dev, "Setup of Shared code failed\n");
445		error = ENXIO;
446		goto err_pci;
447	}
448
449	e1000_get_bus_info(&adapter->hw);
450
451	/* Sysctls for limiting the amount of work done in the taskqueue */
452	igb_add_rx_process_limit(adapter, "rx_processing_limit",
453	    "max number of rx packets to process", &adapter->rx_process_limit,
454	    igb_rx_process_limit);
455
456	/*
457	 * Validate the number of transmit and receive descriptors. It
458	 * must not exceed the hardware maximum, and must be a multiple
459	 * of IGB_DBA_ALIGN.
460	 */
461	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
462	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
463		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
464		    IGB_DEFAULT_TXD, igb_txd);
465		adapter->num_tx_desc = IGB_DEFAULT_TXD;
466	} else
467		adapter->num_tx_desc = igb_txd;
468	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
469	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
470		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
471		    IGB_DEFAULT_RXD, igb_rxd);
472		adapter->num_rx_desc = IGB_DEFAULT_RXD;
473	} else
474		adapter->num_rx_desc = igb_rxd;
475
476	adapter->hw.mac.autoneg = DO_AUTO_NEG;
477	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
478	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
479
480	/* Copper options */
481	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
482		adapter->hw.phy.mdix = AUTO_ALL_MODES;
483		adapter->hw.phy.disable_polarity_correction = FALSE;
484		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
485	}
486
487	/*
488	 * Set the frame limits assuming
489	 * standard ethernet sized frames.
490	 */
491	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
492	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
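	/*
	** Illustrative note (not in the original source): with the stock
	** definitions this works out to 1500 + 14 + 4 = 1518 bytes for
	** max_frame_size and 60 + 4 = 64 bytes for min_frame_size.
	*/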
493
494	/*
495	** Allocate and Setup Queues
496	*/
497	if (igb_allocate_queues(adapter)) {
498		error = ENOMEM;
499		goto err_pci;
500	}
501
502	/* Allocate the appropriate stats memory */
503	if (adapter->hw.mac.type == e1000_vfadapt) {
504		adapter->stats =
505		    (struct e1000_vf_stats *)malloc(sizeof \
506		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
507		igb_vf_init_stats(adapter);
508	} else
509		adapter->stats =
510		    (struct e1000_hw_stats *)malloc(sizeof \
511		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
512
513	/*
514	** Start from a known state; this is
515	** important for reading the NVM and
516	** MAC address from it.
517	*/
518	e1000_reset_hw(&adapter->hw);
519
520	/* Make sure we have a good EEPROM before we read from it */
521	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
522		/*
523		** Some PCI-E parts fail the first check due to
524		** the link being in a sleep state; call it again,
525		** and if it fails a second time it's a real issue.
526		*/
527		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
528			device_printf(dev,
529			    "The EEPROM Checksum Is Not Valid\n");
530			error = EIO;
531			goto err_late;
532		}
533	}
534
535	/*
536	** Copy the permanent MAC address out of the EEPROM
537	*/
538	if (e1000_read_mac_addr(&adapter->hw) < 0) {
539		device_printf(dev, "EEPROM read error while reading MAC"
540		    " address\n");
541		error = EIO;
542		goto err_late;
543	}
544	/* Check its sanity */
545	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
546		device_printf(dev, "Invalid MAC address\n");
547		error = EIO;
548		goto err_late;
549	}
550
551	/*
552	** Configure Interrupts
553	*/
554	if ((adapter->msix > 1) && (igb_enable_msix))
555		error = igb_allocate_msix(adapter);
556	else /* MSI or Legacy */
557		error = igb_allocate_legacy(adapter);
558	if (error)
559		goto err_late;
560
561	/* Setup OS specific network interface */
562	igb_setup_interface(dev, adapter);
563
564	/* Now get a good starting state */
565	igb_reset(adapter);
566
567	/* Initialize statistics */
568	igb_update_stats_counters(adapter);
569
570	adapter->hw.mac.get_link_status = 1;
571	igb_update_link_status(adapter);
572
573	/* Indicate SOL/IDER usage */
574	if (e1000_check_reset_block(&adapter->hw))
575		device_printf(dev,
576		    "PHY reset is blocked due to SOL/IDER session.\n");
577
578	/* Determine if we have to control management hardware */
579	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
580
581	/*
582	 * Setup Wake-on-Lan
583	 */
584	/* APME bit in EEPROM is mapped to WUC.APME */
585	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
586	if (eeprom_data)
587		adapter->wol = E1000_WUFC_MAG;
588
589	/* Register for VLAN events */
590	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
591	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
592	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
593	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
594
595	igb_add_hw_stats(adapter);
596
597	/* Tell the stack that the interface is not active */
598	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
599
600	adapter->led_dev = led_create(igb_led_func, adapter,
601	    device_get_nameunit(dev));
602
603	INIT_DEBUGOUT("igb_attach: end");
604
605	return (0);
606
607err_late:
608	igb_free_transmit_structures(adapter);
609	igb_free_receive_structures(adapter);
610	igb_release_hw_control(adapter);
611err_pci:
612	igb_free_pci_resources(adapter);
613	IGB_CORE_LOCK_DESTROY(adapter);
614
615	return (error);
616}
617
618/*********************************************************************
619 *  Device removal routine
620 *
621 *  The detach entry point is called when the driver is being removed.
622 *  This routine stops the adapter and deallocates all the resources
623 *  that were allocated for driver operation.
624 *
625 *  return 0 on success, positive on failure
626 *********************************************************************/
627
628static int
629igb_detach(device_t dev)
630{
631	struct adapter	*adapter = device_get_softc(dev);
632	struct ifnet	*ifp = adapter->ifp;
633
634	INIT_DEBUGOUT("igb_detach: begin");
635
636	/* Make sure VLANS are not using driver */
637	if (adapter->ifp->if_vlantrunk != NULL) {
638		device_printf(dev,"Vlan in use, detach first\n");
639		return (EBUSY);
640	}
641
642	if (adapter->led_dev != NULL)
643		led_destroy(adapter->led_dev);
644
645#ifdef DEVICE_POLLING
646	if (ifp->if_capenable & IFCAP_POLLING)
647		ether_poll_deregister(ifp);
648#endif
649
650	IGB_CORE_LOCK(adapter);
651	adapter->in_detach = 1;
652	igb_stop(adapter);
653	IGB_CORE_UNLOCK(adapter);
654
655	e1000_phy_hw_reset(&adapter->hw);
656
657	/* Give control back to firmware */
658	igb_release_manageability(adapter);
659	igb_release_hw_control(adapter);
660
661	if (adapter->wol) {
662		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
663		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
664		igb_enable_wakeup(dev);
665	}
666
667	/* Unregister VLAN events */
668	if (adapter->vlan_attach != NULL)
669		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
670	if (adapter->vlan_detach != NULL)
671		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
672
673	ether_ifdetach(adapter->ifp);
674
675	callout_drain(&adapter->timer);
676
677	igb_free_pci_resources(adapter);
678	bus_generic_detach(dev);
679	if_free(ifp);
680
681	igb_free_transmit_structures(adapter);
682	igb_free_receive_structures(adapter);
683
684	IGB_CORE_LOCK_DESTROY(adapter);
685
686	return (0);
687}
688
689/*********************************************************************
690 *
691 *  Shutdown entry point
692 *
693 **********************************************************************/
694
695static int
696igb_shutdown(device_t dev)
697{
698	return igb_suspend(dev);
699}
700
701/*
702 * Suspend/resume device methods.
703 */
704static int
705igb_suspend(device_t dev)
706{
707	struct adapter *adapter = device_get_softc(dev);
708
709	IGB_CORE_LOCK(adapter);
710
711	igb_stop(adapter);
712
713        igb_release_manageability(adapter);
714	igb_release_hw_control(adapter);
715
716        if (adapter->wol) {
717                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
718                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
719                igb_enable_wakeup(dev);
720        }
721
722	IGB_CORE_UNLOCK(adapter);
723
724	return bus_generic_suspend(dev);
725}
726
727static int
728igb_resume(device_t dev)
729{
730	struct adapter *adapter = device_get_softc(dev);
731	struct ifnet *ifp = adapter->ifp;
732
733	IGB_CORE_LOCK(adapter);
734	igb_init_locked(adapter);
735	igb_init_manageability(adapter);
736
737	if ((ifp->if_flags & IFF_UP) &&
738	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
739		igb_start(ifp);
740
741	IGB_CORE_UNLOCK(adapter);
742
743	return bus_generic_resume(dev);
744}
745
746
747/*********************************************************************
748 *  Transmit entry point
749 *
750 *  igb_start is called by the stack to initiate a transmit.
751 *  The driver will remain in this routine as long as there are
752 *  packets to transmit and transmit resources are available.
753 *  In case resources are not available, the stack is notified and
754 *  the packet is requeued.
755 **********************************************************************/
756
757static void
758igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
759{
760	struct adapter	*adapter = ifp->if_softc;
761	struct mbuf	*m_head;
762
763	IGB_TX_LOCK_ASSERT(txr);
764
765	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
766	    IFF_DRV_RUNNING)
767		return;
768	if (!adapter->link_active)
769		return;
770
771	/* Call cleanup if number of TX descriptors low */
772	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
773		igb_txeof(txr);
774
775	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
776		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
777			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
778			break;
779		}
780		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
781		if (m_head == NULL)
782			break;
783		/*
784		 *  Encapsulation can modify our pointer, and/or make it
785		 *  NULL on failure.  In that event, we can't requeue.
786		 */
787		if (igb_xmit(txr, &m_head)) {
788			if (m_head == NULL)
789				break;
790			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
791			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
792			break;
793		}
794
795		/* Send a copy of the frame to the BPF listener */
796		ETHER_BPF_MTAP(ifp, m_head);
797
798		/* Set watchdog on */
799		txr->watchdog_time = ticks;
800		txr->watchdog_check = TRUE;
801	}
802}
803
804/*
805 * Legacy TX driver routine, called from the
806 * stack, always uses tx[0], and spins for it.
807 * Should not be used with multiqueue tx
808 */
809static void
810igb_start(struct ifnet *ifp)
811{
812	struct adapter	*adapter = ifp->if_softc;
813	struct tx_ring	*txr = adapter->tx_rings;
814
815	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
816		IGB_TX_LOCK(txr);
817		igb_start_locked(txr, ifp);
818		IGB_TX_UNLOCK(txr);
819	}
820	return;
821}
822
823#if __FreeBSD_version >= 800000
824/*
825** Multiqueue Transmit driver
826**
827*/
828static int
829igb_mq_start(struct ifnet *ifp, struct mbuf *m)
830{
831	struct adapter		*adapter = ifp->if_softc;
832	struct igb_queue	*que;
833	struct tx_ring		*txr;
834	int 			i = 0, err = 0;
835
836	/* Which queue to use */
837	if ((m->m_flags & M_FLOWID) != 0)
838		i = m->m_pkthdr.flowid % adapter->num_queues;
839
840	txr = &adapter->tx_rings[i];
841	que = &adapter->queues[i];
842
843	if (IGB_TX_TRYLOCK(txr)) {
844		err = igb_mq_start_locked(ifp, txr, m);
845		IGB_TX_UNLOCK(txr);
846	} else {
847		err = drbr_enqueue(ifp, txr->br, m);
848		taskqueue_enqueue(que->tq, &que->que_task);
849	}
850
851	return (err);
852}
853
854static int
855igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
856{
857	struct adapter  *adapter = txr->adapter;
858        struct mbuf     *next;
859        int             err = 0, enq;
860
861	IGB_TX_LOCK_ASSERT(txr);
862
863	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
864	    IFF_DRV_RUNNING || adapter->link_active == 0) {
865		if (m != NULL)
866			err = drbr_enqueue(ifp, txr->br, m);
867		return (err);
868	}
869
870	/* Call cleanup if number of TX descriptors low */
871	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
872		igb_txeof(txr);
873
874	enq = 0;
875	if (m == NULL) {
876		next = drbr_dequeue(ifp, txr->br);
877	} else if (drbr_needs_enqueue(ifp, txr->br)) {
878		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
879			return (err);
880		next = drbr_dequeue(ifp, txr->br);
881	} else
882		next = m;
883
884	/* Process the queue */
885	while (next != NULL) {
886		if ((err = igb_xmit(txr, &next)) != 0) {
887			if (next != NULL)
888				err = drbr_enqueue(ifp, txr->br, next);
889			break;
890		}
891		enq++;
892		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
893		ETHER_BPF_MTAP(ifp, next);
894		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
895			break;
896		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
897			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
898			break;
899		}
900		next = drbr_dequeue(ifp, txr->br);
901	}
902	if (enq > 0) {
903		/* Set the watchdog */
904		txr->watchdog_check = TRUE;
905		txr->watchdog_time = ticks;
906	}
907	return (err);
908}
909
910/*
911** Flush all ring buffers
912*/
913static void
914igb_qflush(struct ifnet *ifp)
915{
916	struct adapter	*adapter = ifp->if_softc;
917	struct tx_ring	*txr = adapter->tx_rings;
918	struct mbuf	*m;
919
920	for (int i = 0; i < adapter->num_queues; i++, txr++) {
921		IGB_TX_LOCK(txr);
922		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
923			m_freem(m);
924		IGB_TX_UNLOCK(txr);
925	}
926	if_qflush(ifp);
927}
928#endif /* __FreeBSD_version >= 800000 */
929
930/*********************************************************************
931 *  Ioctl entry point
932 *
933 *  igb_ioctl is called when the user wants to configure the
934 *  interface.
935 *
936 *  return 0 on success, positive on failure
937 **********************************************************************/
938
939static int
940igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
941{
942	struct adapter	*adapter = ifp->if_softc;
943	struct ifreq *ifr = (struct ifreq *)data;
944#ifdef INET
945	struct ifaddr *ifa = (struct ifaddr *)data;
946#endif
947	int error = 0;
948
949	if (adapter->in_detach)
950		return (error);
951
952	switch (command) {
953	case SIOCSIFADDR:
954#ifdef INET
955		if (ifa->ifa_addr->sa_family == AF_INET) {
956			/*
957			 * XXX
958			 * Since resetting hardware takes a very long time
959			 * and results in link renegotiation, we only
960			 * initialize the hardware when it is absolutely
961			 * required.
962			 */
963			ifp->if_flags |= IFF_UP;
964			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
965				IGB_CORE_LOCK(adapter);
966				igb_init_locked(adapter);
967				IGB_CORE_UNLOCK(adapter);
968			}
969			if (!(ifp->if_flags & IFF_NOARP))
970				arp_ifinit(ifp, ifa);
971		} else
972#endif
973			error = ether_ioctl(ifp, command, data);
974		break;
975	case SIOCSIFMTU:
976	    {
977		int max_frame_size;
978
979		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
980
981		IGB_CORE_LOCK(adapter);
982		max_frame_size = 9234;
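		/*
		** Illustrative note (not in the original source): with a
		** 9234-byte frame limit, the largest MTU accepted below is
		** 9234 - 14 (header) - 4 (CRC) = 9216 bytes.
		*/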
983		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
984		    ETHER_CRC_LEN) {
985			IGB_CORE_UNLOCK(adapter);
986			error = EINVAL;
987			break;
988		}
989
990		ifp->if_mtu = ifr->ifr_mtu;
991		adapter->max_frame_size =
992		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
993		igb_init_locked(adapter);
994		IGB_CORE_UNLOCK(adapter);
995		break;
996	    }
997	case SIOCSIFFLAGS:
998		IOCTL_DEBUGOUT("ioctl rcv'd:\
999		    SIOCSIFFLAGS (Set Interface Flags)");
1000		IGB_CORE_LOCK(adapter);
1001		if (ifp->if_flags & IFF_UP) {
1002			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1003				if ((ifp->if_flags ^ adapter->if_flags) &
1004				    (IFF_PROMISC | IFF_ALLMULTI)) {
1005					igb_disable_promisc(adapter);
1006					igb_set_promisc(adapter);
1007				}
1008			} else
1009				igb_init_locked(adapter);
1010		} else
1011			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1012				igb_stop(adapter);
1013		adapter->if_flags = ifp->if_flags;
1014		IGB_CORE_UNLOCK(adapter);
1015		break;
1016	case SIOCADDMULTI:
1017	case SIOCDELMULTI:
1018		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1019		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1020			IGB_CORE_LOCK(adapter);
1021			igb_disable_intr(adapter);
1022			igb_set_multi(adapter);
1023#ifdef DEVICE_POLLING
1024			if (!(ifp->if_capenable & IFCAP_POLLING))
1025#endif
1026				igb_enable_intr(adapter);
1027			IGB_CORE_UNLOCK(adapter);
1028		}
1029		break;
1030	case SIOCSIFMEDIA:
1031		/* Check SOL/IDER usage */
1032		IGB_CORE_LOCK(adapter);
1033		if (e1000_check_reset_block(&adapter->hw)) {
1034			IGB_CORE_UNLOCK(adapter);
1035			device_printf(adapter->dev, "Media change is"
1036			    " blocked due to SOL/IDER session.\n");
1037			break;
1038		}
1039		IGB_CORE_UNLOCK(adapter);
1040	case SIOCGIFMEDIA:
1041		IOCTL_DEBUGOUT("ioctl rcv'd: \
1042		    SIOCxIFMEDIA (Get/Set Interface Media)");
1043		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1044		break;
1045	case SIOCSIFCAP:
1046	    {
1047		int mask, reinit;
1048
1049		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1050		reinit = 0;
1051		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1052#ifdef DEVICE_POLLING
1053		if (mask & IFCAP_POLLING) {
1054			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1055				error = ether_poll_register(igb_poll, ifp);
1056				if (error)
1057					return (error);
1058				IGB_CORE_LOCK(adapter);
1059				igb_disable_intr(adapter);
1060				ifp->if_capenable |= IFCAP_POLLING;
1061				IGB_CORE_UNLOCK(adapter);
1062			} else {
1063				error = ether_poll_deregister(ifp);
1064				/* Enable interrupt even in error case */
1065				IGB_CORE_LOCK(adapter);
1066				igb_enable_intr(adapter);
1067				ifp->if_capenable &= ~IFCAP_POLLING;
1068				IGB_CORE_UNLOCK(adapter);
1069			}
1070		}
1071#endif
1072		if (mask & IFCAP_HWCSUM) {
1073			ifp->if_capenable ^= IFCAP_HWCSUM;
1074			reinit = 1;
1075		}
1076		if (mask & IFCAP_TSO4) {
1077			ifp->if_capenable ^= IFCAP_TSO4;
1078			reinit = 1;
1079		}
1080		if (mask & IFCAP_VLAN_HWTAGGING) {
1081			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1082			reinit = 1;
1083		}
1084		if (mask & IFCAP_VLAN_HWFILTER) {
1085			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1086			reinit = 1;
1087		}
1088		if (mask & IFCAP_LRO) {
1089			ifp->if_capenable ^= IFCAP_LRO;
1090			reinit = 1;
1091		}
1092		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1093			igb_init(adapter);
1094		VLAN_CAPABILITIES(ifp);
1095		break;
1096	    }
1097
1098	default:
1099		error = ether_ioctl(ifp, command, data);
1100		break;
1101	}
1102
1103	return (error);
1104}
1105
1106
1107/*********************************************************************
1108 *  Init entry point
1109 *
1110 *  This routine is used in two ways. It is used by the stack as
1111 *  init entry point in network interface structure. It is also used
1112 *  by the driver as a hw/sw initialization routine to get to a
1113 *  consistent state.
1114 *
1115 *  return 0 on success, positive on failure
1116 **********************************************************************/
1117
1118static void
1119igb_init_locked(struct adapter *adapter)
1120{
1121	struct ifnet	*ifp = adapter->ifp;
1122	device_t	dev = adapter->dev;
1123
1124	INIT_DEBUGOUT("igb_init: begin");
1125
1126	IGB_CORE_LOCK_ASSERT(adapter);
1127
1128	igb_disable_intr(adapter);
1129	callout_stop(&adapter->timer);
1130
1131	/* Get the latest mac address, User can use a LAA */
1132        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1133              ETHER_ADDR_LEN);
1134
1135	/* Put the address into the Receive Address Array */
1136	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1137
1138	igb_reset(adapter);
1139	igb_update_link_status(adapter);
1140
1141	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1142
1143        /* Use real VLAN Filter support? */
1144	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1145		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1146			/* Use real VLAN Filter support */
1147			igb_setup_vlan_hw_support(adapter);
1148		else {
1149			u32 ctrl;
1150			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1151			ctrl |= E1000_CTRL_VME;
1152			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1153		}
1154	}
1155
1156	/* Set hardware offload abilities */
1157	ifp->if_hwassist = 0;
1158	if (ifp->if_capenable & IFCAP_TXCSUM) {
1159		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1160#if __FreeBSD_version >= 800000
1161		if (adapter->hw.mac.type == e1000_82576)
1162			ifp->if_hwassist |= CSUM_SCTP;
1163#endif
1164	}
1165
1166	if (ifp->if_capenable & IFCAP_TSO4)
1167		ifp->if_hwassist |= CSUM_TSO;
1168
1169	/* Configure for OS presence */
1170	igb_init_manageability(adapter);
1171
1172	/* Prepare transmit descriptors and buffers */
1173	igb_setup_transmit_structures(adapter);
1174	igb_initialize_transmit_units(adapter);
1175
1176	/* Setup Multicast table */
1177	igb_set_multi(adapter);
1178
1179	/*
1180	** Figure out the desired mbuf pool
1181	** for doing jumbo/packetsplit
1182	*/
1183	if (ifp->if_mtu > ETHERMTU)
1184		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1185	else
1186		adapter->rx_mbuf_sz = MCLBYTES;
1187
1188	/* Prepare receive descriptors and buffers */
1189	if (igb_setup_receive_structures(adapter)) {
1190		device_printf(dev, "Could not setup receive structures\n");
1191		return;
1192	}
1193	igb_initialize_receive_units(adapter);
1194
1195	/* Don't lose promiscuous settings */
1196	igb_set_promisc(adapter);
1197
1198	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1199	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1200
1201	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1202	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1203
1204	if (adapter->msix > 1) /* Set up queue routing */
1205		igb_configure_queues(adapter);
1206
1207	/* Set up VLAN tag offload and filter */
1208	igb_setup_vlan_hw_support(adapter);
1209
1210	/* this clears any pending interrupts */
1211	E1000_READ_REG(&adapter->hw, E1000_ICR);
1212#ifdef DEVICE_POLLING
1213	/*
1214	 * Only enable interrupts if we are not polling; make sure
1215	 * they are off otherwise.
1216	 */
1217	if (ifp->if_capenable & IFCAP_POLLING)
1218		igb_disable_intr(adapter);
1219	else
1220#endif /* DEVICE_POLLING */
1221	{
1222	igb_enable_intr(adapter);
1223	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1224	}
1225
1226	/* Don't reset the phy next time init gets called */
1227	adapter->hw.phy.reset_disable = TRUE;
1228}
1229
1230static void
1231igb_init(void *arg)
1232{
1233	struct adapter *adapter = arg;
1234
1235	IGB_CORE_LOCK(adapter);
1236	igb_init_locked(adapter);
1237	IGB_CORE_UNLOCK(adapter);
1238}
1239
1240
1241static void
1242igb_handle_que(void *context, int pending)
1243{
1244	struct igb_queue *que = context;
1245	struct adapter *adapter = que->adapter;
1246	struct tx_ring *txr = que->txr;
1247	struct ifnet	*ifp = adapter->ifp;
1248
1249	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1250		bool	more;
1251
1252		more = igb_rxeof(que, -1, NULL);
1253
1254		IGB_TX_LOCK(txr);
1255		if (igb_txeof(txr))
1256			more = TRUE;
1257#if __FreeBSD_version >= 800000
1258		if (!drbr_empty(ifp, txr->br))
1259			igb_mq_start_locked(ifp, txr, NULL);
1260#else
1261		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1262			igb_start_locked(txr, ifp);
1263#endif
1264		IGB_TX_UNLOCK(txr);
1265		if (more) {
1266			taskqueue_enqueue(que->tq, &que->que_task);
1267			return;
1268		}
1269	}
1270
1271#ifdef DEVICE_POLLING
1272	if (ifp->if_capenable & IFCAP_POLLING)
1273		return;
1274#endif
1275	/* Reenable this interrupt */
1276	if (que->eims)
1277		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1278	else
1279		igb_enable_intr(adapter);
1280}
1281
1282/* Deal with link in a sleepable context */
1283static void
1284igb_handle_link(void *context, int pending)
1285{
1286	struct adapter *adapter = context;
1287
1288	adapter->hw.mac.get_link_status = 1;
1289	igb_update_link_status(adapter);
1290}
1291
1292/*********************************************************************
1293 *
1294 *  MSI/Legacy Deferred
1295 *  Interrupt Service routine
1296 *
1297 *********************************************************************/
1298static int
1299igb_irq_fast(void *arg)
1300{
1301	struct adapter		*adapter = arg;
1302	struct igb_queue	*que = adapter->queues;
1303	u32			reg_icr;
1304
1305
1306	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1307
1308	/* Hot eject?  */
1309	if (reg_icr == 0xffffffff)
1310		return FILTER_STRAY;
1311
1312	/* Definitely not our interrupt.  */
1313	if (reg_icr == 0x0)
1314		return FILTER_STRAY;
1315
1316	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1317		return FILTER_STRAY;
1318
1319	/*
1320	 * Mask interrupts until the taskqueue is finished running.  This is
1321	 * cheap, just assume that it is needed.  This also works around the
1322	 * MSI message reordering errata on certain systems.
1323	 */
1324	igb_disable_intr(adapter);
1325	taskqueue_enqueue(que->tq, &que->que_task);
1326
1327	/* Link status change */
1328	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1329		taskqueue_enqueue(que->tq, &adapter->link_task);
1330
1331	if (reg_icr & E1000_ICR_RXO)
1332		adapter->rx_overruns++;
1333	return FILTER_HANDLED;
1334}
1335
1336#ifdef DEVICE_POLLING
1337/*********************************************************************
1338 *
1339 *  Legacy polling routine: if using this code you MUST be sure that
1340 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1341 *
1342 *********************************************************************/
1343#if __FreeBSD_version >= 800000
1344#define POLL_RETURN_COUNT(a) (a)
1345static int
1346#else
1347#define POLL_RETURN_COUNT(a)
1348static void
1349#endif
1350igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1351{
1352	struct adapter		*adapter = ifp->if_softc;
1353	struct igb_queue	*que = adapter->queues;
1354	struct tx_ring		*txr = adapter->tx_rings;
1355	u32			reg_icr, rx_done = 0;
1356	u32			loop = IGB_MAX_LOOP;
1357	bool			more;
1358
1359	IGB_CORE_LOCK(adapter);
1360	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1361		IGB_CORE_UNLOCK(adapter);
1362		return POLL_RETURN_COUNT(rx_done);
1363	}
1364
1365	if (cmd == POLL_AND_CHECK_STATUS) {
1366		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1367		/* Link status change */
1368		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1369			igb_handle_link(adapter, 0);
1370
1371		if (reg_icr & E1000_ICR_RXO)
1372			adapter->rx_overruns++;
1373	}
1374	IGB_CORE_UNLOCK(adapter);
1375
1376	igb_rxeof(que, count, &rx_done);
1377
1378	IGB_TX_LOCK(txr);
1379	do {
1380		more = igb_txeof(txr);
1381	} while (loop-- && more);
1382#if __FreeBSD_version >= 800000
1383	if (!drbr_empty(ifp, txr->br))
1384		igb_mq_start_locked(ifp, txr, NULL);
1385#else
1386	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1387		igb_start_locked(txr, ifp);
1388#endif
1389	IGB_TX_UNLOCK(txr);
1390	return POLL_RETURN_COUNT(rx_done);
1391}
1392#endif /* DEVICE_POLLING */
1393
1394/*********************************************************************
1395 *
1396 *  MSIX TX Interrupt Service routine
1397 *
1398 **********************************************************************/
1399static void
1400igb_msix_que(void *arg)
1401{
1402	struct igb_queue *que = arg;
1403	struct adapter *adapter = que->adapter;
1404	struct tx_ring *txr = que->txr;
1405	struct rx_ring *rxr = que->rxr;
1406	u32		newitr = 0;
1407	bool		more_tx, more_rx;
1408
1409	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1410	++que->irqs;
1411
1412	IGB_TX_LOCK(txr);
1413	more_tx = igb_txeof(txr);
1414	IGB_TX_UNLOCK(txr);
1415
1416	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1417
1418	if (igb_enable_aim == FALSE)
1419		goto no_calc;
1420	/*
1421	** Do Adaptive Interrupt Moderation:
1422        **  - Write out last calculated setting
1423	**  - Calculate based on average size over
1424	**    the last interval.
1425	*/
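	/*
	** Worked example (added for illustration, not in the original
	** source): at gigabit speed with an average of 1000 bytes per
	** packet, newitr = 1000 + 24 = 1024; that falls in the mid
	** range, so it becomes 1024 / 3 = 341 and is then masked with
	** 0x7FFC to 340 before being programmed into EITR.
	*/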
1426        if (que->eitr_setting)
1427                E1000_WRITE_REG(&adapter->hw,
1428                    E1000_EITR(que->msix), que->eitr_setting);
1429
1430        que->eitr_setting = 0;
1431
1432        /* Idle, do nothing */
1433        if ((txr->bytes == 0) && (rxr->bytes == 0))
1434                goto no_calc;
1435
1436        /* Use half the default if sub-gig */
1437        if (adapter->link_speed != 1000)
1438                newitr = IGB_DEFAULT_ITR / 2;
1439        else {
1440		if ((txr->bytes) && (txr->packets))
1441                	newitr = txr->bytes/txr->packets;
1442		if ((rxr->bytes) && (rxr->packets))
1443			newitr = max(newitr,
1444			    (rxr->bytes / rxr->packets));
1445                newitr += 24; /* account for hardware frame, crc */
1446		/* set an upper boundary */
1447		newitr = min(newitr, 3000);
1448		/* Be nice to the mid range */
1449                if ((newitr > 300) && (newitr < 1200))
1450                        newitr = (newitr / 3);
1451                else
1452                        newitr = (newitr / 2);
1453        }
1454        newitr &= 0x7FFC;  /* Mask invalid bits */
1455        if (adapter->hw.mac.type == e1000_82575)
1456                newitr |= newitr << 16;
1457        else
1458                newitr |= E1000_EITR_CNT_IGNR;
1459
1460        /* save for next interrupt */
1461        que->eitr_setting = newitr;
1462
1463        /* Reset state */
1464        txr->bytes = 0;
1465        txr->packets = 0;
1466        rxr->bytes = 0;
1467        rxr->packets = 0;
1468
1469no_calc:
1470	/* Schedule a clean task if needed*/
1471	if (more_tx || more_rx)
1472		taskqueue_enqueue(que->tq, &que->que_task);
1473	else
1474		/* Reenable this interrupt */
1475		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1476	return;
1477}
1478
1479
1480/*********************************************************************
1481 *
1482 *  MSIX Link Interrupt Service routine
1483 *
1484 **********************************************************************/
1485
1486static void
1487igb_msix_link(void *arg)
1488{
1489	struct adapter	*adapter = arg;
1490	u32       	icr;
1491
1492	++adapter->link_irq;
1493	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1494	if (!(icr & E1000_ICR_LSC))
1495		goto spurious;
1496	igb_handle_link(adapter, 0);
1497
1498spurious:
1499	/* Rearm */
1500	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1501	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1502	return;
1503}
1504
1505
1506/*********************************************************************
1507 *
1508 *  Media Ioctl callback
1509 *
1510 *  This routine is called whenever the user queries the status of
1511 *  the interface using ifconfig.
1512 *
1513 **********************************************************************/
1514static void
1515igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1516{
1517	struct adapter *adapter = ifp->if_softc;
1518	u_char fiber_type = IFM_1000_SX;
1519
1520	INIT_DEBUGOUT("igb_media_status: begin");
1521
1522	IGB_CORE_LOCK(adapter);
1523	igb_update_link_status(adapter);
1524
1525	ifmr->ifm_status = IFM_AVALID;
1526	ifmr->ifm_active = IFM_ETHER;
1527
1528	if (!adapter->link_active) {
1529		IGB_CORE_UNLOCK(adapter);
1530		return;
1531	}
1532
1533	ifmr->ifm_status |= IFM_ACTIVE;
1534
1535	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1536	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1537		ifmr->ifm_active |= fiber_type | IFM_FDX;
1538	else {
1539		switch (adapter->link_speed) {
1540		case 10:
1541			ifmr->ifm_active |= IFM_10_T;
1542			break;
1543		case 100:
1544			ifmr->ifm_active |= IFM_100_TX;
1545			break;
1546		case 1000:
1547			ifmr->ifm_active |= IFM_1000_T;
1548			break;
1549		}
1550		if (adapter->link_duplex == FULL_DUPLEX)
1551			ifmr->ifm_active |= IFM_FDX;
1552		else
1553			ifmr->ifm_active |= IFM_HDX;
1554	}
1555	IGB_CORE_UNLOCK(adapter);
1556}
1557
1558/*********************************************************************
1559 *
1560 *  Media Ioctl callback
1561 *
1562 *  This routine is called when the user changes speed/duplex using
1563 *  media/mediaopt options with ifconfig.
1564 *
1565 **********************************************************************/
1566static int
1567igb_media_change(struct ifnet *ifp)
1568{
1569	struct adapter *adapter = ifp->if_softc;
1570	struct ifmedia  *ifm = &adapter->media;
1571
1572	INIT_DEBUGOUT("igb_media_change: begin");
1573
1574	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1575		return (EINVAL);
1576
1577	IGB_CORE_LOCK(adapter);
1578	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1579	case IFM_AUTO:
1580		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1581		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1582		break;
1583	case IFM_1000_LX:
1584	case IFM_1000_SX:
1585	case IFM_1000_T:
1586		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1587		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1588		break;
1589	case IFM_100_TX:
1590		adapter->hw.mac.autoneg = FALSE;
1591		adapter->hw.phy.autoneg_advertised = 0;
1592		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1593			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1594		else
1595			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1596		break;
1597	case IFM_10_T:
1598		adapter->hw.mac.autoneg = FALSE;
1599		adapter->hw.phy.autoneg_advertised = 0;
1600		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1602		else
1603			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1604		break;
1605	default:
1606		device_printf(adapter->dev, "Unsupported media type\n");
1607	}
1608
1609	/* As the speed/duplex settings may have changed, we need to
1610	 * reset the PHY.
1611	 */
1612	adapter->hw.phy.reset_disable = FALSE;
1613
1614	igb_init_locked(adapter);
1615	IGB_CORE_UNLOCK(adapter);
1616
1617	return (0);
1618}
1619
1620
1621/*********************************************************************
1622 *
1623 *  This routine maps the mbufs to the Advanced TX descriptors
1624 *  used by the 82575 adapter.
1625 *
1626 **********************************************************************/
1627
1628static int
1629igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1630{
1631	struct adapter		*adapter = txr->adapter;
1632	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1633	bus_dmamap_t		map;
1634	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1635	union e1000_adv_tx_desc	*txd = NULL;
1636	struct mbuf		*m_head;
1637	u32			olinfo_status = 0, cmd_type_len = 0;
1638	int			nsegs, i, j, error, first, last = 0;
1639	u32			hdrlen = 0;
1640
1641	m_head = *m_headp;
1642
1643
1644	/* Set basic descriptor constants */
1645	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1646	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1647	if (m_head->m_flags & M_VLANTAG)
1648		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1649
1650        /*
1651         * Force a cleanup if number of TX descriptors
1652         * available hits the threshold
1653         */
1654	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1655		igb_txeof(txr);
1656		/* Now do we at least have the minimum? */
1657		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1658			txr->no_desc_avail++;
1659			return (ENOBUFS);
1660		}
1661	}
1662
1663	/*
1664         * Map the packet for DMA.
1665	 *
1666	 * Capture the first descriptor index;
1667	 * this descriptor will have the index
1668	 * of the EOP, which is the only one that
1669	 * now gets a DONE bit writeback.
1670	 */
1671	first = txr->next_avail_desc;
1672	tx_buffer = &txr->tx_buffers[first];
1673	tx_buffer_mapped = tx_buffer;
1674	map = tx_buffer->map;
1675
1676	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1677	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1678
1679	if (error == EFBIG) {
1680		struct mbuf *m;
1681
1682		m = m_defrag(*m_headp, M_DONTWAIT);
1683		if (m == NULL) {
1684			adapter->mbuf_defrag_failed++;
1685			m_freem(*m_headp);
1686			*m_headp = NULL;
1687			return (ENOBUFS);
1688		}
1689		*m_headp = m;
1690
1691		/* Try it again */
1692		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1693		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1694
1695		if (error == ENOMEM) {
1696			adapter->no_tx_dma_setup++;
1697			return (error);
1698		} else if (error != 0) {
1699			adapter->no_tx_dma_setup++;
1700			m_freem(*m_headp);
1701			*m_headp = NULL;
1702			return (error);
1703		}
1704	} else if (error == ENOMEM) {
1705		adapter->no_tx_dma_setup++;
1706		return (error);
1707	} else if (error != 0) {
1708		adapter->no_tx_dma_setup++;
1709		m_freem(*m_headp);
1710		*m_headp = NULL;
1711		return (error);
1712	}
1713
1714	/* Check again to be sure we have enough descriptors */
1715        if (nsegs > (txr->tx_avail - 2)) {
1716                txr->no_desc_avail++;
1717		bus_dmamap_unload(txr->txtag, map);
1718		return (ENOBUFS);
1719        }
1720	m_head = *m_headp;
1721
1722        /*
1723         * Set up the context descriptor:
1724         * used when any hardware offload is done.
1725	 * This includes CSUM, VLAN, and TSO. It
1726	 * will use the first descriptor.
1727         */
1728        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1729		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1730			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1731			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1732			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1733		} else
1734			return (ENXIO);
1735	} else if (igb_tx_ctx_setup(txr, m_head))
1736		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1737
1738	/* Calculate payload length */
1739	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1740	    << E1000_ADVTXD_PAYLEN_SHIFT);
1741
1742	/* 82575 needs the queue index added */
1743	if (adapter->hw.mac.type == e1000_82575)
1744		olinfo_status |= txr->me << 4;
1745
1746	/* Set up our transmit descriptors */
1747	i = txr->next_avail_desc;
1748	for (j = 0; j < nsegs; j++) {
1749		bus_size_t seg_len;
1750		bus_addr_t seg_addr;
1751
1752		tx_buffer = &txr->tx_buffers[i];
1753		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1754		seg_addr = segs[j].ds_addr;
1755		seg_len  = segs[j].ds_len;
1756
1757		txd->read.buffer_addr = htole64(seg_addr);
1758		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1759		txd->read.olinfo_status = htole32(olinfo_status);
1760		last = i;
1761		if (++i == adapter->num_tx_desc)
1762			i = 0;
1763		tx_buffer->m_head = NULL;
1764		tx_buffer->next_eop = -1;
1765	}
1766
1767	txr->next_avail_desc = i;
1768	txr->tx_avail -= nsegs;
1769
1770        tx_buffer->m_head = m_head;
1771	tx_buffer_mapped->map = tx_buffer->map;
1772	tx_buffer->map = map;
1773        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1774
1775        /*
1776         * Last Descriptor of Packet
1777	 * needs End Of Packet (EOP)
1778	 * and Report Status (RS)
1779         */
1780        txd->read.cmd_type_len |=
1781	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1782	/*
1783	 * Keep track in the first buffer which
1784	 * descriptor will be written back
1785	 */
1786	tx_buffer = &txr->tx_buffers[first];
1787	tx_buffer->next_eop = last;
1788	txr->watchdog_time = ticks;
1789
1790	/*
1791	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1792	 * that this frame is available to transmit.
1793	 */
1794	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1795	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1796	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1797	++txr->tx_packets;
1798
1799	return (0);
1800
1801}
1802
1803static void
1804igb_set_promisc(struct adapter *adapter)
1805{
1806	struct ifnet	*ifp = adapter->ifp;
1807	struct e1000_hw *hw = &adapter->hw;
1808	u32		reg;
1809
1810	if (hw->mac.type == e1000_vfadapt) {
1811		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1812		return;
1813	}
1814
1815	reg = E1000_READ_REG(hw, E1000_RCTL);
1816	if (ifp->if_flags & IFF_PROMISC) {
1817		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1818		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1819	} else if (ifp->if_flags & IFF_ALLMULTI) {
1820		reg |= E1000_RCTL_MPE;
1821		reg &= ~E1000_RCTL_UPE;
1822		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1823	}
1824}
1825
1826static void
1827igb_disable_promisc(struct adapter *adapter)
1828{
1829	struct e1000_hw *hw = &adapter->hw;
1830	u32		reg;
1831
1832	if (hw->mac.type == e1000_vfadapt) {
1833		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1834		return;
1835	}
1836	reg = E1000_READ_REG(hw, E1000_RCTL);
1837	reg &=  (~E1000_RCTL_UPE);
1838	reg &=  (~E1000_RCTL_MPE);
1839	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1840}
1841
1842
1843/*********************************************************************
1844 *  Multicast Update
1845 *
1846 *  This routine is called whenever the multicast address list is updated.
1847 *
1848 **********************************************************************/
1849
1850static void
1851igb_set_multi(struct adapter *adapter)
1852{
1853	struct ifnet	*ifp = adapter->ifp;
1854	struct ifmultiaddr *ifma;
1855	u32 reg_rctl = 0;
1856	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1857
1858	int mcnt = 0;
1859
1860	IOCTL_DEBUGOUT("igb_set_multi: begin");
1861
1862#if __FreeBSD_version < 800000
1863	IF_ADDR_LOCK(ifp);
1864#else
1865	if_maddr_rlock(ifp);
1866#endif
1867	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1868		if (ifma->ifma_addr->sa_family != AF_LINK)
1869			continue;
1870
1871		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1872			break;
1873
1874		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1875		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1876		mcnt++;
1877	}
1878#if __FreeBSD_version < 800000
1879	IF_ADDR_UNLOCK(ifp);
1880#else
1881	if_maddr_runlock(ifp);
1882#endif
1883
1884	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1885		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1886		reg_rctl |= E1000_RCTL_MPE;
1887		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1888	} else
1889		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1890}
1891
1892
1893/*********************************************************************
1894 *  Timer routine:
1895 *  	This routine checks for link status,
1896 *	updates statistics, and does the watchdog.
1897 *
1898 **********************************************************************/
1899
1900static void
1901igb_local_timer(void *arg)
1902{
1903	struct adapter		*adapter = arg;
1904	device_t		dev = adapter->dev;
1905	struct tx_ring		*txr = adapter->tx_rings;
1906
1907
1908	IGB_CORE_LOCK_ASSERT(adapter);
1909
1910	igb_update_link_status(adapter);
1911	igb_update_stats_counters(adapter);
1912
1913        /*
1914        ** Watchdog: check for time since any descriptor was cleaned
1915        */
1916	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1917		if (txr->watchdog_check == FALSE)
1918			continue;
1919		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1920			goto timeout;
1921	}
1922
1923	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1924	return;
1925
1926timeout:
1927	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1928	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1929            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1930            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1931	device_printf(dev, "TX(%d) desc avail = %d, "
1932            "Next TX to Clean = %d\n",
1933            txr->me, txr->tx_avail, txr->next_to_clean);
1934	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1935	adapter->watchdog_events++;
1936	igb_init_locked(adapter);
1937}
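
/*
 * Illustrative note on the watchdog test above (assumes IGB_WATCHDOG,
 * defined in if_igb.h, is a tick count): a ring is only declared hung
 * when watchdog_check is TRUE and more than IGB_WATCHDOG ticks have
 * passed since watchdog_time was last stamped by igb_xmit()/igb_txeof(),
 * so an idle ring with watchdog_check cleared can never trip the timeout.
 */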
1938
1939static void
1940igb_update_link_status(struct adapter *adapter)
1941{
1942	struct e1000_hw *hw = &adapter->hw;
1943	struct ifnet *ifp = adapter->ifp;
1944	device_t dev = adapter->dev;
1945	struct tx_ring *txr = adapter->tx_rings;
1946	u32 link_check = 0;
1947
1948	/* Get the cached link value or read for real */
1949        switch (hw->phy.media_type) {
1950        case e1000_media_type_copper:
1951                if (hw->mac.get_link_status) {
1952			/* Do the work to read phy */
1953                        e1000_check_for_link(hw);
1954                        link_check = !hw->mac.get_link_status;
1955                } else
1956                        link_check = TRUE;
1957                break;
1958        case e1000_media_type_fiber:
1959                e1000_check_for_link(hw);
1960                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1961                                 E1000_STATUS_LU);
1962                break;
1963        case e1000_media_type_internal_serdes:
1964                e1000_check_for_link(hw);
1965                link_check = adapter->hw.mac.serdes_has_link;
1966                break;
1967	/* VF device is type_unknown */
1968        case e1000_media_type_unknown:
1969                e1000_check_for_link(hw);
1970		link_check = !hw->mac.get_link_status;
1971		/* Fall thru */
1972        default:
1973                break;
1974        }
1975
1976	/* Now we check if a transition has happened */
1977	if (link_check && (adapter->link_active == 0)) {
1978		e1000_get_speed_and_duplex(&adapter->hw,
1979		    &adapter->link_speed, &adapter->link_duplex);
1980		if (bootverbose)
1981			device_printf(dev, "Link is up %d Mbps %s\n",
1982			    adapter->link_speed,
1983			    ((adapter->link_duplex == FULL_DUPLEX) ?
1984			    "Full Duplex" : "Half Duplex"));
1985		adapter->link_active = 1;
1986		ifp->if_baudrate = adapter->link_speed * 1000000;
1987		/* This can sleep */
1988		if_link_state_change(ifp, LINK_STATE_UP);
1989	} else if (!link_check && (adapter->link_active == 1)) {
1990		ifp->if_baudrate = adapter->link_speed = 0;
1991		adapter->link_duplex = 0;
1992		if (bootverbose)
1993			device_printf(dev, "Link is Down\n");
1994		adapter->link_active = 0;
1995		/* This can sleep */
1996		if_link_state_change(ifp, LINK_STATE_DOWN);
1997		/* Turn off watchdogs */
1998		for (int i = 0; i < adapter->num_queues; i++, txr++)
1999			txr->watchdog_check = FALSE;
2000	}
2001}
2002
2003/*********************************************************************
2004 *
2005 *  This routine disables all traffic on the adapter by issuing a
2006 *  global reset on the MAC and deallocates TX/RX buffers.
2007 *
2008 **********************************************************************/
2009
2010static void
2011igb_stop(void *arg)
2012{
2013	struct adapter	*adapter = arg;
2014	struct ifnet	*ifp = adapter->ifp;
2015	struct tx_ring *txr = adapter->tx_rings;
2016
2017	IGB_CORE_LOCK_ASSERT(adapter);
2018
2019	INIT_DEBUGOUT("igb_stop: begin");
2020
2021	igb_disable_intr(adapter);
2022
2023	callout_stop(&adapter->timer);
2024
2025	/* Tell the stack that the interface is no longer active */
2026	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2027
2028	/* Unarm watchdog timer. */
2029	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2030		IGB_TX_LOCK(txr);
2031		txr->watchdog_check = FALSE;
2032		IGB_TX_UNLOCK(txr);
2033	}
2034
2035	e1000_reset_hw(&adapter->hw);
2036	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2037
2038	e1000_led_off(&adapter->hw);
2039	e1000_cleanup_led(&adapter->hw);
2040}
2041
2042
2043/*********************************************************************
2044 *
2045 *  Determine hardware revision.
2046 *
2047 **********************************************************************/
2048static void
2049igb_identify_hardware(struct adapter *adapter)
2050{
2051	device_t dev = adapter->dev;
2052
2053	/* Make sure our PCI config space has the necessary stuff set */
2054	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2056	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2057		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2058		    "bits were not set!\n");
2059		adapter->hw.bus.pci_cmd_word |=
2060		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2061		pci_write_config(dev, PCIR_COMMAND,
2062		    adapter->hw.bus.pci_cmd_word, 2);
2063	}
2064
2065	/* Save off the information about this board */
2066	adapter->hw.vendor_id = pci_get_vendor(dev);
2067	adapter->hw.device_id = pci_get_device(dev);
2068	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2069	adapter->hw.subsystem_vendor_id =
2070	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2071	adapter->hw.subsystem_device_id =
2072	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2073}
2074
2075static int
2076igb_allocate_pci_resources(struct adapter *adapter)
2077{
2078	device_t	dev = adapter->dev;
2079	int		rid;
2080
2081	rid = PCIR_BAR(0);
2082	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2083	    &rid, RF_ACTIVE);
2084	if (adapter->pci_mem == NULL) {
2085		device_printf(dev, "Unable to allocate bus resource: memory\n");
2086		return (ENXIO);
2087	}
2088	adapter->osdep.mem_bus_space_tag =
2089	    rman_get_bustag(adapter->pci_mem);
2090	adapter->osdep.mem_bus_space_handle =
2091	    rman_get_bushandle(adapter->pci_mem);
2092	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2093
2094	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2095
2096	/* This will setup either MSI/X or MSI */
2097	adapter->msix = igb_setup_msix(adapter);
2098	adapter->hw.back = &adapter->osdep;
2099
2100	return (0);
2101}
2102
2103/*********************************************************************
2104 *
2105 *  Setup the Legacy or MSI Interrupt handler
2106 *
2107 **********************************************************************/
2108static int
2109igb_allocate_legacy(struct adapter *adapter)
2110{
2111	device_t		dev = adapter->dev;
2112	struct igb_queue	*que = adapter->queues;
2113	int			error, rid = 0;
2114
2115	/* Turn off all interrupts */
2116	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2117
2118	/* MSI RID is 1 */
2119	if (adapter->msix == 1)
2120		rid = 1;
2121
2122	/* We allocate a single interrupt resource */
2123	adapter->res = bus_alloc_resource_any(dev,
2124	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2125	if (adapter->res == NULL) {
2126		device_printf(dev, "Unable to allocate bus resource: "
2127		    "interrupt\n");
2128		return (ENXIO);
2129	}
2130
2131	/*
2132	 * Try allocating a fast interrupt and the associated deferred
2133	 * processing contexts.
2134	 */
2135	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2136	/* Make tasklet for deferred link handling */
2137	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2138	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2139	    taskqueue_thread_enqueue, &que->tq);
2140	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2141	    device_get_nameunit(adapter->dev));
2142	if ((error = bus_setup_intr(dev, adapter->res,
2143	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2144	    adapter, &adapter->tag)) != 0) {
2145		device_printf(dev, "Failed to register fast interrupt "
2146			    "handler: %d\n", error);
2147		taskqueue_free(que->tq);
2148		que->tq = NULL;
2149		return (error);
2150	}
2151
2152	return (0);
2153}
2154
2155
2156/*********************************************************************
2157 *
2158 *  Setup the MSIX Queue Interrupt handlers:
2159 *
2160 **********************************************************************/
2161static int
2162igb_allocate_msix(struct adapter *adapter)
2163{
2164	device_t		dev = adapter->dev;
2165	struct igb_queue	*que = adapter->queues;
2166	int			error, rid, vector = 0;
2167
2168
2169	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2170		rid = vector +1;
2171		que->res = bus_alloc_resource_any(dev,
2172		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2173		if (que->res == NULL) {
2174			device_printf(dev,
2175			    "Unable to allocate bus resource: "
2176			    "MSIX Queue Interrupt\n");
2177			return (ENXIO);
2178		}
2179		error = bus_setup_intr(dev, que->res,
2180	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2181		    igb_msix_que, que, &que->tag);
2182		if (error) {
2183			que->res = NULL;
2184			device_printf(dev, "Failed to register Queue handler");
2185			return (error);
2186		}
2187#if __FreeBSD_version >= 800504
2188		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2189#endif
2190		que->msix = vector;
2191		if (adapter->hw.mac.type == e1000_82575)
2192			que->eims = E1000_EICR_TX_QUEUE0 << i;
2193		else
2194			que->eims = 1 << vector;
2195		/*
2196		** Bind the msix vector, and thus the
2197		** rings to the corresponding cpu.
2198		*/
2199		if (adapter->num_queues > 1)
2200			bus_bind_intr(dev, que->res, i);
2201		/* Make tasklet for deferred handling */
2202		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2203		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2204		    taskqueue_thread_enqueue, &que->tq);
2205		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2206		    device_get_nameunit(adapter->dev));
2207	}
2208
2209	/* And Link */
2210	rid = vector + 1;
2211	adapter->res = bus_alloc_resource_any(dev,
2212	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2213	if (adapter->res == NULL) {
2214		device_printf(dev,
2215		    "Unable to allocate bus resource: "
2216		    "MSIX Link Interrupt\n");
2217		return (ENXIO);
2218	}
2219	if ((error = bus_setup_intr(dev, adapter->res,
2220	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2221	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2222		device_printf(dev, "Failed to register Link handler");
2223		return (error);
2224	}
2225#if __FreeBSD_version >= 800504
2226	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2227#endif
2228	adapter->linkvec = vector;
2229
2230	return (0);
2231}
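
/*
 * Illustrative vector layout implied by the code above (assumed
 * example): with num_queues = 4 the queue handlers occupy MSI-X
 * vectors 0-3 (IRQ resource IDs 1-4) and the link handler takes
 * vector 4 (resource ID 5); the rid is always "vector + 1".
 */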
2232
2233
2234static void
2235igb_configure_queues(struct adapter *adapter)
2236{
2237	struct	e1000_hw	*hw = &adapter->hw;
2238	struct	igb_queue	*que;
2239	u32			tmp, ivar = 0;
2240	u32			newitr = IGB_DEFAULT_ITR;
2241
2242	/* First turn on RSS capability */
2243	if (adapter->hw.mac.type > e1000_82575)
2244		E1000_WRITE_REG(hw, E1000_GPIE,
2245		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2246		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2247
2248	/* Turn on MSIX */
2249	switch (adapter->hw.mac.type) {
2250	case e1000_82580:
2251	case e1000_vfadapt:
2252		/* RX entries */
2253		for (int i = 0; i < adapter->num_queues; i++) {
2254			u32 index = i >> 1;
2255			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2256			que = &adapter->queues[i];
2257			if (i & 1) {
2258				ivar &= 0xFF00FFFF;
2259				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2260			} else {
2261				ivar &= 0xFFFFFF00;
2262				ivar |= que->msix | E1000_IVAR_VALID;
2263			}
2264			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2265		}
2266		/* TX entries */
2267		for (int i = 0; i < adapter->num_queues; i++) {
2268			u32 index = i >> 1;
2269			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2270			que = &adapter->queues[i];
2271			if (i & 1) {
2272				ivar &= 0x00FFFFFF;
2273				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2274			} else {
2275				ivar &= 0xFFFF00FF;
2276				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2277			}
2278			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2279			adapter->eims_mask |= que->eims;
2280		}
2281
2282		/* And for the link interrupt */
2283		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2284		adapter->link_mask = 1 << adapter->linkvec;
2285		adapter->eims_mask |= adapter->link_mask;
2286		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2287		break;
2288	case e1000_82576:
2289		/* RX entries */
2290		for (int i = 0; i < adapter->num_queues; i++) {
2291			u32 index = i & 0x7; /* Each IVAR has two entries */
2292			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2293			que = &adapter->queues[i];
2294			if (i < 8) {
2295				ivar &= 0xFFFFFF00;
2296				ivar |= que->msix | E1000_IVAR_VALID;
2297			} else {
2298				ivar &= 0xFF00FFFF;
2299				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2300			}
2301			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2302			adapter->eims_mask |= que->eims;
2303		}
2304		/* TX entries */
2305		for (int i = 0; i < adapter->num_queues; i++) {
2306			u32 index = i & 0x7; /* Each IVAR has two entries */
2307			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2308			que = &adapter->queues[i];
2309			if (i < 8) {
2310				ivar &= 0xFFFF00FF;
2311				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2312			} else {
2313				ivar &= 0x00FFFFFF;
2314				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2315			}
2316			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2317			adapter->eims_mask |= que->eims;
2318		}
2319
2320		/* And for the link interrupt */
2321		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2322		adapter->link_mask = 1 << adapter->linkvec;
2323		adapter->eims_mask |= adapter->link_mask;
2324		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2325		break;
2326
2327	case e1000_82575:
2328                /* enable MSI-X support*/
2329		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2330                tmp |= E1000_CTRL_EXT_PBA_CLR;
2331                /* Auto-Mask interrupts upon ICR read. */
2332                tmp |= E1000_CTRL_EXT_EIAME;
2333                tmp |= E1000_CTRL_EXT_IRCA;
2334                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2335
2336		/* Queues */
2337		for (int i = 0; i < adapter->num_queues; i++) {
2338			que = &adapter->queues[i];
2339			tmp = E1000_EICR_RX_QUEUE0 << i;
2340			tmp |= E1000_EICR_TX_QUEUE0 << i;
2341			que->eims = tmp;
2342			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2343			    i, que->eims);
2344			adapter->eims_mask |= que->eims;
2345		}
2346
2347		/* Link */
2348		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2349		    E1000_EIMS_OTHER);
2350		adapter->link_mask |= E1000_EIMS_OTHER;
2351		adapter->eims_mask |= adapter->link_mask;
2352	default:
2353		break;
2354	}
2355
2356	/* Set the starting interrupt rate */
2357        if (hw->mac.type == e1000_82575)
2358                newitr |= newitr << 16;
2359        else
2360                newitr |= E1000_EITR_CNT_IGNR;
2361
2362	for (int i = 0; i < adapter->num_queues; i++) {
2363		que = &adapter->queues[i];
2364		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2365	}
2366
2367	return;
2368}
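
/*
 * Illustrative IVAR packing for the 82576 case above (assumes
 * E1000_IVAR_VALID is the 0x80 "entry valid" bit): each 32-bit IVAR
 * holds four entries -- RX queue i in bits 7:0, TX queue i in bits
 * 15:8, RX queue i+8 in bits 23:16, TX queue i+8 in bits 31:24.  With
 * queue 0 on MSI-X vector 0 and queue 1 on vector 1, IVAR0 would read
 * back as 0x00008080 and IVAR1 as 0x00008181.
 */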
2369
2370
2371static void
2372igb_free_pci_resources(struct adapter *adapter)
2373{
2374	struct		igb_queue *que = adapter->queues;
2375	device_t	dev = adapter->dev;
2376	int		rid;
2377
2378	/*
2379	** There is a slight possibility of a failure mode
2380	** in attach that will result in entering this function
2381	** before interrupt resources have been initialized; in
2382	** that case we do not want to execute the loops below.
2383	** We can detect this reliably by the state of the adapter
2384	** res pointer.
2385	*/
2386	if (adapter->res == NULL)
2387		goto mem;
2388
2389	/*
2390	 * First release all the interrupt resources:
2391	 */
2392	for (int i = 0; i < adapter->num_queues; i++, que++) {
2393		rid = que->msix + 1;
2394		if (que->tag != NULL) {
2395			bus_teardown_intr(dev, que->res, que->tag);
2396			que->tag = NULL;
2397		}
2398		if (que->res != NULL)
2399			bus_release_resource(dev,
2400			    SYS_RES_IRQ, rid, que->res);
2401	}
2402
2403	/* Clean the Legacy or Link interrupt last */
2404	if (adapter->linkvec) /* we are doing MSIX */
2405		rid = adapter->linkvec + 1;
2406	else
2407		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2408		rid = (adapter->msix != 0) ? 1 : 0;
2409	if (adapter->tag != NULL) {
2410		bus_teardown_intr(dev, adapter->res, adapter->tag);
2411		adapter->tag = NULL;
2412	}
2413	if (adapter->res != NULL)
2414		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2415
2416mem:
2417	if (adapter->msix)
2418		pci_release_msi(dev);
2419
2420	if (adapter->msix_mem != NULL)
2421		bus_release_resource(dev, SYS_RES_MEMORY,
2422		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2423
2424	if (adapter->pci_mem != NULL)
2425		bus_release_resource(dev, SYS_RES_MEMORY,
2426		    PCIR_BAR(0), adapter->pci_mem);
2427
2428}
2429
2430/*
2431 * Setup Either MSI/X or MSI
2432 */
2433static int
2434igb_setup_msix(struct adapter *adapter)
2435{
2436	device_t dev = adapter->dev;
2437	int rid, want, queues, msgs;
2438
2439	/* tuneable override */
2440	if (igb_enable_msix == 0)
2441		goto msi;
2442
2443	/* First try MSI/X */
2444	rid = PCIR_BAR(IGB_MSIX_BAR);
2445	adapter->msix_mem = bus_alloc_resource_any(dev,
2446	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2447       	if (!adapter->msix_mem) {
2448		/* May not be enabled */
2449		device_printf(adapter->dev,
2450		    "Unable to map MSIX table \n");
2451		goto msi;
2452	}
2453
2454	msgs = pci_msix_count(dev);
2455	if (msgs == 0) { /* system has msix disabled */
2456		bus_release_resource(dev, SYS_RES_MEMORY,
2457		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2458		adapter->msix_mem = NULL;
2459		goto msi;
2460	}
2461
2462	/* Figure out a reasonable auto config value */
2463	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2464
2465	/* Manual override */
2466	if (igb_num_queues != 0)
2467		queues = igb_num_queues;
2468
2469	/* Can have max of 4 queues on 82575 */
2470	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2471		queues = 4;
2472
2473	/* Limit the VF adapter to one queue */
2474	if ((adapter->hw.mac.type == e1000_vfadapt) && (queues > 2))
2475		queues = 1;
2476
2477	/*
2478	** One vector (RX/TX pair) per queue
2479	** plus an additional for Link interrupt
2480	*/
2481	want = queues + 1;
2482	if (msgs >= want)
2483		msgs = want;
2484	else {
2485               	device_printf(adapter->dev,
2486		    "MSIX Configuration Problem, "
2487		    "%d vectors available, but %d wanted!\n",
2488		    msgs, want);
2489		return (ENXIO);
2490	}
2491	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2492               	device_printf(adapter->dev,
2493		    "Using MSIX interrupts with %d vectors\n", msgs);
2494		adapter->num_queues = queues;
2495		return (msgs);
2496	}
2497msi:
2498       	msgs = pci_msi_count(dev);
2499       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2500               	device_printf(adapter->dev,"Using MSI interrupt\n");
2501	return (msgs);
2502}
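
/*
 * Worked example of the vector accounting above (illustrative): on a
 * 4-core machine whose adapter reports 10 MSI-X messages,
 * queues = min(mp_ncpus, msgs - 1) = 4 and want = queues + 1 = 5, so
 * pci_alloc_msix() is asked for 5 vectors: one RX/TX pair per queue
 * plus the link vector.  Had fewer than 5 messages been available the
 * routine would print the configuration error and return ENXIO rather
 * than silently dropping queues.
 */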
2503
2504/*********************************************************************
2505 *
2506 *  Set up a fresh starting state
2507 *
2508 **********************************************************************/
2509static void
2510igb_reset(struct adapter *adapter)
2511{
2512	device_t	dev = adapter->dev;
2513	struct e1000_hw *hw = &adapter->hw;
2514	struct e1000_fc_info *fc = &hw->fc;
2515	struct ifnet	*ifp = adapter->ifp;
2516	u32		pba = 0;
2517	u16		hwm;
2518
2519	INIT_DEBUGOUT("igb_reset: begin");
2520
2521	/* Let the firmware know the OS is in control */
2522	igb_get_hw_control(adapter);
2523
2524	/*
2525	 * Packet Buffer Allocation (PBA)
2526	 * Writing PBA sets the receive portion of the buffer
2527	 * the remainder is used for the transmit buffer.
2528	 */
2529	switch (hw->mac.type) {
2530	case e1000_82575:
2531		pba = E1000_PBA_32K;
2532		break;
2533	case e1000_82576:
2534	case e1000_vfadapt:
2535		pba = E1000_PBA_64K;
2536		break;
2537	case e1000_82580:
2538		pba = E1000_PBA_35K;
2539	default:
2540		break;
2541	}
2542
2543	/* Special needs in case of Jumbo frames */
2544	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2545		u32 tx_space, min_tx, min_rx;
2546		pba = E1000_READ_REG(hw, E1000_PBA);
2547		tx_space = pba >> 16;
2548		pba &= 0xffff;
2549		min_tx = (adapter->max_frame_size +
2550		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2551		min_tx = roundup2(min_tx, 1024);
2552		min_tx >>= 10;
2553                min_rx = adapter->max_frame_size;
2554                min_rx = roundup2(min_rx, 1024);
2555                min_rx >>= 10;
2556		if (tx_space < min_tx &&
2557		    ((min_tx - tx_space) < pba)) {
2558			pba = pba - (min_tx - tx_space);
2559			/*
2560                         * if short on rx space, rx wins
2561                         * and must trump tx adjustment
2562			 */
2563                        if (pba < min_rx)
2564                                pba = min_rx;
2565		}
2566		E1000_WRITE_REG(hw, E1000_PBA, pba);
2567	}
2568
2569	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2570
2571	/*
2572	 * These parameters control the automatic generation (Tx) and
2573	 * response (Rx) to Ethernet PAUSE frames.
2574	 * - High water mark should allow for at least two frames to be
2575	 *   received after sending an XOFF.
2576	 * - Low water mark works best when it is very near the high water mark.
2577	 *   This allows the receiver to restart by sending XON when it has
2578	 *   drained a bit.
2579	 */
2580	hwm = min(((pba << 10) * 9 / 10),
2581	    ((pba << 10) - 2 * adapter->max_frame_size));
2582
2583	if (hw->mac.type < e1000_82576) {
2584		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2585		fc->low_water = fc->high_water - 8;
2586	} else {
2587		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2588		fc->low_water = fc->high_water - 16;
2589	}
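
	/*
	 * Worked example (illustrative, assuming pba = 64 KB and a
	 * max_frame_size of 1522 bytes): (pba << 10) * 9 / 10 = 58982 and
	 * (pba << 10) - 2 * 1522 = 62492, so hwm = 58982.  On 82576 and
	 * later that is masked to 16-byte granularity, giving
	 * high_water = 58976 and low_water = 58960.
	 */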
2590
2591	fc->pause_time = IGB_FC_PAUSE_TIME;
2592	fc->send_xon = TRUE;
2593
2594	/* Set Flow control, use the tunable location if sane */
2595	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2596		fc->requested_mode = igb_fc_setting;
2597	else
2598		fc->requested_mode = e1000_fc_none;
2599
2600	fc->current_mode = fc->requested_mode;
2601
2602	/* Issue a global reset */
2603	e1000_reset_hw(hw);
2604	E1000_WRITE_REG(hw, E1000_WUC, 0);
2605
2606	if (e1000_init_hw(hw) < 0)
2607		device_printf(dev, "Hardware Initialization Failed\n");
2608
2609	if (hw->mac.type == e1000_82580) {
2610		u32 reg;
2611
2612		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2613		/*
2614		 * 0x80000000 - enable DMA COAL
2615		 * 0x10000000 - use L0s as low power
2616		 * 0x20000000 - use L1 as low power
2617		 * X << 16 - exit dma coal when rx data exceeds X kB
2618		 * Y - upper limit to stay in dma coal in units of 32usecs
2619		 */
2620		E1000_WRITE_REG(hw, E1000_DMACR,
2621		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2622
2623		/* set hwm to PBA -  2 * max frame size */
2624		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2625		/*
2626		 * This sets the time to wait before requesting transition to
2627		 * low power state to number of usecs needed to receive 1 512
2628		 * byte frame at gigabit line rate
2629		 */
2630		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2631
2632		/* free space in tx packet buffer to wake from DMA coal */
2633		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2634		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2635
2636		/* make low power state decision controlled by DMA coal */
2637		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2638		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2639		    reg | E1000_PCIEMISC_LX_DECISION);
2640	}
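
	/*
	 * Illustrative DMACR arithmetic for the write above: if hwm came
	 * to 32796 bytes, (hwm << 6) & 0x00FF0000 = 0x00200000, i.e. 0x20
	 * (32 kB) in the "exit coalescing" field at bits 23:16, and the
	 * value written would be 0xA0000006 | 0x00200000 = 0xA0200006.
	 */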
2641
2642	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2643	e1000_get_phy_info(hw);
2644	e1000_check_for_link(hw);
2645	return;
2646}
2647
2648/*********************************************************************
2649 *
2650 *  Setup networking device structure and register an interface.
2651 *
2652 **********************************************************************/
2653static void
2654igb_setup_interface(device_t dev, struct adapter *adapter)
2655{
2656	struct ifnet   *ifp;
2657
2658	INIT_DEBUGOUT("igb_setup_interface: begin");
2659
2660	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2661	if (ifp == NULL)
2662		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2663	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2664	ifp->if_mtu = ETHERMTU;
2665	ifp->if_init =  igb_init;
2666	ifp->if_softc = adapter;
2667	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2668	ifp->if_ioctl = igb_ioctl;
2669	ifp->if_start = igb_start;
2670#if __FreeBSD_version >= 800000
2671	ifp->if_transmit = igb_mq_start;
2672	ifp->if_qflush = igb_qflush;
2673#endif
2674	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2675	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2676	IFQ_SET_READY(&ifp->if_snd);
2677
2678	ether_ifattach(ifp, adapter->hw.mac.addr);
2679
2680	ifp->if_capabilities = ifp->if_capenable = 0;
2681
2682	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2683	ifp->if_capabilities |= IFCAP_TSO4;
2684	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2685	if (igb_header_split)
2686		ifp->if_capabilities |= IFCAP_LRO;
2687
2688	ifp->if_capenable = ifp->if_capabilities;
2689#ifdef DEVICE_POLLING
2690	ifp->if_capabilities |= IFCAP_POLLING;
2691#endif
2692
2693	/*
2694	 * Tell the upper layer(s) we
2695	 * support full VLAN capability.
2696	 */
2697	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2698	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2699	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2700
2701	/*
2702	** Don't turn this on by default; if vlans are
2703	** created on another pseudo device (e.g. lagg)
2704	** then vlan events are not passed thru, breaking
2705	** operation, but with HW FILTER off it works. If
2706	** using vlans directly on the igb driver you can
2707	** enable this and get full hardware tag filtering.
2708	*/
2709	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2710
2711	/*
2712	 * Specify the media types supported by this adapter and register
2713	 * callbacks to update media and link information
2714	 */
2715	ifmedia_init(&adapter->media, IFM_IMASK,
2716	    igb_media_change, igb_media_status);
2717	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2718	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2719		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2720			    0, NULL);
2721		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2722	} else {
2723		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2724		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2725			    0, NULL);
2726		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2727			    0, NULL);
2728		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2729			    0, NULL);
2730		if (adapter->hw.phy.type != e1000_phy_ife) {
2731			ifmedia_add(&adapter->media,
2732				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2733			ifmedia_add(&adapter->media,
2734				IFM_ETHER | IFM_1000_T, 0, NULL);
2735		}
2736	}
2737	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2738	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2739}
2740
2741
2742/*
2743 * Manage DMA'able memory.
2744 */
2745static void
2746igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2747{
2748	if (error)
2749		return;
2750	*(bus_addr_t *) arg = segs[0].ds_addr;
2751}
2752
2753static int
2754igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2755        struct igb_dma_alloc *dma, int mapflags)
2756{
2757	int error;
2758
2759	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2760				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2761				BUS_SPACE_MAXADDR,	/* lowaddr */
2762				BUS_SPACE_MAXADDR,	/* highaddr */
2763				NULL, NULL,		/* filter, filterarg */
2764				size,			/* maxsize */
2765				1,			/* nsegments */
2766				size,			/* maxsegsize */
2767				0,			/* flags */
2768				NULL,			/* lockfunc */
2769				NULL,			/* lockarg */
2770				&dma->dma_tag);
2771	if (error) {
2772		device_printf(adapter->dev,
2773		    "%s: bus_dma_tag_create failed: %d\n",
2774		    __func__, error);
2775		goto fail_0;
2776	}
2777
2778	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2779	    BUS_DMA_NOWAIT, &dma->dma_map);
2780	if (error) {
2781		device_printf(adapter->dev,
2782		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2783		    __func__, (uintmax_t)size, error);
2784		goto fail_2;
2785	}
2786
2787	dma->dma_paddr = 0;
2788	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2789	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2790	if (error || dma->dma_paddr == 0) {
2791		device_printf(adapter->dev,
2792		    "%s: bus_dmamap_load failed: %d\n",
2793		    __func__, error);
2794		goto fail_3;
2795	}
2796
2797	return (0);
2798
2799fail_3:
2800	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2801fail_2:
2802	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2803	bus_dma_tag_destroy(dma->dma_tag);
2804fail_0:
2805	dma->dma_map = NULL;
2806	dma->dma_tag = NULL;
2807
2808	return (error);
2809}
2810
2811static void
2812igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2813{
2814	if (dma->dma_tag == NULL)
2815		return;
2816	if (dma->dma_map != NULL) {
2817		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2818		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2819		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2820		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2821		dma->dma_map = NULL;
2822	}
2823	bus_dma_tag_destroy(dma->dma_tag);
2824	dma->dma_tag = NULL;
2825}
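
#if 0
/*
 * Minimal usage sketch for the two helpers above (illustrative only,
 * compiled out): allocate a 4 KB DMA-safe region, touch it through
 * dma_vaddr, hand dma_paddr to the hardware, then release it.  The
 * function name igb_dma_example is hypothetical.
 */
static int
igb_dma_example(struct adapter *adapter)
{
	struct igb_dma_alloc dma;

	if (igb_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) != 0)
		return (ENOMEM);
	bzero(dma.dma_vaddr, 4096);	/* CPU view of the buffer */
	/* dma.dma_paddr would be programmed into a device register */
	igb_dma_free(adapter, &dma);
	return (0);
}
#endif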
2826
2827
2828/*********************************************************************
2829 *
2830 *  Allocate memory for the transmit and receive rings, and then
2831 *  the descriptors associated with each, called only once at attach.
2832 *
2833 **********************************************************************/
2834static int
2835igb_allocate_queues(struct adapter *adapter)
2836{
2837	device_t dev = adapter->dev;
2838	struct igb_queue	*que = NULL;
2839	struct tx_ring		*txr = NULL;
2840	struct rx_ring		*rxr = NULL;
2841	int rsize, tsize, error = E1000_SUCCESS;
2842	int txconf = 0, rxconf = 0;
2843
2844	/* First allocate the top level queue structs */
2845	if (!(adapter->queues =
2846	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2847	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2848		device_printf(dev, "Unable to allocate queue memory\n");
2849		error = ENOMEM;
2850		goto fail;
2851	}
2852
2853	/* Next allocate the TX ring struct memory */
2854	if (!(adapter->tx_rings =
2855	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2856	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2857		device_printf(dev, "Unable to allocate TX ring memory\n");
2858		error = ENOMEM;
2859		goto tx_fail;
2860	}
2861
2862	/* Now allocate the RX */
2863	if (!(adapter->rx_rings =
2864	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2865	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2866		device_printf(dev, "Unable to allocate RX ring memory\n");
2867		error = ENOMEM;
2868		goto rx_fail;
2869	}
2870
2871	tsize = roundup2(adapter->num_tx_desc *
2872	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2873	/*
2874	 * Now set up the TX queues, txconf is needed to handle the
2875	 * possibility that things fail midcourse and we need to
2876	 * undo memory gracefully
2877	 */
2878	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2879		/* Set up some basics */
2880		txr = &adapter->tx_rings[i];
2881		txr->adapter = adapter;
2882		txr->me = i;
2883
2884		/* Initialize the TX lock */
2885		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2886		    device_get_nameunit(dev), txr->me);
2887		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2888
2889		if (igb_dma_malloc(adapter, tsize,
2890			&txr->txdma, BUS_DMA_NOWAIT)) {
2891			device_printf(dev,
2892			    "Unable to allocate TX Descriptor memory\n");
2893			error = ENOMEM;
2894			goto err_tx_desc;
2895		}
2896		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2897		bzero((void *)txr->tx_base, tsize);
2898
2899        	/* Now allocate transmit buffers for the ring */
2900        	if (igb_allocate_transmit_buffers(txr)) {
2901			device_printf(dev,
2902			    "Critical Failure setting up transmit buffers\n");
2903			error = ENOMEM;
2904			goto err_tx_desc;
2905        	}
2906#if __FreeBSD_version >= 800000
2907		/* Allocate a buf ring */
2908		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2909		    M_WAITOK, &txr->tx_mtx);
2910#endif
2911	}
2912
2913	/*
2914	 * Next the RX queues...
2915	 */
2916	rsize = roundup2(adapter->num_rx_desc *
2917	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2918	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2919		rxr = &adapter->rx_rings[i];
2920		rxr->adapter = adapter;
2921		rxr->me = i;
2922
2923		/* Initialize the RX lock */
2924		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2925		    device_get_nameunit(dev), rxr->me);
2926		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2927
2928		if (igb_dma_malloc(adapter, rsize,
2929			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2930			device_printf(dev,
2931			    "Unable to allocate Rx Descriptor memory\n");
2932			error = ENOMEM;
2933			goto err_rx_desc;
2934		}
2935		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2936		bzero((void *)rxr->rx_base, rsize);
2937
2938        	/* Allocate receive buffers for the ring*/
2939		if (igb_allocate_receive_buffers(rxr)) {
2940			device_printf(dev,
2941			    "Critical Failure setting up receive buffers\n");
2942			error = ENOMEM;
2943			goto err_rx_desc;
2944		}
2945	}
2946
2947	/*
2948	** Finally set up the queue holding structs
2949	*/
2950	for (int i = 0; i < adapter->num_queues; i++) {
2951		que = &adapter->queues[i];
2952		que->adapter = adapter;
2953		que->txr = &adapter->tx_rings[i];
2954		que->rxr = &adapter->rx_rings[i];
2955	}
2956
2957	return (0);
2958
2959err_rx_desc:
2960	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2961		igb_dma_free(adapter, &rxr->rxdma);
2962err_tx_desc:
2963	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2964		igb_dma_free(adapter, &txr->txdma);
2965	free(adapter->rx_rings, M_DEVBUF);
2966rx_fail:
2967#if __FreeBSD_version >= 800000
2968	buf_ring_free(txr->br, M_DEVBUF);
2969#endif
2970	free(adapter->tx_rings, M_DEVBUF);
2971tx_fail:
2972	free(adapter->queues, M_DEVBUF);
2973fail:
2974	return (error);
2975}
2976
2977/*********************************************************************
2978 *
2979 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2980 *  the information needed to transmit a packet on the wire. This is
2981 *  called only once at attach, setup is done every reset.
2982 *
2983 **********************************************************************/
2984static int
2985igb_allocate_transmit_buffers(struct tx_ring *txr)
2986{
2987	struct adapter *adapter = txr->adapter;
2988	device_t dev = adapter->dev;
2989	struct igb_tx_buffer *txbuf;
2990	int error, i;
2991
2992	/*
2993	 * Setup DMA descriptor areas.
2994	 */
2995	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2996			       1, 0,			/* alignment, bounds */
2997			       BUS_SPACE_MAXADDR,	/* lowaddr */
2998			       BUS_SPACE_MAXADDR,	/* highaddr */
2999			       NULL, NULL,		/* filter, filterarg */
3000			       IGB_TSO_SIZE,		/* maxsize */
3001			       IGB_MAX_SCATTER,		/* nsegments */
3002			       PAGE_SIZE,		/* maxsegsize */
3003			       0,			/* flags */
3004			       NULL,			/* lockfunc */
3005			       NULL,			/* lockfuncarg */
3006			       &txr->txtag))) {
3007		device_printf(dev,"Unable to allocate TX DMA tag\n");
3008		goto fail;
3009	}
3010
3011	if (!(txr->tx_buffers =
3012	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3013	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3014		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3015		error = ENOMEM;
3016		goto fail;
3017	}
3018
3019        /* Create the descriptor buffer dma maps */
3020	txbuf = txr->tx_buffers;
3021	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3022		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3023		if (error != 0) {
3024			device_printf(dev, "Unable to create TX DMA map\n");
3025			goto fail;
3026		}
3027	}
3028
3029	return 0;
3030fail:
3031	/* Free everything; this handles the case where setup failed partway through */
3032	igb_free_transmit_structures(adapter);
3033	return (error);
3034}
3035
3036/*********************************************************************
3037 *
3038 *  Initialize a transmit ring.
3039 *
3040 **********************************************************************/
3041static void
3042igb_setup_transmit_ring(struct tx_ring *txr)
3043{
3044	struct adapter *adapter = txr->adapter;
3045	struct igb_tx_buffer *txbuf;
3046	int i;
3047
3048	/* Clear the old descriptor contents */
3049	IGB_TX_LOCK(txr);
3050	bzero((void *)txr->tx_base,
3051	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3052	/* Reset indices */
3053	txr->next_avail_desc = 0;
3054	txr->next_to_clean = 0;
3055
3056	/* Free any existing tx buffers. */
3057        txbuf = txr->tx_buffers;
3058	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3059		if (txbuf->m_head != NULL) {
3060			bus_dmamap_sync(txr->txtag, txbuf->map,
3061			    BUS_DMASYNC_POSTWRITE);
3062			bus_dmamap_unload(txr->txtag, txbuf->map);
3063			m_freem(txbuf->m_head);
3064			txbuf->m_head = NULL;
3065		}
3066		/* clear the watch index */
3067		txbuf->next_eop = -1;
3068        }
3069
3070	/* Set number of descriptors available */
3071	txr->tx_avail = adapter->num_tx_desc;
3072
3073	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3074	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3075	IGB_TX_UNLOCK(txr);
3076}
3077
3078/*********************************************************************
3079 *
3080 *  Initialize all transmit rings.
3081 *
3082 **********************************************************************/
3083static void
3084igb_setup_transmit_structures(struct adapter *adapter)
3085{
3086	struct tx_ring *txr = adapter->tx_rings;
3087
3088	for (int i = 0; i < adapter->num_queues; i++, txr++)
3089		igb_setup_transmit_ring(txr);
3090
3091	return;
3092}
3093
3094/*********************************************************************
3095 *
3096 *  Enable transmit unit.
3097 *
3098 **********************************************************************/
3099static void
3100igb_initialize_transmit_units(struct adapter *adapter)
3101{
3102	struct tx_ring	*txr = adapter->tx_rings;
3103	struct e1000_hw *hw = &adapter->hw;
3104	u32		tctl, txdctl;
3105
3106	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3107	tctl = txdctl = 0;
3108
3109	/* Setup the Tx Descriptor Rings */
3110	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3111		u64 bus_addr = txr->txdma.dma_paddr;
3112
3113		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3114		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3115		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3116		    (uint32_t)(bus_addr >> 32));
3117		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3118		    (uint32_t)bus_addr);
3119
3120		/* Setup the HW Tx Head and Tail descriptor pointers */
3121		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3122		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3123
3124		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3125		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3126		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3127
3128		txr->watchdog_check = FALSE;
3129
3130		txdctl |= IGB_TX_PTHRESH;
3131		txdctl |= IGB_TX_HTHRESH << 8;
3132		txdctl |= IGB_TX_WTHRESH << 16;
3133		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3134		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3135	}
3136
3137	if (adapter->hw.mac.type == e1000_vfadapt)
3138		return;
3139
3140	/* Program the Transmit Control Register */
3141	tctl = E1000_READ_REG(hw, E1000_TCTL);
3142	tctl &= ~E1000_TCTL_CT;
3143	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3144		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3145
3146	e1000_config_collision_dist(hw);
3147
3148	/* This write will effectively turn on the transmit unit. */
3149	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3150}
3151
3152/*********************************************************************
3153 *
3154 *  Free all transmit rings.
3155 *
3156 **********************************************************************/
3157static void
3158igb_free_transmit_structures(struct adapter *adapter)
3159{
3160	struct tx_ring *txr = adapter->tx_rings;
3161
3162	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3163		IGB_TX_LOCK(txr);
3164		igb_free_transmit_buffers(txr);
3165		igb_dma_free(adapter, &txr->txdma);
3166		IGB_TX_UNLOCK(txr);
3167		IGB_TX_LOCK_DESTROY(txr);
3168	}
3169	free(adapter->tx_rings, M_DEVBUF);
3170}
3171
3172/*********************************************************************
3173 *
3174 *  Free transmit ring related data structures.
3175 *
3176 **********************************************************************/
3177static void
3178igb_free_transmit_buffers(struct tx_ring *txr)
3179{
3180	struct adapter *adapter = txr->adapter;
3181	struct igb_tx_buffer *tx_buffer;
3182	int             i;
3183
3184	INIT_DEBUGOUT("free_transmit_ring: begin");
3185
3186	if (txr->tx_buffers == NULL)
3187		return;
3188
3189	tx_buffer = txr->tx_buffers;
3190	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3191		if (tx_buffer->m_head != NULL) {
3192			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3193			    BUS_DMASYNC_POSTWRITE);
3194			bus_dmamap_unload(txr->txtag,
3195			    tx_buffer->map);
3196			m_freem(tx_buffer->m_head);
3197			tx_buffer->m_head = NULL;
3198			if (tx_buffer->map != NULL) {
3199				bus_dmamap_destroy(txr->txtag,
3200				    tx_buffer->map);
3201				tx_buffer->map = NULL;
3202			}
3203		} else if (tx_buffer->map != NULL) {
3204			bus_dmamap_unload(txr->txtag,
3205			    tx_buffer->map);
3206			bus_dmamap_destroy(txr->txtag,
3207			    tx_buffer->map);
3208			tx_buffer->map = NULL;
3209		}
3210	}
3211#if __FreeBSD_version >= 800000
3212	if (txr->br != NULL)
3213		buf_ring_free(txr->br, M_DEVBUF);
3214#endif
3215	if (txr->tx_buffers != NULL) {
3216		free(txr->tx_buffers, M_DEVBUF);
3217		txr->tx_buffers = NULL;
3218	}
3219	if (txr->txtag != NULL) {
3220		bus_dma_tag_destroy(txr->txtag);
3221		txr->txtag = NULL;
3222	}
3223	return;
3224}
3225
3226/**********************************************************************
3227 *
3228 *  Setup work for hardware segmentation offload (TSO)
3229 *
3230 **********************************************************************/
3231static boolean_t
3232igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3233{
3234	struct adapter *adapter = txr->adapter;
3235	struct e1000_adv_tx_context_desc *TXD;
3236	struct igb_tx_buffer        *tx_buffer;
3237	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3238	u32 mss_l4len_idx = 0;
3239	u16 vtag = 0;
3240	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3241	struct ether_vlan_header *eh;
3242	struct ip *ip;
3243	struct tcphdr *th;
3244
3245
3246	/*
3247	 * Determine where frame payload starts.
3248	 * Jump over vlan headers if already present
3249	 */
3250	eh = mtod(mp, struct ether_vlan_header *);
3251	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3252		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3253	else
3254		ehdrlen = ETHER_HDR_LEN;
3255
3256	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3257	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3258		return FALSE;
3259
3260	/* Only supports IPV4 for now */
3261	ctxd = txr->next_avail_desc;
3262	tx_buffer = &txr->tx_buffers[ctxd];
3263	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3264
3265	ip = (struct ip *)(mp->m_data + ehdrlen);
3266	if (ip->ip_p != IPPROTO_TCP)
3267                return FALSE;   /* 0 */
3268	ip->ip_sum = 0;
3269	ip_hlen = ip->ip_hl << 2;
3270	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3271	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3272	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3273	tcp_hlen = th->th_off << 2;
3274	/*
3275	 * Calculate header length, this is used
3276	 * in the transmit desc in igb_xmit
3277	 */
3278	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3279
3280	/* VLAN MACLEN IPLEN */
3281	if (mp->m_flags & M_VLANTAG) {
3282		vtag = htole16(mp->m_pkthdr.ether_vtag);
3283		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3284	}
3285
3286	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3287	vlan_macip_lens |= ip_hlen;
3288	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3289
3290	/* ADV DTYPE TUCMD */
3291	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3292	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3293	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3294	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3295
3296	/* MSS L4LEN IDX */
3297	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3298	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3299	/* 82575 needs the queue index added */
3300	if (adapter->hw.mac.type == e1000_82575)
3301		mss_l4len_idx |= txr->me << 4;
3302	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3303
3304	TXD->seqnum_seed = htole32(0);
3305	tx_buffer->m_head = NULL;
3306	tx_buffer->next_eop = -1;
3307
3308	if (++ctxd == adapter->num_tx_desc)
3309		ctxd = 0;
3310
3311	txr->tx_avail--;
3312	txr->next_avail_desc = ctxd;
3313	return TRUE;
3314}
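
/*
 * Worked example for the header length computed above (illustrative):
 * a TSO frame with a plain 14-byte Ethernet header, a 20-byte IPv4
 * header and a 20-byte TCP header with no options gives
 * *hdrlen = 14 + 20 + 20 = 54; igb_xmit() subtracts that from
 * m_pkthdr.len so the descriptor's PAYLEN field covers TCP payload
 * only, as TSO requires.
 */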
3315
3316
3317/*********************************************************************
3318 *
3319 *  Context Descriptor setup for VLAN or CSUM
3320 *
3321 **********************************************************************/
3322
3323static bool
3324igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3325{
3326	struct adapter *adapter = txr->adapter;
3327	struct e1000_adv_tx_context_desc *TXD;
3328	struct igb_tx_buffer        *tx_buffer;
3329	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3330	struct ether_vlan_header *eh;
3331	struct ip *ip = NULL;
3332	struct ip6_hdr *ip6;
3333	int  ehdrlen, ctxd, ip_hlen = 0;
3334	u16	etype, vtag = 0;
3335	u8	ipproto = 0;
3336	bool	offload = TRUE;
3337
3338	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3339		offload = FALSE;
3340
3341	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3342	ctxd = txr->next_avail_desc;
3343	tx_buffer = &txr->tx_buffers[ctxd];
3344	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3345
3346	/*
3347	** In advanced descriptors the vlan tag must
3348	** be placed into the context descriptor, thus
3349	** we need to be here just for that setup.
3350	*/
3351	if (mp->m_flags & M_VLANTAG) {
3352		vtag = htole16(mp->m_pkthdr.ether_vtag);
3353		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3354	} else if (offload == FALSE)
3355		return FALSE;
3356
3357	/*
3358	 * Determine where frame payload starts.
3359	 * Jump over vlan headers if already present,
3360	 * helpful for QinQ too.
3361	 */
3362	eh = mtod(mp, struct ether_vlan_header *);
3363	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3364		etype = ntohs(eh->evl_proto);
3365		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3366	} else {
3367		etype = ntohs(eh->evl_encap_proto);
3368		ehdrlen = ETHER_HDR_LEN;
3369	}
3370
3371	/* Set the ether header length */
3372	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3373
3374	switch (etype) {
3375		case ETHERTYPE_IP:
3376			ip = (struct ip *)(mp->m_data + ehdrlen);
3377			ip_hlen = ip->ip_hl << 2;
3378			if (mp->m_len < ehdrlen + ip_hlen) {
3379				offload = FALSE;
3380				break;
3381			}
3382			ipproto = ip->ip_p;
3383			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3384			break;
3385		case ETHERTYPE_IPV6:
3386			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3387			ip_hlen = sizeof(struct ip6_hdr);
3388			if (mp->m_len < ehdrlen + ip_hlen)
3389				return (FALSE);
3390			ipproto = ip6->ip6_nxt;
3391			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3392			break;
3393		default:
3394			offload = FALSE;
3395			break;
3396	}
3397
3398	vlan_macip_lens |= ip_hlen;
3399	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3400
3401	switch (ipproto) {
3402		case IPPROTO_TCP:
3403			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3404				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3405			break;
3406		case IPPROTO_UDP:
3407			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3408				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3409			break;
3410#if __FreeBSD_version >= 800000
3411		case IPPROTO_SCTP:
3412			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3413				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3414			break;
3415#endif
3416		default:
3417			offload = FALSE;
3418			break;
3419	}
3420
3421	/* 82575 needs the queue index added */
3422	if (adapter->hw.mac.type == e1000_82575)
3423		mss_l4len_idx = txr->me << 4;
3424
3425	/* Now copy bits into descriptor */
3426	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3427	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3428	TXD->seqnum_seed = htole32(0);
3429	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3430
3431	tx_buffer->m_head = NULL;
3432	tx_buffer->next_eop = -1;
3433
3434	/* We've consumed the first desc, adjust counters */
3435	if (++ctxd == adapter->num_tx_desc)
3436		ctxd = 0;
3437	txr->next_avail_desc = ctxd;
3438	--txr->tx_avail;
3439
3440        return (offload);
3441}
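
/*
 * Illustrative packing for the context descriptor above (assumes the
 * usual shift definitions, MACLEN shift = 9 and VLAN shift = 16): an
 * untagged TCP/IPv4 packet with a 14-byte Ethernet header and a
 * 20-byte IP header yields vlan_macip_lens = (14 << 9) | 20 = 0x1c14,
 * while type_tucmd_mlhl carries DEXT | DTYP_CTXT | TUCMD_IPV4 |
 * TUCMD_L4T_TCP when CSUM_TCP was requested.
 */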
3442
3443
3444/**********************************************************************
3445 *
3446 *  Examine each tx_buffer in the used queue. If the hardware is done
3447 *  processing the packet then free associated resources. The
3448 *  tx_buffer is put back on the free queue.
3449 *
3450 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3451 **********************************************************************/
3452static bool
3453igb_txeof(struct tx_ring *txr)
3454{
3455	struct adapter	*adapter = txr->adapter;
3456        int first, last, done;
3457        struct igb_tx_buffer *tx_buffer;
3458        struct e1000_tx_desc   *tx_desc, *eop_desc;
3459	struct ifnet   *ifp = adapter->ifp;
3460
3461	IGB_TX_LOCK_ASSERT(txr);
3462
3463        if (txr->tx_avail == adapter->num_tx_desc)
3464                return FALSE;
3465
3466        first = txr->next_to_clean;
3467        tx_desc = &txr->tx_base[first];
3468        tx_buffer = &txr->tx_buffers[first];
3469	last = tx_buffer->next_eop;
3470        eop_desc = &txr->tx_base[last];
3471
3472	/*
3473	 * What this does is get the index of the
3474	 * first descriptor AFTER the EOP of the
3475	 * first packet, that way we can do the
3476	 * simple comparison on the inner while loop.
3477	 */
3478	if (++last == adapter->num_tx_desc)
3479 		last = 0;
3480	done = last;
3481
3482        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3483            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3484
3485        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3486		/* We clean the range of the packet */
3487		while (first != done) {
3488                	tx_desc->upper.data = 0;
3489                	tx_desc->lower.data = 0;
3490                	tx_desc->buffer_addr = 0;
3491                	++txr->tx_avail;
3492
3493			if (tx_buffer->m_head) {
3494				txr->bytes +=
3495				    tx_buffer->m_head->m_pkthdr.len;
3496				bus_dmamap_sync(txr->txtag,
3497				    tx_buffer->map,
3498				    BUS_DMASYNC_POSTWRITE);
3499				bus_dmamap_unload(txr->txtag,
3500				    tx_buffer->map);
3501
3502                        	m_freem(tx_buffer->m_head);
3503                        	tx_buffer->m_head = NULL;
3504                	}
3505			tx_buffer->next_eop = -1;
3506			txr->watchdog_time = ticks;
3507
3508	                if (++first == adapter->num_tx_desc)
3509				first = 0;
3510
3511	                tx_buffer = &txr->tx_buffers[first];
3512			tx_desc = &txr->tx_base[first];
3513		}
3514		++txr->packets;
3515		++ifp->if_opackets;
3516		/* See if we can continue to the next packet */
3517		last = tx_buffer->next_eop;
3518		if (last != -1) {
3519        		eop_desc = &txr->tx_base[last];
3520			/* Get new done point */
3521			if (++last == adapter->num_tx_desc) last = 0;
3522			done = last;
3523		} else
3524			break;
3525        }
3526        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3527            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3528
3529        txr->next_to_clean = first;
3530
3531        /*
3532         * If we have enough room, clear IFF_DRV_OACTIVE
3533         * to tell the stack that it is OK to send packets.
3534         */
3535        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3536                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3537		/* All clean, turn off the watchdog */
3538                if (txr->tx_avail == adapter->num_tx_desc) {
3539			txr->watchdog_check = FALSE;
3540			return (FALSE);
3541		}
3542        }
3543
3544	return (TRUE);
3545}
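
/*
 * Index walk example (illustrative): if next_to_clean is 10 and that
 * buffer's next_eop is 13, "done" becomes 14; once descriptor 13
 * reports DD status the inner loop frees buffers 10 through 13, then
 * the outer loop advances to the next packet's EOP, if any.
 */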
3546
3547
3548/*********************************************************************
3549 *
3550 *  Refresh mbuf buffers for RX descriptor rings
3551 *   - now keeps its own state, so discards due to resource
3552 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3553 *     it just returns, keeping its placeholder, and can simply
3554 *     be called again to retry.
3555 *
3556 **********************************************************************/
3557static void
3558igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3559{
3560	struct adapter		*adapter = rxr->adapter;
3561	bus_dma_segment_t	hseg[1];
3562	bus_dma_segment_t	pseg[1];
3563	struct igb_rx_buf	*rxbuf;
3564	struct mbuf		*mh, *mp;
3565	int			i, nsegs, error, cleaned;
3566
3567	i = rxr->next_to_refresh;
3568	cleaned = -1; /* Signify no completions */
3569	while (i != limit) {
3570		rxbuf = &rxr->rx_buffers[i];
3571		if ((rxbuf->m_head == NULL) && (rxr->hdr_split)) {
3572			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3573			if (mh == NULL)
3574				goto update;
3575			mh->m_pkthdr.len = mh->m_len = MHLEN;
3577			mh->m_flags |= M_PKTHDR;
3578			m_adj(mh, ETHER_ALIGN);
3579			/* Get the memory mapping */
3580			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3581			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3582			if (error != 0) {
3583				printf("GET BUF: dmamap load"
3584				    " failure - %d\n", error);
3585				m_free(mh);
3586				goto update;
3587			}
3588			rxbuf->m_head = mh;
3589			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3590			    BUS_DMASYNC_PREREAD);
3591			rxr->rx_base[i].read.hdr_addr =
3592			    htole64(hseg[0].ds_addr);
3593		}
3594
3595		if (rxbuf->m_pack == NULL) {
3596			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3597			    M_PKTHDR, adapter->rx_mbuf_sz);
3598			if (mp == NULL)
3599				goto update;
3600			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3601			/* Get the memory mapping */
3602			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3603			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3604			if (error != 0) {
3605				printf("GET BUF: dmamap load"
3606				    " failure - %d\n", error);
3607				m_free(mp);
3608				goto update;
3609			}
3610			rxbuf->m_pack = mp;
3611			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3612			    BUS_DMASYNC_PREREAD);
3613			rxr->rx_base[i].read.pkt_addr =
3614			    htole64(pseg[0].ds_addr);
3615		}
3616
3617		cleaned = i;
3618		/* Calculate next index */
3619		if (++i == adapter->num_rx_desc)
3620			i = 0;
3621		/* This is the work marker for refresh */
3622		rxr->next_to_refresh = i;
3623	}
3624update:
3625	if (cleaned != -1) /* If we refreshed some, bump tail */
3626		E1000_WRITE_REG(&adapter->hw,
3627		    E1000_RDT(rxr->me), cleaned);
3628	return;
3629}
3630
3631
3632/*********************************************************************
3633 *
3634 *  Allocate memory for rx_buffer structures. Since we use one
3635 *  rx_buffer per received packet, the maximum number of rx_buffer's
3636 *  that we'll need is equal to the number of receive descriptors
3637 *  that we've allocated.
3638 *
3639 **********************************************************************/
3640static int
3641igb_allocate_receive_buffers(struct rx_ring *rxr)
3642{
3643	struct	adapter 	*adapter = rxr->adapter;
3644	device_t 		dev = adapter->dev;
3645	struct igb_rx_buf	*rxbuf;
3646	int             	i, bsize, error;
3647
3648	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3649	if (!(rxr->rx_buffers =
3650	    (struct igb_rx_buf *) malloc(bsize,
3651	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3652		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3653		error = ENOMEM;
3654		goto fail;
3655	}
3656
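	/*
	 * Two DMA tags are used: a small (MSIZE) tag for the
	 * header-split buffers and a page-sized (MJUMPAGESIZE)
	 * tag for the payload clusters.
	 */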
3657	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3658				   1, 0,		/* alignment, bounds */
3659				   BUS_SPACE_MAXADDR,	/* lowaddr */
3660				   BUS_SPACE_MAXADDR,	/* highaddr */
3661				   NULL, NULL,		/* filter, filterarg */
3662				   MSIZE,		/* maxsize */
3663				   1,			/* nsegments */
3664				   MSIZE,		/* maxsegsize */
3665				   0,			/* flags */
3666				   NULL,		/* lockfunc */
3667				   NULL,		/* lockfuncarg */
3668				   &rxr->htag))) {
3669		device_printf(dev, "Unable to create RX DMA tag\n");
3670		goto fail;
3671	}
3672
3673	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3674				   1, 0,		/* alignment, bounds */
3675				   BUS_SPACE_MAXADDR,	/* lowaddr */
3676				   BUS_SPACE_MAXADDR,	/* highaddr */
3677				   NULL, NULL,		/* filter, filterarg */
3678				   MJUMPAGESIZE,	/* maxsize */
3679				   1,			/* nsegments */
3680				   MJUMPAGESIZE,	/* maxsegsize */
3681				   0,			/* flags */
3682				   NULL,		/* lockfunc */
3683				   NULL,		/* lockfuncarg */
3684				   &rxr->ptag))) {
3685		device_printf(dev, "Unable to create RX payload DMA tag\n");
3686		goto fail;
3687	}
3688
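	/* Create a header map and a payload map for each descriptor */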
3689	for (i = 0; i < adapter->num_rx_desc; i++) {
3690		rxbuf = &rxr->rx_buffers[i];
3691		error = bus_dmamap_create(rxr->htag,
3692		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3693		if (error) {
3694			device_printf(dev,
3695			    "Unable to create RX head DMA maps\n");
3696			goto fail;
3697		}
3698		error = bus_dmamap_create(rxr->ptag,
3699		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3700		if (error) {
3701			device_printf(dev,
3702			    "Unable to create RX packet DMA maps\n");
3703			goto fail;
3704		}
3705	}
3706
3707	return (0);
3708
3709fail:
3710	/* Frees all, but can handle partial completion */
3711	igb_free_receive_structures(adapter);
3712	return (error);
3713}
3714
3715
3716static void
3717igb_free_receive_ring(struct rx_ring *rxr)
3718{
3719	struct	adapter		*adapter;
3720	struct igb_rx_buf	*rxbuf;
3721	int i;
3722
3723	adapter = rxr->adapter;
3724	for (i = 0; i < adapter->num_rx_desc; i++) {
3725		rxbuf = &rxr->rx_buffers[i];
3726		if (rxbuf->m_head != NULL) {
3727			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3728			    BUS_DMASYNC_POSTREAD);
3729			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3730			rxbuf->m_head->m_flags |= M_PKTHDR;
3731			m_freem(rxbuf->m_head);
3732		}
3733		if (rxbuf->m_pack != NULL) {
3734			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3735			    BUS_DMASYNC_POSTREAD);
3736			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3737			rxbuf->m_pack->m_flags |= M_PKTHDR;
3738			m_freem(rxbuf->m_pack);
3739		}
3740		rxbuf->m_head = NULL;
3741		rxbuf->m_pack = NULL;
3742	}
3743}
3744
3745
3746/*********************************************************************
3747 *
3748 *  Initialize a receive ring and its buffers.
3749 *
3750 **********************************************************************/
3751static int
3752igb_setup_receive_ring(struct rx_ring *rxr)
3753{
3754	struct	adapter		*adapter;
3755	struct  ifnet		*ifp;
3756	device_t		dev;
3757	struct igb_rx_buf	*rxbuf;
3758	bus_dma_segment_t	pseg[1], hseg[1];
3759	struct lro_ctrl		*lro = &rxr->lro;
3760	int			rsize, nsegs, error = 0;
3761
3762	adapter = rxr->adapter;
3763	dev = adapter->dev;
3764	ifp = adapter->ifp;
3765
3766	/* Clear the ring contents */
3767	IGB_RX_LOCK(rxr);
3768	rsize = roundup2(adapter->num_rx_desc *
3769	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3770	bzero((void *)rxr->rx_base, rsize);
3771
3772	/*
3773	** Free current RX buffer structures and their mbufs
3774	*/
3775	igb_free_receive_ring(rxr);
3776
3777	/* Configure for header split? */
3778	if (igb_header_split)
3779		rxr->hdr_split = TRUE;
3780
3781        /* Now replenish the ring mbufs */
3782	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3783		struct mbuf	*mh, *mp;
3784
3785		rxbuf = &rxr->rx_buffers[j];
3786		if (rxr->hdr_split == FALSE)
3787			goto skip_head;
3788
3789		/* First the header */
3790		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3791		if (rxbuf->m_head == NULL) {
3792			error = ENOBUFS;
3793                        goto fail;
3794		}
3795		m_adj(rxbuf->m_head, ETHER_ALIGN);
3796		mh = rxbuf->m_head;
3797		mh->m_len = mh->m_pkthdr.len = MHLEN;
3798		mh->m_flags |= M_PKTHDR;
3799		/* Get the memory mapping */
3800		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3801		    rxbuf->hmap, rxbuf->m_head, hseg,
3802		    &nsegs, BUS_DMA_NOWAIT);
3803		if (error != 0) /* Nothing elegant to do here */
3804                        goto fail;
3805		bus_dmamap_sync(rxr->htag,
3806		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3807		/* Update descriptor */
3808		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3809
3810skip_head:
3811		/* Now the payload cluster */
3812		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3813		    M_PKTHDR, adapter->rx_mbuf_sz);
3814		if (rxbuf->m_pack == NULL) {
3815			error = ENOBUFS;
3816                        goto fail;
3817		}
3818		mp = rxbuf->m_pack;
3819		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3820		/* Get the memory mapping */
3821		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3822		    rxbuf->pmap, mp, pseg,
3823		    &nsegs, BUS_DMA_NOWAIT);
3824		if (error != 0)
3825                        goto fail;
3826		bus_dmamap_sync(rxr->ptag,
3827		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3828		/* Update descriptor */
3829		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3830        }
3831
3832	/* Setup our descriptor indices */
3833	rxr->next_to_check = 0;
3834	rxr->next_to_refresh = 0;
3835	rxr->lro_enabled = FALSE;
3836	rxr->rx_split_packets = 0;
3837	rxr->rx_bytes = 0;
3838
3839	rxr->fmp = NULL;
3840	rxr->lmp = NULL;
3841	rxr->discard = FALSE;
3842
3843	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3844	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3845
3846	/*
3847	** Now set up the LRO interface; we
3848	** also only do header split when LRO
3849	** is enabled, since it is often
3850	** undesirable otherwise.
3851	*/
3852	if (ifp->if_capenable & IFCAP_LRO) {
3853		int err = tcp_lro_init(lro);
3854		if (err) {
3855			device_printf(dev, "LRO Initialization failed!\n");
3856			goto fail;
3857		}
3858		INIT_DEBUGOUT("RX LRO Initialized\n");
3859		rxr->lro_enabled = TRUE;
3860		lro->ifp = adapter->ifp;
3861	}
3862
3863	IGB_RX_UNLOCK(rxr);
3864	return (0);
3865
3866fail:
3867	igb_free_receive_ring(rxr);
3868	IGB_RX_UNLOCK(rxr);
3869	return (error);
3870}
3871
3872/*********************************************************************
3873 *
3874 *  Initialize all receive rings.
3875 *
3876 **********************************************************************/
3877static int
3878igb_setup_receive_structures(struct adapter *adapter)
3879{
3880	struct rx_ring *rxr = adapter->rx_rings;
3881	int i;
3882
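	/* Set up each ring; on failure unwind the rings already done */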
3883	for (i = 0; i < adapter->num_queues; i++, rxr++)
3884		if (igb_setup_receive_ring(rxr))
3885			goto fail;
3886
3887	return (0);
3888fail:
3889	/*
3890	 * Free RX buffers allocated so far, we will only handle
3891	 * the rings that completed, the failing case will have
3892	 * cleaned up for itself. 'i' is the endpoint.
3893	 */
3894	for (int j = 0; j < i; ++j) {
3895		rxr = &adapter->rx_rings[j];
3896		igb_free_receive_ring(rxr);
3897	}
3898
3899	return (ENOBUFS);
3900}
3901
3902/*********************************************************************
3903 *
3904 *  Enable receive unit.
3905 *
3906 **********************************************************************/
3907static void
3908igb_initialize_receive_units(struct adapter *adapter)
3909{
3910	struct rx_ring	*rxr = adapter->rx_rings;
3911	struct ifnet	*ifp = adapter->ifp;
3912	struct e1000_hw *hw = &adapter->hw;
3913	u32		rctl, rxcsum, psize, srrctl = 0;
3914
3915	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3916
3917	/*
3918	 * Make sure receives are disabled while setting
3919	 * up the descriptor ring
3920	 */
3921	rctl = E1000_READ_REG(hw, E1000_RCTL);
3922	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3923
3924	/*
3925	** Set up for header split
3926	*/
3927	if (rxr->hdr_split) {
3928		/* Use a standard mbuf for the header */
3929		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3930		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3931	} else
3932		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3933
3934	/*
3935	** Set up for jumbo frames
3936	*/
3937	if (ifp->if_mtu > ETHERMTU) {
3938		rctl |= E1000_RCTL_LPE;
3939		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3940		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3941
3942		/* Set maximum packet len */
3943		psize = adapter->max_frame_size;
3944		/* are we on a vlan? */
3945		if (adapter->ifp->if_vlantrunk != NULL)
3946			psize += VLAN_TAG_SIZE;
3947		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3948	} else {
3949		rctl &= ~E1000_RCTL_LPE;
3950		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3951		rctl |= E1000_RCTL_SZ_2048;
3952	}
3953
3954	/* Setup the Base and Length of the Rx Descriptor Rings */
3955	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3956		u64 bus_addr = rxr->rxdma.dma_paddr;
3957		u32 rxdctl;
3958
3959		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3960		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3961		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3962		    (uint32_t)(bus_addr >> 32));
3963		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3964		    (uint32_t)bus_addr);
3965		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3966		/* Enable this Queue */
3967		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3968		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3969		rxdctl &= 0xFFF00000;
3970		rxdctl |= IGB_RX_PTHRESH;
3971		rxdctl |= IGB_RX_HTHRESH << 8;
3972		rxdctl |= IGB_RX_WTHRESH << 16;
3973		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3974	}
3975
3976	/*
3977	** Setup for RX MultiQueue
3978	*/
3979	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3980	if (adapter->num_queues > 1) {
3981		u32 random[10], mrqc, shift = 0;
3982		union igb_reta {
3983			u32 dword;
3984			u8  bytes[4];
3985		} reta;
3986
3987		arc4rand(&random, sizeof(random), 0);
3988		if (adapter->hw.mac.type == e1000_82575)
3989			shift = 6;
3990		/* Warning FM follows */
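		/*
		 * Fill the 128-entry RSS redirection table round-robin
		 * across the queues; entries are packed four per 32-bit
		 * register and written every fourth iteration.
		 */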
3991		for (int i = 0; i < 128; i++) {
3992			reta.bytes[i & 3] =
3993			    (i % adapter->num_queues) << shift;
3994			if ((i & 3) == 3)
3995				E1000_WRITE_REG(hw,
3996				    E1000_RETA(i >> 2), reta.dword);
3997		}
3998		/* Now fill in hash table */
3999		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4000		for (int i = 0; i < 10; i++)
4001			E1000_WRITE_REG_ARRAY(hw,
4002			    E1000_RSSRK(0), i, random[i]);
4003
4004		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4005		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4006		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4007		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4008		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4009		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4010		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4011		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4012
4013		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4014
4015		/*
4016		** NOTE: Receive Full-Packet Checksum Offload
4017		** is mutually exclusive with Multiqueue. This
4018		** is not the same as TCP/IP checksum offload,
4019		** which still works.
4020		*/
4021		rxcsum |= E1000_RXCSUM_PCSD;
4022#if __FreeBSD_version >= 800000
4023		/* For SCTP Offload */
4024		if ((hw->mac.type == e1000_82576)
4025		    && (ifp->if_capenable & IFCAP_RXCSUM))
4026			rxcsum |= E1000_RXCSUM_CRCOFL;
4027#endif
4028	} else {
4029		/* Non RSS setup */
4030		if (ifp->if_capenable & IFCAP_RXCSUM) {
4031			rxcsum |= E1000_RXCSUM_IPPCSE;
4032#if __FreeBSD_version >= 800000
4033			if (adapter->hw.mac.type == e1000_82576)
4034				rxcsum |= E1000_RXCSUM_CRCOFL;
4035#endif
4036		} else
4037			rxcsum &= ~E1000_RXCSUM_TUOFL;
4038	}
4039	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4040
4041	/* Setup the Receive Control Register */
4042	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4043	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4044		   E1000_RCTL_RDMTS_HALF |
4045		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4046	/* Strip CRC bytes. */
4047	rctl |= E1000_RCTL_SECRC;
4048	/* Make sure VLAN Filters are off */
4049	rctl &= ~E1000_RCTL_VFE;
4050	/* Don't store bad packets */
4051	rctl &= ~E1000_RCTL_SBP;
4052
4053	/* Enable Receives */
4054	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4055
4056	/*
4057	 * Setup the HW Rx Head and Tail Descriptor Pointers
4058	 *   - needs to be after enable
4059	 */
4060	for (int i = 0; i < adapter->num_queues; i++) {
4061		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4062		E1000_WRITE_REG(hw, E1000_RDT(i),
4063		     adapter->num_rx_desc - 1);
4064	}
4065	return;
4066}
4067
4068/*********************************************************************
4069 *
4070 *  Free receive rings.
4071 *
4072 **********************************************************************/
4073static void
4074igb_free_receive_structures(struct adapter *adapter)
4075{
4076	struct rx_ring *rxr = adapter->rx_rings;
4077
4078	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4079		struct lro_ctrl	*lro = &rxr->lro;
4080		igb_free_receive_buffers(rxr);
4081		tcp_lro_free(lro);
4082		igb_dma_free(adapter, &rxr->rxdma);
4083	}
4084
4085	free(adapter->rx_rings, M_DEVBUF);
4086}
4087
4088/*********************************************************************
4089 *
4090 *  Free receive ring data structures.
4091 *
4092 **********************************************************************/
4093static void
4094igb_free_receive_buffers(struct rx_ring *rxr)
4095{
4096	struct adapter		*adapter = rxr->adapter;
4097	struct igb_rx_buf	*rxbuf;
4098	int i;
4099
4100	INIT_DEBUGOUT("free_receive_structures: begin");
4101
4102	/* Cleanup any existing buffers */
4103	if (rxr->rx_buffers != NULL) {
4104		for (i = 0; i < adapter->num_rx_desc; i++) {
4105			rxbuf = &rxr->rx_buffers[i];
4106			if (rxbuf->m_head != NULL) {
4107				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4108				    BUS_DMASYNC_POSTREAD);
4109				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4110				rxbuf->m_head->m_flags |= M_PKTHDR;
4111				m_freem(rxbuf->m_head);
4112			}
4113			if (rxbuf->m_pack != NULL) {
4114				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4115				    BUS_DMASYNC_POSTREAD);
4116				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4117				rxbuf->m_pack->m_flags |= M_PKTHDR;
4118				m_freem(rxbuf->m_pack);
4119			}
4120			rxbuf->m_head = NULL;
4121			rxbuf->m_pack = NULL;
4122			if (rxbuf->hmap != NULL) {
4123				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4124				rxbuf->hmap = NULL;
4125			}
4126			if (rxbuf->pmap != NULL) {
4127				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4128				rxbuf->pmap = NULL;
4129			}
4130		}
4131		if (rxr->rx_buffers != NULL) {
4132			free(rxr->rx_buffers, M_DEVBUF);
4133			rxr->rx_buffers = NULL;
4134		}
4135	}
4136
4137	if (rxr->htag != NULL) {
4138		bus_dma_tag_destroy(rxr->htag);
4139		rxr->htag = NULL;
4140	}
4141	if (rxr->ptag != NULL) {
4142		bus_dma_tag_destroy(rxr->ptag);
4143		rxr->ptag = NULL;
4144	}
4145}
4146
4147static __inline void
4148igb_rx_discard(struct rx_ring *rxr, int i)
4149{
4150	struct adapter		*adapter = rxr->adapter;
4151	struct igb_rx_buf	*rbuf;
4152	struct mbuf             *mh, *mp;
4153
4154	rbuf = &rxr->rx_buffers[i];
4155	if (rxr->fmp != NULL) {
4156		rxr->fmp->m_flags |= M_PKTHDR;
4157		m_freem(rxr->fmp);
4158		rxr->fmp = NULL;
4159		rxr->lmp = NULL;
4160	}
4161
4162	mh = rbuf->m_head;
4163	mp = rbuf->m_pack;
4164
4165	/* Reuse loaded DMA maps and just update the mbuf chains */
4166	if (mh != NULL) {	/* header mbuf exists only with header split */
4167		mh->m_len = MHLEN;
4168		mh->m_flags |= M_PKTHDR;
4169		mh->m_next = NULL;
4170	}
4169
4170	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4171	mp->m_data = mp->m_ext.ext_buf;
4172	mp->m_next = NULL;
4173	return;
4174}
4175
4176static __inline void
4177igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4178{
4179
4180	/*
4181	 * At the moment LRO is only for IPv4/TCP packets, and the TCP checksum
4182	 * of the packet must have been computed by hardware. The packet must
4183	 * also not carry a VLAN tag in its ethernet header.
4184	 */
4185	if (rxr->lro_enabled &&
4186	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4187	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4188	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4189	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4190	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4191	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4192		/*
4193		 * Send to the stack if:
4194		 *  - LRO not enabled, or
4195		 *  - no LRO resources, or
4196		 *  - lro enqueue fails
4197		 */
4198		if (rxr->lro.lro_cnt != 0)
4199			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4200				return;
4201	}
4202	IGB_RX_UNLOCK(rxr);
4203	(*ifp->if_input)(ifp, m);
4204	IGB_RX_LOCK(rxr);
4205}
4206
4207/*********************************************************************
4208 *
4209 *  This routine executes in interrupt context. It replenishes
4210 *  the mbufs in the descriptor and sends data which has been
4211 *  dma'ed into host memory to upper layer.
4212 *
4213 *  We loop at most count times if count is > 0, or until done if
4214 *  count < 0.
4215 *
4216 *  Return TRUE if more to clean, FALSE otherwise
4217 *********************************************************************/
4218static bool
4219igb_rxeof(struct igb_queue *que, int count, int *done)
4220{
4221	struct adapter		*adapter = que->adapter;
4222	struct rx_ring		*rxr = que->rxr;
4223	struct ifnet		*ifp = adapter->ifp;
4224	struct lro_ctrl		*lro = &rxr->lro;
4225	struct lro_entry	*queued;
4226	int			i, processed = 0, rxdone = 0;
4227	u32			ptype, staterr = 0;
4228	union e1000_adv_rx_desc	*cur;
4229
4230	IGB_RX_LOCK(rxr);
4231	/* Sync the ring. */
4232	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4233	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4234
4235	/* Main clean loop */
4236	for (i = rxr->next_to_check; count != 0;) {
4237		struct mbuf		*sendmp, *mh, *mp;
4238		struct igb_rx_buf	*rxbuf;
4239		u16			hlen, plen, hdr, vtag;
4240		bool			eop = FALSE;
4241
4242		cur = &rxr->rx_base[i];
4243		staterr = le32toh(cur->wb.upper.status_error);
4244		if ((staterr & E1000_RXD_STAT_DD) == 0)
4245			break;
4246		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4247			break;
4248		count--;
4249		sendmp = mh = mp = NULL;
4250		cur->wb.upper.status_error = 0;
4251		rxbuf = &rxr->rx_buffers[i];
4252		plen = le16toh(cur->wb.upper.length);
4253		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4254		vtag = le16toh(cur->wb.upper.vlan);
4255		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4256		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4257
4258		/* Make sure all segments of a bad packet are discarded */
4259		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4260		    (rxr->discard)) {
4261			ifp->if_ierrors++;
4262			++rxr->rx_discarded;
4263			if (!eop) /* Catch subsequent segs */
4264				rxr->discard = TRUE;
4265			else
4266				rxr->discard = FALSE;
4267			igb_rx_discard(rxr, i);
4268			goto next_desc;
4269		}
4270
4271		/*
4272		** The way the hardware is configured to
4273		** split, it will ONLY use the header buffer
4274		** when header split is enabled, otherwise we
4275		** get normal behavior, ie, both header and
4276		** payload are DMA'd into the payload buffer.
4277		**
4278		** The fmp test is to catch the case where a
4279		** packet spans multiple descriptors, in that
4280		** case only the first header is valid.
4281		*/
4282		if (rxr->hdr_split && rxr->fmp == NULL) {
4283			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4284			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4285			if (hlen > IGB_HDR_BUF)
4286				hlen = IGB_HDR_BUF;
4287			/* Handle the header mbuf */
4288			mh = rxr->rx_buffers[i].m_head;
4289			mh->m_len = hlen;
4290			/* clear buf info for refresh */
4291			rxbuf->m_head = NULL;
4292			/*
4293			** Get the payload length; this
4294			** could be zero if it is a small
4295			** packet.
4296			*/
4297			if (plen > 0) {
4298				mp = rxr->rx_buffers[i].m_pack;
4299				mp->m_len = plen;
4300				mh->m_next = mp;
4301				/* clear buf info for refresh */
4302				rxbuf->m_pack = NULL;
4303				rxr->rx_split_packets++;
4304			}
4305		} else {
4306			/*
4307			** Either no header split, or a
4308			** secondary piece of a fragmented
4309			** split packet.
4310			*/
4311			mh = rxr->rx_buffers[i].m_pack;
4312			mh->m_len = plen;
4313			/* clear buf info for refresh */
4314			rxbuf->m_pack = NULL;
4315		}
4316
4317		++processed; /* So we know when to refresh */
4318
4319		/* Initial frame - setup */
4320		if (rxr->fmp == NULL) {
4321			mh->m_pkthdr.len = mh->m_len;
4322			/* Store the first mbuf */
4323			rxr->fmp = mh;
4324			rxr->lmp = mh;
4325			if (mp != NULL) {
4326				/* Add payload if split */
4327				mh->m_pkthdr.len += mp->m_len;
4328				rxr->lmp = mh->m_next;
4329			}
4330		} else {
4331			/* Chain mbuf's together */
4332			rxr->lmp->m_next = mh;
4333			rxr->lmp = rxr->lmp->m_next;
4334			rxr->fmp->m_pkthdr.len += mh->m_len;
4335		}
4336
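		/*
		 * On EOP the frame is complete: set the receive ifp,
		 * update counters, record checksum/VLAN offload results,
		 * and pass the chain along as sendmp below.
		 */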
4337		if (eop) {
4338			rxr->fmp->m_pkthdr.rcvif = ifp;
4339			ifp->if_ipackets++;
4340			rxr->rx_packets++;
4341			/* capture data for AIM */
4342			rxr->packets++;
4343			rxr->bytes += rxr->fmp->m_pkthdr.len;
4344			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4345
4346			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4347				igb_rx_checksum(staterr, rxr->fmp, ptype);
4348
4349			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4350			    (staterr & E1000_RXD_STAT_VP) != 0) {
4351				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4352				rxr->fmp->m_flags |= M_VLANTAG;
4353			}
4354#if __FreeBSD_version >= 800000
4355			rxr->fmp->m_pkthdr.flowid = que->msix;
4356			rxr->fmp->m_flags |= M_FLOWID;
4357#endif
4358			sendmp = rxr->fmp;
4359			/* Make sure to set M_PKTHDR. */
4360			sendmp->m_flags |= M_PKTHDR;
4361			rxr->fmp = NULL;
4362			rxr->lmp = NULL;
4363		}
4364
4365next_desc:
4366		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4367		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4368
4369		/* Advance our pointers to the next descriptor. */
4370		if (++i == adapter->num_rx_desc)
4371			i = 0;
4372		/*
4373		** Send to the stack or LRO
4374		*/
4375		if (sendmp != NULL) {
4376			rxr->next_to_check = i;
4377			igb_rx_input(rxr, ifp, sendmp, ptype);
4378			i = rxr->next_to_check;
4379			rxdone++;
4380		}
4381
4382		/* Every 8 descriptors we go to refresh mbufs */
4383		if (processed == 8) {
4384                        igb_refresh_mbufs(rxr, i);
4385                        processed = 0;
4386		}
4387	}
4388
4389	/* Catch any remainders */
4390	if (processed != 0) {
4391		igb_refresh_mbufs(rxr, i);
4392		processed = 0;
4393	}
4394
4395	rxr->next_to_check = i;
4396
4397	/*
4398	 * Flush any outstanding LRO work
4399	 */
4400	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4401		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4402		tcp_lro_flush(lro, queued);
4403	}
4404
4405	IGB_RX_UNLOCK(rxr);
4406
4407	if (done != NULL)
4408		*done = rxdone;
4409
4410	/*
4411	** We still have cleaning to do?
4412	** Schedule another interrupt if so.
4413	*/
4414	if ((staterr & E1000_RXD_STAT_DD) != 0)
4415		return (TRUE);
4416
4417	return (FALSE);
4418}
4419
4420/*********************************************************************
4421 *
4422 *  Verify that the hardware indicated that the checksum is valid.
4423 *  Inform the stack about the status of checksum so that stack
4424 *  doesn't spend time verifying the checksum.
4425 *
4426 *********************************************************************/
4427static void
4428igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4429{
4430	u16 status = (u16)staterr;
4431	u8  errors = (u8) (staterr >> 24);
4432	int sctp;
4433
4434	/* Ignore Checksum bit is set */
4435	if (status & E1000_RXD_STAT_IXSM) {
4436		mp->m_pkthdr.csum_flags = 0;
4437		return;
4438	}
4439
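	/* Note SCTP packets; they get CSUM_SCTP_VALID below instead */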
4440	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4441	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4442		sctp = 1;
4443	else
4444		sctp = 0;
4445	if (status & E1000_RXD_STAT_IPCS) {
4446		/* Did it pass? */
4447		if (!(errors & E1000_RXD_ERR_IPE)) {
4448			/* IP Checksum Good */
4449			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4450			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4451		} else
4452			mp->m_pkthdr.csum_flags = 0;
4453	}
4454
4455	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4456		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4457#if __FreeBSD_version >= 800000
4458		if (sctp) /* reassign */
4459			type = CSUM_SCTP_VALID;
4460#endif
4461		/* Did it pass? */
4462		if (!(errors & E1000_RXD_ERR_TCPE)) {
4463			mp->m_pkthdr.csum_flags |= type;
4464			if (sctp == 0)
4465				mp->m_pkthdr.csum_data = htons(0xffff);
4466		}
4467	}
4468	return;
4469}
4470
4471/*
4472 * This routine is run via a vlan
4473 * config EVENT
4474 */
4475static void
4476igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4477{
4478	struct adapter	*adapter = ifp->if_softc;
4479	u32		index, bit;
4480
4481	if (ifp->if_softc !=  arg)   /* Not our event */
4482		return;
4483
4484	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4485                return;
4486
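	/*
	 * The shadow VFTA is 128 32-bit words: bits 5-11 of the
	 * tag select the word, bits 0-4 select the bit within it.
	 */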
4487	index = (vtag >> 5) & 0x7F;
4488	bit = vtag & 0x1F;
4489	igb_shadow_vfta[index] |= (1 << bit);
4490	++adapter->num_vlans;
4491	/* Re-init to load the changes */
4492	igb_init(adapter);
4493}
4494
4495/*
4496 * This routine is run via a vlan
4497 * unconfig EVENT
4498 */
4499static void
4500igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4501{
4502	struct adapter	*adapter = ifp->if_softc;
4503	u32		index, bit;
4504
4505	if (ifp->if_softc !=  arg)
4506		return;
4507
4508	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4509                return;
4510
4511	index = (vtag >> 5) & 0x7F;
4512	bit = vtag & 0x1F;
4513	igb_shadow_vfta[index] &= ~(1 << bit);
4514	--adapter->num_vlans;
4515	/* Re-init to load the changes */
4516	igb_init(adapter);
4517}
4518
4519static void
4520igb_setup_vlan_hw_support(struct adapter *adapter)
4521{
4522	struct e1000_hw *hw = &adapter->hw;
4523	u32             reg;
4524
4525	/*
4526	** We get here through init_locked, meaning
4527	** a soft reset; that has already cleared
4528	** the VFTA and other state, so if no
4529	** vlans have been registered, do nothing.
4530	*/
4531	if (adapter->num_vlans == 0)
4532                return;
4533
4534	/*
4535	** A soft reset zeroes out the VFTA, so
4536	** we need to repopulate it now.
4537	*/
4538	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4539                if (igb_shadow_vfta[i] != 0) {
4540			if (hw->mac.type == e1000_vfadapt)
4541				e1000_vfta_set_vf(hw, igb_shadow_vfta[i], TRUE);
4542			else
4543				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4544                           	 i, igb_shadow_vfta[i]);
4545		}
4546
4547	if (hw->mac.type == e1000_vfadapt)
4548		e1000_rlpml_set_vf(hw,
4549		    adapter->max_frame_size + VLAN_TAG_SIZE);
4550	else {
4551		reg = E1000_READ_REG(hw, E1000_CTRL);
4552		reg |= E1000_CTRL_VME;
4553		E1000_WRITE_REG(hw, E1000_CTRL, reg);
4554
4555		/* Enable the Filter Table */
4556		reg = E1000_READ_REG(hw, E1000_RCTL);
4557		reg &= ~E1000_RCTL_CFIEN;
4558		reg |= E1000_RCTL_VFE;
4559		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4560
4561		/* Update the frame size */
4562		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4563		    adapter->max_frame_size + VLAN_TAG_SIZE);
4564	}
4565}
4566
4567static void
4568igb_enable_intr(struct adapter *adapter)
4569{
4570	/* With RSS set up what to auto clear */
4571	if (adapter->msix_mem) {
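		/*
		 * MSIX: set the queue vectors to auto-clear and
		 * auto-mask, enable them, and enable the link
		 * status change interrupt separately.
		 */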
4572		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4573		    adapter->eims_mask);
4574		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4575		    adapter->eims_mask);
4576		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4577		    adapter->eims_mask);
4578		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4579		    E1000_IMS_LSC);
4580	} else {
4581		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4582		    IMS_ENABLE_MASK);
4583	}
4584	E1000_WRITE_FLUSH(&adapter->hw);
4585
4586	return;
4587}
4588
4589static void
4590igb_disable_intr(struct adapter *adapter)
4591{
4592	if (adapter->msix_mem) {
4593		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4594		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4595	}
4596	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4597	E1000_WRITE_FLUSH(&adapter->hw);
4598	return;
4599}
4600
4601/*
4602 * Bit of a misnomer: what this really means is
4603 * to enable OS management of the system, i.e.,
4604 * to disable the special hardware management features.
4605 */
4606static void
4607igb_init_manageability(struct adapter *adapter)
4608{
4609	if (adapter->has_manage) {
4610		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4611		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4612
4613		/* disable hardware interception of ARP */
4614		manc &= ~(E1000_MANC_ARP_EN);
4615
4616                /* enable receiving management packets to the host */
4617		manc |= E1000_MANC_EN_MNG2HOST;
4618		manc2h |= 1 << 5;  /* Mng Port 623 */
4619		manc2h |= 1 << 6;  /* Mng Port 664 */
4620		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4621		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4622	}
4623}
4624
4625/*
4626 * Give control back to hardware management
4627 * controller if there is one.
4628 */
4629static void
4630igb_release_manageability(struct adapter *adapter)
4631{
4632	if (adapter->has_manage) {
4633		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4634
4635		/* re-enable hardware interception of ARP */
4636		manc |= E1000_MANC_ARP_EN;
4637		manc &= ~E1000_MANC_EN_MNG2HOST;
4638
4639		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4640	}
4641}
4642
4643/*
4644 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4645 * For ASF and Pass Through versions of f/w this means that
4646 * the driver is loaded.
4647 *
4648 */
4649static void
4650igb_get_hw_control(struct adapter *adapter)
4651{
4652	u32 ctrl_ext;
4653
4654	if (adapter->hw.mac.type == e1000_vfadapt)
4655		return;
4656
4657	/* Let firmware know the driver has taken over */
4658	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4659	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4660	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4661}
4662
4663/*
4664 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4665 * For ASF and Pass Through versions of f/w this means that the
4666 * driver is no longer loaded.
4667 *
4668 */
4669static void
4670igb_release_hw_control(struct adapter *adapter)
4671{
4672	u32 ctrl_ext;
4673
4674	if (adapter->hw.mac.type == e1000_vfadapt)
4675		return;
4676
4677	/* Let firmware take over control of h/w */
4678	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4679	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4680	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4681}
4682
4683static int
4684igb_is_valid_ether_addr(uint8_t *addr)
4685{
4686	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4687
4688	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4689		return (FALSE);
4690	}
4691
4692	return (TRUE);
4693}
4694
4695
4696/*
4697 * Enable PCI Wake On Lan capability
4698 */
4699static void
4700igb_enable_wakeup(device_t dev)
4701{
4702	u16     cap, status;
4703	u8      id;
4704
4705	/* First find the capabilities pointer*/
4706	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4707	/* Read the PM Capabilities */
4708	id = pci_read_config(dev, cap, 1);
4709	if (id != PCIY_PMG)     /* Something wrong */
4710		return;
4711	/* OK, we have the power capabilities, so
4712	   now get the status register */
4713	cap += PCIR_POWER_STATUS;
4714	status = pci_read_config(dev, cap, 2);
4715	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4716	pci_write_config(dev, cap, status, 2);
4717	return;
4718}
4719
4720static void
4721igb_led_func(void *arg, int onoff)
4722{
4723	struct adapter	*adapter = arg;
4724
4725	IGB_CORE_LOCK(adapter);
4726	if (onoff) {
4727		e1000_setup_led(&adapter->hw);
4728		e1000_led_on(&adapter->hw);
4729	} else {
4730		e1000_led_off(&adapter->hw);
4731		e1000_cleanup_led(&adapter->hw);
4732	}
4733	IGB_CORE_UNLOCK(adapter);
4734}
4735
4736/**********************************************************************
4737 *
4738 *  Update the board statistics counters.
4739 *
4740 **********************************************************************/
4741static void
4742igb_update_stats_counters(struct adapter *adapter)
4743{
4744	struct ifnet		*ifp;
4745        struct e1000_hw		*hw = &adapter->hw;
4746	struct e1000_hw_stats	*stats;
4747
4748	/*
4749	** The virtual function adapter has only a
4750	** small, controlled set of stats, so update
4751	** only those and return.
4752	*/
4753	if (adapter->hw.mac.type == e1000_vfadapt) {
4754		igb_update_vf_stats_counters(adapter);
4755		return;
4756	}
4757
4758	stats = (struct e1000_hw_stats	*)adapter->stats;
4759
4760	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4761	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4762		stats->symerrs +=
4763		    E1000_READ_REG(hw, E1000_SYMERRS);
4764		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4765	}
4766
4767	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4768	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4769	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4770	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4771
4772	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4773	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4774	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4775	stats->dc += E1000_READ_REG(hw, E1000_DC);
4776	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4777	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4778	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4779	stats->xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
4780	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4781	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4782	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4783	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4784	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4785	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4786	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4787	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4788	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4789	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4790	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4791	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4792
4793	/* For the 64-bit byte counters the low dword must be read first. */
4794	/* Both registers clear on the read of the high dword */
4795
4796	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4797	  ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4798	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4799	  ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4800
4801	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4802	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4803	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4804	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4805	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4806
4807	stats->tor += E1000_READ_REG(hw, E1000_TORH);
4808	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
4809
4810	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4811	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4812	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4813	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4814	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4815	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4816	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4817	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4818	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4819	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4820
4821	/* Interrupt Counts */
4822
4823	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4824	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4825	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4826	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4827	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4828	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4829	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4830	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4831	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4832
4833	/* Host to Card Statistics */
4834
4835	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4836	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4837	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4838	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4839	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4840	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4841	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4842	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4843	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4844	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4845	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4846	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4847	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4848	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4849
4850	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4851	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4852	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4853	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4854	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4855	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4856	ifp = adapter->ifp;
4857
4859	ifp->if_collisions = stats->colc;
4860
4861	/* Rx Errors */
4862	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4863	    stats->crcerrs + stats->algnerrc +
4864	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4865
4866	/* Tx Errors */
4867	ifp->if_oerrors = stats->ecol +
4868	    stats->latecol + adapter->watchdog_events;
4869
4870	/* Driver specific counters */
4871	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4872	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4873	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4874	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4875	adapter->packet_buf_alloc_tx =
4876	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4877	adapter->packet_buf_alloc_rx =
4878	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4879}
4880
4881
4882/**********************************************************************
4883 *
4884 *  Initialize the VF board statistics counters.
4885 *
4886 **********************************************************************/
4887static void
4888igb_vf_init_stats(struct adapter *adapter)
4889{
4890        struct e1000_hw *hw = &adapter->hw;
4891	struct e1000_vf_stats	*stats;
4892
4893	stats = (struct e1000_vf_stats	*)adapter->stats;
4894
4895        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
4896        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
4897        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
4898        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
4899        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
4900}
4901
4902/**********************************************************************
4903 *
4904 *  Update the VF board statistics counters.
4905 *
4906 **********************************************************************/
4907static void
4908igb_update_vf_stats_counters(struct adapter *adapter)
4909{
4910	struct e1000_hw *hw = &adapter->hw;
4911	struct e1000_vf_stats	*stats;
4912
4913	if (adapter->link_speed == 0)
4914		return;
4915
4916	stats = (struct e1000_vf_stats	*)adapter->stats;
4917
4918	UPDATE_VF_REG(E1000_VFGPRC,
4919	    stats->last_gprc, stats->gprc);
4920	UPDATE_VF_REG(E1000_VFGORC,
4921	    stats->last_gorc, stats->gorc);
4922	UPDATE_VF_REG(E1000_VFGPTC,
4923	    stats->last_gptc, stats->gptc);
4924	UPDATE_VF_REG(E1000_VFGOTC,
4925	    stats->last_gotc, stats->gotc);
4926	UPDATE_VF_REG(E1000_VFMPRC,
4927	    stats->last_mprc, stats->mprc);
4928}
4929
4930
4931/*
4932 * Add sysctl variables, one per statistic, to the system.
4933 */
4934static void
4935igb_add_hw_stats(struct adapter *adapter)
4936{
4937
4938	device_t dev = adapter->dev;
4939
4940	struct tx_ring *txr = adapter->tx_rings;
4941	struct rx_ring *rxr = adapter->rx_rings;
4942
4943	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4944	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4945	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4946	struct e1000_hw_stats *stats = &adapter->stats;
4947
4948	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
4949	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
4950
4951#define QUEUE_NAME_LEN 32
4952	char namebuf[QUEUE_NAME_LEN];
4953
4954	/* Driver Statistics */
4955	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
4956			CTLFLAG_RD, &adapter->link_irq, 0,
4957			"Link MSIX IRQ Handled");
4958	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4959			CTLFLAG_RD, &adapter->dropped_pkts,
4960			"Driver dropped packets");
4961	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4962			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4963			"Driver tx dma failure in xmit");
4964
4965	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
4966			CTLFLAG_RD, &adapter->device_control,
4967			"Device Control Register");
4968	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
4969			CTLFLAG_RD, &adapter->rx_control,
4970			"Receiver Control Register");
4971	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
4972			CTLFLAG_RD, &adapter->int_mask,
4973			"Interrupt Mask");
4974	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
4975			CTLFLAG_RD, &adapter->eint_mask,
4976			"Extended Interrupt Mask");
4977	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
4978			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
4979			"Transmit Buffer Packet Allocation");
4980	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
4981			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
4982			"Receive Buffer Packet Allocation");
4983	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4984			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4985			"Flow Control High Watermark");
4986	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4987			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4988			"Flow Control Low Watermark");
4989
4990	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4991		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
4992		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4993					    CTLFLAG_RD, NULL, "Queue Name");
4994		queue_list = SYSCTL_CHILDREN(queue_node);
4995
4996		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "txd_head",
4997				CTLFLAG_RD,
4998				E1000_READ_REG(&adapter->hw,
4999				E1000_TDH(txr->me)), 0,
5000				"Transmit Descriptor Head");
5001		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "txd_tail",
5002				CTLFLAG_RD,
5003				E1000_READ_REG(&adapter->hw,
5004				E1000_TDT(txr->me)), 0,
5005				"Transmit Descriptor Tail");
5006		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5007				CTLFLAG_RD, &txr->no_desc_avail,
5008				"Queue No Descriptor Available");
5009		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5010				CTLFLAG_RD, &txr->tx_packets,
5011				"Queue Packets Transmitted");
5012	}
5013
5014	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5015		struct lro_ctrl *lro = &rxr->lro;
5016
5017		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5018		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5019					    CTLFLAG_RD, NULL, "Queue Name");
5020		queue_list = SYSCTL_CHILDREN(queue_node);
5026
5027		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rxd_head",
5028				CTLFLAG_RD, &rxr->rdh, 0,
5029				"Receive Descriptor Head");
5030		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rxd_tail",
5031				CTLFLAG_RD, &rxr->rdt, 0,
5032				"Receive Descriptor Tail");
5033		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5034				CTLFLAG_RD, &rxr->rx_packets,
5035				"Queue Packets Received");
5036		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5037				CTLFLAG_RD, &rxr->rx_bytes,
5038				"Queue Bytes Received");
5039		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5040				CTLFLAG_RD, &lro->lro_queued, 0,
5041				"LRO Queued");
5042		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5043				CTLFLAG_RD, &lro->lro_flushed, 0,
5044				"LRO Flushed");
5045	}
5046
5047	/* MAC stats get their own sub node */
5048
5049	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5050				    CTLFLAG_RD, NULL, "MAC Statistics");
5051	stat_list = SYSCTL_CHILDREN(stat_node);
5052
5053	/*
5054	** VF adapter has a very limited set of stats
5055	** since it's not managing the metal, so to speak.
5056	*/
5057	if (adapter->hw.mac.type == e1000_vfadapt) {
5058		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5059				CTLFLAG_RD, &adapter->stats.gprc,
5060				"Good Packets Received");
5061		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5062				CTLFLAG_RD, &adapter->stats.gptc,
5063				"Good Packets Transmitted");
5064		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5065				CTLFLAG_RD, &adapter->stats.gorc,
5066				"Good Octets Received");
5067		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5068				CTLFLAG_RD, &adapter->stats.gotc,
5069				"Good Octets Transmitted");
5070		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5071				CTLFLAG_RD, &adapter->stats.mprc,
5072				"Multicast Packets Received");
5073		return;
5074	}
5075
5076	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5077			CTLFLAG_RD, &stats->ecol,
5078			"Excessive collisions");
5079	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5080			CTLFLAG_RD, &stats->scc,
5081			"Single collisions");
5082	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5083			CTLFLAG_RD, &stats->mcc,
5084			"Multiple collisions");
5085	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5086			CTLFLAG_RD, &stats->latecol,
5087			"Late collisions");
5088	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5089			CTLFLAG_RD, &stats->colc,
5090			"Collision Count");
5091	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5092			CTLFLAG_RD, &adapter->stats.symerrs,
5093			"Symbol Errors");
5094	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5095			CTLFLAG_RD, &adapter->stats.sec,
5096			"Sequence Errors");
5097	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5098			CTLFLAG_RD, &adapter->stats.dc,
5099			"Defer Count");
5100	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5101			CTLFLAG_RD, &adapter->stats.mpc,
5102			"Missed Packets");
5103	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5104			CTLFLAG_RD, &adapter->stats.rnbc,
5105			"Receive No Buffers");
5106	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5107			CTLFLAG_RD, &adapter->stats.ruc,
5108			"Receive Undersize");
5109	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5110			CTLFLAG_RD, &adapter->stats.rfc,
5111			"Fragmented Packets Received ");
5112	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5113			CTLFLAG_RD, &adapter->stats.roc,
5114			"Oversized Packets Received");
5115	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5116			CTLFLAG_RD, &adapter->stats.rjc,
5117			"Received Jabber");
5118	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5119			CTLFLAG_RD, &adapter->stats.rxerrc,
5120			"Receive Errors");
5121	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5122			CTLFLAG_RD, &adapter->stats.crcerrs,
5123			"CRC errors");
5124	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5125			CTLFLAG_RD, &adapter->stats.algnerrc,
5126			"Alignment Errors");
5127	/* On 82575 these are collision counts */
5128	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5129			CTLFLAG_RD, &adapter->stats.cexterr,
5130			"Collision/Carrier extension errors");
5131	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
5132			CTLFLAG_RD, &adapter->rx_overruns,
5133			"RX overruns");
5134	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
5135			CTLFLAG_RD, &adapter->watchdog_events,
5136			"Watchdog timeouts");
5137	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5138			CTLFLAG_RD, &adapter->stats.xonrxc,
5139			"XON Received");
5140	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5141			CTLFLAG_RD, &adapter->stats.xontxc,
5142			"XON Transmitted");
5143	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5144			CTLFLAG_RD, &adapter->stats.xoffrxc,
5145			"XOFF Received");
5146	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5147			CTLFLAG_RD, &adapter->stats.xofftxc,
5148			"XOFF Transmitted");
5149	/* Packet Reception Stats */
5150	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5151			CTLFLAG_RD, &adapter->stats.tpr,
5152			"Total Packets Received ");
5153	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5154			CTLFLAG_RD, &adapter->stats.gprc,
5155			"Good Packets Received");
5156	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5157			CTLFLAG_RD, &adapter->stats.bprc,
5158			"Broadcast Packets Received");
5159	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5160			CTLFLAG_RD, &adapter->stats.mprc,
5161			"Multicast Packets Received");
5162	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5163			CTLFLAG_RD, &adapter->stats.prc64,
5164			"64 byte frames received ");
5165	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5166			CTLFLAG_RD, &adapter->stats.prc127,
5167			"65-127 byte frames received");
5168	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5169			CTLFLAG_RD, &adapter->stats.prc255,
5170			"128-255 byte frames received");
5171	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5172			CTLFLAG_RD, &adapter->stats.prc511,
5173			"256-511 byte frames received");
5174	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5175			CTLFLAG_RD, &adapter->stats.prc1023,
5176			"512-1023 byte frames received");
5177	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5178			CTLFLAG_RD, &adapter->stats.prc1522,
5179			"1024-1522 byte frames received");
5180 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5181 			CTLFLAG_RD, &adapter->stats.gorc,
5182 			"Good Octets Received");
5183
5184	/* Packet Transmission Stats */
5185 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5186 			CTLFLAG_RD, &adapter->stats.gotc,
5187 			"Good Octets Transmitted");
5188	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5189			CTLFLAG_RD, &adapter->stats.tpt,
5190			"Total Packets Transmitted");
5191	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5192			CTLFLAG_RD, &adapter->stats.gptc,
5193			"Good Packets Transmitted");
5194	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5195			CTLFLAG_RD, &adapter->stats.bptc,
5196			"Broadcast Packets Transmitted");
5197	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5198			CTLFLAG_RD, &adapter->stats.mptc,
5199			"Multicast Packets Transmitted");
5200	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5201			CTLFLAG_RD, &adapter->stats.ptc64,
5202			"64 byte frames transmitted ");
5203	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5204			CTLFLAG_RD, &adapter->stats.ptc127,
5205			"65-127 byte frames transmitted");
5206	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5207			CTLFLAG_RD, &adapter->stats.ptc255,
5208			"128-255 byte frames transmitted");
5209	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5210			CTLFLAG_RD, &adapter->stats.ptc511,
5211			"256-511 byte frames transmitted");
5212	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5213			CTLFLAG_RD, &adapter->stats.ptc1023,
5214			"512-1023 byte frames transmitted");
5215	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5216			CTLFLAG_RD, &adapter->stats.ptc1522,
5217			"1024-1522 byte frames transmitted");
5218	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5219			CTLFLAG_RD, &adapter->stats.tsctc,
5220			"TSO Contexts Transmitted");
5221	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5222			CTLFLAG_RD, &adapter->stats.tsctfc,
5223			"TSO Contexts Failed");
5224
5225
5226	/* Interrupt Stats */
5227
5228	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5229				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5230	int_list = SYSCTL_CHILDREN(int_node);
5231
5232	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5233			CTLFLAG_RD, &adapter->stats.iac,
5234			"Interrupt Assertion Count");
5235
5236	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5237			CTLFLAG_RD, &adapter->stats.icrxptc,
5238			"Interrupt Cause Rx Pkt Timer Expire Count");
5239
5240	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5241			CTLFLAG_RD, &adapter->stats.icrxatc,
5242			"Interrupt Cause Rx Abs Timer Expire Count");
5243
5244	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5245			CTLFLAG_RD, &adapter->stats.ictxptc,
5246			"Interrupt Cause Tx Pkt Timer Expire Count");
5247
5248	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5249			CTLFLAG_RD, &adapter->stats.ictxatc,
5250			"Interrupt Cause Tx Abs Timer Expire Count");
5251
5252	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5253			CTLFLAG_RD, &adapter->stats.ictxqec,
5254			"Interrupt Cause Tx Queue Empty Count");
5255
5256	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5257			CTLFLAG_RD, &adapter->stats.ictxqmtc,
5258			"Interrupt Cause Tx Queue Min Thresh Count");
5259
5260	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5261			CTLFLAG_RD, &adapter->stats.icrxdmtc,
5262			"Interrupt Cause Rx Desc Min Thresh Count");
5263
5264	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5265			CTLFLAG_RD, &adapter->stats.icrxoc,
5266			"Interrupt Cause Receiver Overrun Count");
5267
5268	/* Host to Card Stats */
5269
5270	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5271				    CTLFLAG_RD, NULL,
5272				    "Host to Card Statistics");
5273
5274	host_list = SYSCTL_CHILDREN(host_node);
5275
5276	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5277			CTLFLAG_RD, &adapter->stats.cbtmpc,
5278			"Circuit Breaker Tx Packet Count");
5279
5280	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5281			CTLFLAG_RD, &adapter->stats.htdpmc,
5282			"Host Transmit Discarded Packets");
5283
5284	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5285			CTLFLAG_RD, &adapter->stats.rpthc,
5286			"Rx Packets To Host");
5287
5288	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5289			CTLFLAG_RD, &adapter->stats.cbrmpc,
5290			"Circuit Breaker Rx Packet Count");
5291
5292	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5293			CTLFLAG_RD, &adapter->stats.cbrdpc,
5294			"Circuit Breaker Rx Dropped Count");
5295
5296	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5297			CTLFLAG_RD, &adapter->stats.hgptc,
5298			"Host Good Packets Tx Count");
5299
5300	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5301			CTLFLAG_RD, &adapter->stats.htcbdpc,
5302			"Host Tx Circuit Breaker Dropped Count");
5303
5304	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5305			CTLFLAG_RD, &adapter->stats.hgorc,
5306			"Host Good Octets Received Count");
5307
5308	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5309			CTLFLAG_RD, &adapter->stats.hgotc,
5310			"Host Good Octets Transmit Count");
5311
5312	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5313			CTLFLAG_RD, &adapter->stats.lenerrs,
5314			"Length Errors");
5315
5316	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5317			CTLFLAG_RD, &adapter->stats.scvpc,
5318			"SerDes/SGMII Code Violation Pkt Count");
5319
5320	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5321			CTLFLAG_RD, &adapter->stats.hrmpc,
5322			"Header Redirection Missed Packet Count");
5323}
5324
5325
5326/**********************************************************************
5327 *
5328 *  This routine provides a way to dump out the adapter's EEPROM,
5329 *  often a useful debug/service tool. It dumps only the first
5330 *  32 words; the data that matters is within that range.
5331 *
5332 **********************************************************************/
5333static int
5334igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5335{
5336	struct adapter *adapter;
5337	int error;
5338	int result;
5339
5340	result = -1;
5341	error = sysctl_handle_int(oidp, &result, 0, req);
5342
5343	if (error || !req->newptr)
5344		return (error);
5345
5346	/*
5347	 * This value will cause a hex dump of the
5348	 * first 32 16-bit words of the EEPROM to
5349	 * the screen.
5350	 */
5351	if (result == 1) {
5352		adapter = (struct adapter *)arg1;
5353		igb_print_nvm_info(adapter);
5354	}
5355
5356	return (error);
5357}
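/*
 * Usage sketch (not part of the driver): the handler above only acts
 * when a value of 1 is written to whatever sysctl node it is attached
 * to.  The node name "dev.igb.0.nvm" below is an assumption made for
 * illustration; check the SYSCTL_ADD_PROC call in attach for the real
 * OID.  A minimal userland trigger might look like this:
 */
#if 0	/* illustrative userland example, never built with the driver */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int one = 1;

	/* Equivalent to: sysctl dev.igb.0.nvm=1 (OID name assumed) */
	if (sysctlbyname("dev.igb.0.nvm", NULL, NULL, &one,
	    sizeof(one)) != 0) {
		perror("sysctlbyname");
		return (1);
	}
	return (0);
}
#endif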
5358
5359static void
5360igb_print_nvm_info(struct adapter *adapter)
5361{
5362	u16	eeprom_data;
5363	int	i, j, row = 0;
5364
5365	/* It's a bit crude, but it gets the job done */
5366	printf("\nInterface EEPROM Dump:\n");
5367	printf("Offset\n0x0000  ");
5368	for (i = 0, j = 0; i < 32; i++, j++) {
5369		if (j == 8) { /* Start a new row with its offset label */
5370			j = 0; ++row;
5371			printf("\n0x00%x0  ", row);
5372		}
5373		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5374		printf("%04x ", eeprom_data);
5375	}
5376	printf("\n");
5377}
5378
5379static void
5380igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5381	const char *description, int *limit, int value)
5382{
5383	*limit = value;
5384	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5385	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5386	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5387}
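/*
 * Call-site sketch (not part of the driver): igb_add_rx_process_limit()
 * stores the default into *limit and exposes it as a read/write integer
 * sysctl under the device's tree.  The sysctl name, description string,
 * target field and default value below are assumptions for illustration;
 * the actual call made during attach may differ.
 */
#if 0	/* illustrative example only */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, 100);
#endif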
5388