if_igb.c revision 210968
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 210968 2010-08-06 20:55:49Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 2.0.1";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to select devices to load on
109  Last field stores an index into igb_strings
110 *  Last entry must be all 0s
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
137						PCI_ANY_ID, PCI_ANY_ID, 0},
138	/* required last entry */
139	{ 0, 0, 0, 0, 0}
140};
141
142/*********************************************************************
143 *  Table of branding strings for all supported NICs.
144 *********************************************************************/
145
146static char *igb_strings[] = {
147	"Intel(R) PRO/1000 Network Connection"
148};
149
150/*********************************************************************
151 *  Function prototypes
152 *********************************************************************/
153static int	igb_probe(device_t);
154static int	igb_attach(device_t);
155static int	igb_detach(device_t);
156static int	igb_shutdown(device_t);
157static int	igb_suspend(device_t);
158static int	igb_resume(device_t);
159static void	igb_start(struct ifnet *);
160static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
161#if __FreeBSD_version >= 800000
162static int	igb_mq_start(struct ifnet *, struct mbuf *);
163static int	igb_mq_start_locked(struct ifnet *,
164		    struct tx_ring *, struct mbuf *);
165static void	igb_qflush(struct ifnet *);
166#endif
167static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
168static void	igb_init(void *);
169static void	igb_init_locked(struct adapter *);
170static void	igb_stop(void *);
171static void	igb_media_status(struct ifnet *, struct ifmediareq *);
172static int	igb_media_change(struct ifnet *);
173static void	igb_identify_hardware(struct adapter *);
174static int	igb_allocate_pci_resources(struct adapter *);
175static int	igb_allocate_msix(struct adapter *);
176static int	igb_allocate_legacy(struct adapter *);
177static int	igb_setup_msix(struct adapter *);
178static void	igb_free_pci_resources(struct adapter *);
179static void	igb_local_timer(void *);
180static void	igb_reset(struct adapter *);
181static void	igb_setup_interface(device_t, struct adapter *);
182static int	igb_allocate_queues(struct adapter *);
183static void	igb_configure_queues(struct adapter *);
184
185static int	igb_allocate_transmit_buffers(struct tx_ring *);
186static void	igb_setup_transmit_structures(struct adapter *);
187static void	igb_setup_transmit_ring(struct tx_ring *);
188static void	igb_initialize_transmit_units(struct adapter *);
189static void	igb_free_transmit_structures(struct adapter *);
190static void	igb_free_transmit_buffers(struct tx_ring *);
191
192static int	igb_allocate_receive_buffers(struct rx_ring *);
193static int	igb_setup_receive_structures(struct adapter *);
194static int	igb_setup_receive_ring(struct rx_ring *);
195static void	igb_initialize_receive_units(struct adapter *);
196static void	igb_free_receive_structures(struct adapter *);
197static void	igb_free_receive_buffers(struct rx_ring *);
198static void	igb_free_receive_ring(struct rx_ring *);
199
200static void	igb_enable_intr(struct adapter *);
201static void	igb_disable_intr(struct adapter *);
202static void	igb_update_stats_counters(struct adapter *);
203static bool	igb_txeof(struct tx_ring *);
204
205static __inline	void igb_rx_discard(struct rx_ring *, int);
206static __inline void igb_rx_input(struct rx_ring *,
207		    struct ifnet *, struct mbuf *, u32);
208
209static bool	igb_rxeof(struct igb_queue *, int, int *);
210static void	igb_rx_checksum(u32, struct mbuf *, u32);
211static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
212static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
213static void	igb_set_promisc(struct adapter *);
214static void	igb_disable_promisc(struct adapter *);
215static void	igb_set_multi(struct adapter *);
216static void	igb_update_link_status(struct adapter *);
217static void	igb_refresh_mbufs(struct rx_ring *, int);
218
219static void	igb_register_vlan(void *, struct ifnet *, u16);
220static void	igb_unregister_vlan(void *, struct ifnet *, u16);
221static void	igb_setup_vlan_hw_support(struct adapter *);
222
223static int	igb_xmit(struct tx_ring *, struct mbuf **);
224static int	igb_dma_malloc(struct adapter *, bus_size_t,
225		    struct igb_dma_alloc *, int);
226static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
227static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
228static void	igb_print_nvm_info(struct adapter *);
229static int 	igb_is_valid_ether_addr(u8 *);
230static void     igb_add_hw_stats(struct adapter *);
231
232static void	igb_vf_init_stats(struct adapter *);
233static void	igb_update_vf_stats_counters(struct adapter *);
234
235/* Management and WOL Support */
236static void	igb_init_manageability(struct adapter *);
237static void	igb_release_manageability(struct adapter *);
238static void     igb_get_hw_control(struct adapter *);
239static void     igb_release_hw_control(struct adapter *);
240static void     igb_enable_wakeup(device_t);
241static void     igb_led_func(void *, int);
242
243static int	igb_irq_fast(void *);
244static void	igb_add_rx_process_limit(struct adapter *, const char *,
245		    const char *, int *, int);
246static void	igb_handle_que(void *context, int pending);
247static void	igb_handle_link(void *context, int pending);
248
249/* These are MSIX only irq handlers */
250static void	igb_msix_que(void *);
251static void	igb_msix_link(void *);
252
253#ifdef DEVICE_POLLING
254static poll_handler_t igb_poll;
255#endif /* DEVICE_POLLING */
256
257/*********************************************************************
258 *  FreeBSD Device Interface Entry Points
259 *********************************************************************/
260
261static device_method_t igb_methods[] = {
262	/* Device interface */
263	DEVMETHOD(device_probe, igb_probe),
264	DEVMETHOD(device_attach, igb_attach),
265	DEVMETHOD(device_detach, igb_detach),
266	DEVMETHOD(device_shutdown, igb_shutdown),
267	DEVMETHOD(device_suspend, igb_suspend),
268	DEVMETHOD(device_resume, igb_resume),
269	{0, 0}
270};
271
272static driver_t igb_driver = {
273	"igb", igb_methods, sizeof(struct adapter),
274};
275
276static devclass_t igb_devclass;
277DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
278MODULE_DEPEND(igb, pci, 1, 1, 1);
279MODULE_DEPEND(igb, ether, 1, 1, 1);
280
281/*********************************************************************
282 *  Tunable default values.
283 *********************************************************************/
284
285/* Descriptor defaults */
286static int igb_rxd = IGB_DEFAULT_RXD;
287static int igb_txd = IGB_DEFAULT_TXD;
288TUNABLE_INT("hw.igb.rxd", &igb_rxd);
289TUNABLE_INT("hw.igb.txd", &igb_txd);
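/*
 * Illustrative only: as loader tunables these can be set at boot time,
 * for example in /boot/loader.conf (example values; they remain subject
 * to the min/max and alignment checks performed in igb_attach()):
 *     hw.igb.rxd="2048"
 *     hw.igb.txd="2048"
 */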
290
291/*
292** AIM: Adaptive Interrupt Moderation
293** which means that the interrupt rate
294** is varied over time based on the
295** traffic for that interrupt vector
296*/
297static int igb_enable_aim = TRUE;
298TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
299
300/*
301 * MSIX should be the default for best performance,
302 * but this allows it to be forced off for testing.
303 */
304static int igb_enable_msix = 1;
305TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
306
307/*
308 * Header split has seemed to be beneficial in
309 * many circumstances tested; however, there have
310 * been some stability issues, so the default is
311 * off.
312 */
313static bool igb_header_split = FALSE;
314TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
315
316/*
317** This will autoconfigure based on
318** the number of CPUs if left at 0.
319*/
320static int igb_num_queues = 0;
321TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
322
323/* How many packets rxeof tries to clean at a time */
324static int igb_rx_process_limit = 100;
325TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
326
327/* Flow control setting - default to FULL */
328static int igb_fc_setting = e1000_fc_full;
329TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
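/*
 * For reference (values assumed to follow the shared-code e1000_fc_type
 * enum; verify against e1000_hw.h): 0 = none, 1 = rx pause,
 * 2 = tx pause, 3 = full flow control.
 */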
330
331/*
332** Shadow VFTA table; this is needed because
333** the real filter table gets cleared during
334** a soft reset and the driver needs to be able
335** to repopulate it.
336*/
337static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
338
339
340/*********************************************************************
341 *  Device identification routine
342 *
343 *  igb_probe determines whether the driver should be loaded on an
344 *  adapter, based on the PCI vendor/device ID of the adapter.
345 *
346 *  return BUS_PROBE_DEFAULT on success, positive on failure
347 *********************************************************************/
348
349static int
350igb_probe(device_t dev)
351{
352	char		adapter_name[60];
353	uint16_t	pci_vendor_id = 0;
354	uint16_t	pci_device_id = 0;
355	uint16_t	pci_subvendor_id = 0;
356	uint16_t	pci_subdevice_id = 0;
357	igb_vendor_info_t *ent;
358
359	INIT_DEBUGOUT("igb_probe: begin");
360
361	pci_vendor_id = pci_get_vendor(dev);
362	if (pci_vendor_id != IGB_VENDOR_ID)
363		return (ENXIO);
364
365	pci_device_id = pci_get_device(dev);
366	pci_subvendor_id = pci_get_subvendor(dev);
367	pci_subdevice_id = pci_get_subdevice(dev);
368
369	ent = igb_vendor_info_array;
370	while (ent->vendor_id != 0) {
371		if ((pci_vendor_id == ent->vendor_id) &&
372		    (pci_device_id == ent->device_id) &&
373
374		    ((pci_subvendor_id == ent->subvendor_id) ||
375		    (ent->subvendor_id == PCI_ANY_ID)) &&
376
377		    ((pci_subdevice_id == ent->subdevice_id) ||
378		    (ent->subdevice_id == PCI_ANY_ID))) {
379			sprintf(adapter_name, "%s %s",
380				igb_strings[ent->index],
381				igb_driver_version);
382			device_set_desc_copy(dev, adapter_name);
383			return (BUS_PROBE_DEFAULT);
384		}
385		ent++;
386	}
387
388	return (ENXIO);
389}
390
391/*********************************************************************
392 *  Device initialization routine
393 *
394 *  The attach entry point is called when the driver is being loaded.
395 *  This routine identifies the type of hardware, allocates all resources
396 *  and initializes the hardware.
397 *
398 *  return 0 on success, positive on failure
399 *********************************************************************/
400
401static int
402igb_attach(device_t dev)
403{
404	struct adapter	*adapter;
405	int		error = 0;
406	u16		eeprom_data;
407
408	INIT_DEBUGOUT("igb_attach: begin");
409
410	adapter = device_get_softc(dev);
411	adapter->dev = adapter->osdep.dev = dev;
412	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
413
414	/* SYSCTL stuff */
415	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
416	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
417	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
418	    igb_sysctl_nvm_info, "I", "NVM Information");
419
420	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
421	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
422	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
423	    &igb_fc_setting, 0, "Flow Control");
424
425	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
426	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
427	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
428	    &igb_enable_aim, 1, "Interrupt Moderation");
429
430	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
431
432	/* Determine hardware and mac info */
433	igb_identify_hardware(adapter);
434
435	/* Setup PCI resources */
436	if (igb_allocate_pci_resources(adapter)) {
437		device_printf(dev, "Allocation of PCI resources failed\n");
438		error = ENXIO;
439		goto err_pci;
440	}
441
442	/* Do Shared Code initialization */
443	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
444		device_printf(dev, "Setup of Shared code failed\n");
445		error = ENXIO;
446		goto err_pci;
447	}
448
449	e1000_get_bus_info(&adapter->hw);
450
451	/* Sysctls for limiting the amount of work done in the taskqueue */
452	igb_add_rx_process_limit(adapter, "rx_processing_limit",
453	    "max number of rx packets to process", &adapter->rx_process_limit,
454	    igb_rx_process_limit);
455
456	/*
457	 * Validate the number of transmit and receive descriptors. It
458	 * must not exceed the hardware maximum, and must be a multiple
459	 * of IGB_DBA_ALIGN.
460	 */
461	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
462	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
463		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
464		    IGB_DEFAULT_TXD, igb_txd);
465		adapter->num_tx_desc = IGB_DEFAULT_TXD;
466	} else
467		adapter->num_tx_desc = igb_txd;
468	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
469	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
470		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
471		    IGB_DEFAULT_RXD, igb_rxd);
472		adapter->num_rx_desc = IGB_DEFAULT_RXD;
473	} else
474		adapter->num_rx_desc = igb_rxd;
475
476	adapter->hw.mac.autoneg = DO_AUTO_NEG;
477	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
478	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
479
480	/* Copper options */
481	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
482		adapter->hw.phy.mdix = AUTO_ALL_MODES;
483		adapter->hw.phy.disable_polarity_correction = FALSE;
484		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
485	}
486
487	/*
488	 * Set the frame limits assuming
489	 * standard ethernet sized frames.
490	 */
491	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
492	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
493
494	/*
495	** Allocate and Setup Queues
496	*/
497	if (igb_allocate_queues(adapter)) {
498		error = ENOMEM;
499		goto err_pci;
500	}
501
502	/* Allocate the appropriate stats memory */
503	if (adapter->hw.mac.type == e1000_vfadapt) {
504		adapter->stats =
505		    (struct e1000_vf_stats *)malloc(sizeof \
506		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
507		igb_vf_init_stats(adapter);
508	} else
509		adapter->stats =
510		    (struct e1000_hw_stats *)malloc(sizeof \
511		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
512
513	/*
514	** Start from a known state; this is
515	** important before reading the NVM and
516	** the MAC address from it.
517	*/
518	e1000_reset_hw(&adapter->hw);
519
520	/* Make sure we have a good EEPROM before we read from it */
521	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
522		/*
523		** Some PCI-E parts fail the first check due to
524		** the link being in a sleep state; call it again,
525		** and if it fails a second time it's a real issue.
526		*/
527		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
528			device_printf(dev,
529			    "The EEPROM Checksum Is Not Valid\n");
530			error = EIO;
531			goto err_late;
532		}
533	}
534
535	/*
536	** Copy the permanent MAC address out of the EEPROM
537	*/
538	if (e1000_read_mac_addr(&adapter->hw) < 0) {
539		device_printf(dev, "EEPROM read error while reading MAC"
540		    " address\n");
541		error = EIO;
542		goto err_late;
543	}
544	/* Check its sanity */
545	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
546		device_printf(dev, "Invalid MAC address\n");
547		error = EIO;
548		goto err_late;
549	}
550
551	/*
552	** Configure Interrupts
553	*/
554	if ((adapter->msix > 1) && (igb_enable_msix))
555		error = igb_allocate_msix(adapter);
556	else /* MSI or Legacy */
557		error = igb_allocate_legacy(adapter);
558	if (error)
559		goto err_late;
560
561	/* Setup OS specific network interface */
562	igb_setup_interface(dev, adapter);
563
564	/* Now get a good starting state */
565	igb_reset(adapter);
566
567	/* Initialize statistics */
568	igb_update_stats_counters(adapter);
569
570	adapter->hw.mac.get_link_status = 1;
571	igb_update_link_status(adapter);
572
573	/* Indicate SOL/IDER usage */
574	if (e1000_check_reset_block(&adapter->hw))
575		device_printf(dev,
576		    "PHY reset is blocked due to SOL/IDER session.\n");
577
578	/* Determine if we have to control management hardware */
579	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
580
581	/*
582	 * Setup Wake-on-Lan
583	 */
584	/* APME bit in EEPROM is mapped to WUC.APME */
585	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
586	if (eeprom_data)
587		adapter->wol = E1000_WUFC_MAG;
588
589	/* Register for VLAN events */
590	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
591	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
592	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
593	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
594
595	igb_add_hw_stats(adapter);
596
597	/* Tell the stack that the interface is not active */
598	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
599
600	adapter->led_dev = led_create(igb_led_func, adapter,
601	    device_get_nameunit(dev));
602
603	INIT_DEBUGOUT("igb_attach: end");
604
605	return (0);
606
607err_late:
608	igb_free_transmit_structures(adapter);
609	igb_free_receive_structures(adapter);
610	igb_release_hw_control(adapter);
611err_pci:
612	igb_free_pci_resources(adapter);
613	IGB_CORE_LOCK_DESTROY(adapter);
614
615	return (error);
616}
617
618/*********************************************************************
619 *  Device removal routine
620 *
621 *  The detach entry point is called when the driver is being removed.
622 *  This routine stops the adapter and deallocates all the resources
623 *  that were allocated for driver operation.
624 *
625 *  return 0 on success, positive on failure
626 *********************************************************************/
627
628static int
629igb_detach(device_t dev)
630{
631	struct adapter	*adapter = device_get_softc(dev);
632	struct ifnet	*ifp = adapter->ifp;
633
634	INIT_DEBUGOUT("igb_detach: begin");
635
636	/* Make sure VLANS are not using driver */
637	if (adapter->ifp->if_vlantrunk != NULL) {
638		device_printf(dev,"Vlan in use, detach first\n");
639		return (EBUSY);
640	}
641
642	if (adapter->led_dev != NULL)
643		led_destroy(adapter->led_dev);
644
645#ifdef DEVICE_POLLING
646	if (ifp->if_capenable & IFCAP_POLLING)
647		ether_poll_deregister(ifp);
648#endif
649
650	IGB_CORE_LOCK(adapter);
651	adapter->in_detach = 1;
652	igb_stop(adapter);
653	IGB_CORE_UNLOCK(adapter);
654
655	e1000_phy_hw_reset(&adapter->hw);
656
657	/* Give control back to firmware */
658	igb_release_manageability(adapter);
659	igb_release_hw_control(adapter);
660
661	if (adapter->wol) {
662		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
663		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
664		igb_enable_wakeup(dev);
665	}
666
667	/* Unregister VLAN events */
668	if (adapter->vlan_attach != NULL)
669		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
670	if (adapter->vlan_detach != NULL)
671		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
672
673	ether_ifdetach(adapter->ifp);
674
675	callout_drain(&adapter->timer);
676
677	igb_free_pci_resources(adapter);
678	bus_generic_detach(dev);
679	if_free(ifp);
680
681	igb_free_transmit_structures(adapter);
682	igb_free_receive_structures(adapter);
683
684	IGB_CORE_LOCK_DESTROY(adapter);
685
686	return (0);
687}
688
689/*********************************************************************
690 *
691 *  Shutdown entry point
692 *
693 **********************************************************************/
694
695static int
696igb_shutdown(device_t dev)
697{
698	return igb_suspend(dev);
699}
700
701/*
702 * Suspend/resume device methods.
703 */
704static int
705igb_suspend(device_t dev)
706{
707	struct adapter *adapter = device_get_softc(dev);
708
709	IGB_CORE_LOCK(adapter);
710
711	igb_stop(adapter);
712
713        igb_release_manageability(adapter);
714	igb_release_hw_control(adapter);
715
716        if (adapter->wol) {
717                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
718                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
719                igb_enable_wakeup(dev);
720        }
721
722	IGB_CORE_UNLOCK(adapter);
723
724	return bus_generic_suspend(dev);
725}
726
727static int
728igb_resume(device_t dev)
729{
730	struct adapter *adapter = device_get_softc(dev);
731	struct ifnet *ifp = adapter->ifp;
732
733	IGB_CORE_LOCK(adapter);
734	igb_init_locked(adapter);
735	igb_init_manageability(adapter);
736
737	if ((ifp->if_flags & IFF_UP) &&
738	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
739		igb_start(ifp);
740
741	IGB_CORE_UNLOCK(adapter);
742
743	return bus_generic_resume(dev);
744}
745
746
747/*********************************************************************
748 *  Transmit entry point
749 *
750 *  igb_start is called by the stack to initiate a transmit.
751 *  The driver will remain in this routine as long as there are
752 *  packets to transmit and transmit resources are available.
753 *  In case resources are not available, the stack is notified and
754 *  the packet is requeued.
755 **********************************************************************/
756
757static void
758igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
759{
760	struct adapter	*adapter = ifp->if_softc;
761	struct mbuf	*m_head;
762
763	IGB_TX_LOCK_ASSERT(txr);
764
765	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
766	    IFF_DRV_RUNNING)
767		return;
768	if (!adapter->link_active)
769		return;
770
771	/* Call cleanup if number of TX descriptors low */
772	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
773		igb_txeof(txr);
774
775	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
776		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
777			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
778			break;
779		}
780		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
781		if (m_head == NULL)
782			break;
783		/*
784		 *  Encapsulation can modify our pointer, and/or make it
785		 *  NULL on failure.  In that event, we can't requeue.
786		 */
787		if (igb_xmit(txr, &m_head)) {
788			if (m_head == NULL)
789				break;
790			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
791			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
792			break;
793		}
794
795		/* Send a copy of the frame to the BPF listener */
796		ETHER_BPF_MTAP(ifp, m_head);
797
798		/* Set watchdog on */
799		txr->watchdog_time = ticks;
800		txr->watchdog_check = TRUE;
801	}
802}
803
804/*
805 * Legacy TX driver routine, called from the
806 * stack, always uses tx[0], and spins for it.
807 * Should not be used with multiqueue tx
808 */
809static void
810igb_start(struct ifnet *ifp)
811{
812	struct adapter	*adapter = ifp->if_softc;
813	struct tx_ring	*txr = adapter->tx_rings;
814
815	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
816		IGB_TX_LOCK(txr);
817		igb_start_locked(txr, ifp);
818		IGB_TX_UNLOCK(txr);
819	}
820	return;
821}
822
823#if __FreeBSD_version >= 800000
824/*
825** Multiqueue Transmit driver
826**
827*/
828static int
829igb_mq_start(struct ifnet *ifp, struct mbuf *m)
830{
831	struct adapter		*adapter = ifp->if_softc;
832	struct igb_queue	*que;
833	struct tx_ring		*txr;
834	int 			i = 0, err = 0;
835
836	/* Which queue to use */
837	if ((m->m_flags & M_FLOWID) != 0)
838		i = m->m_pkthdr.flowid % adapter->num_queues;
839
840	txr = &adapter->tx_rings[i];
841	que = &adapter->queues[i];
842
843	if (IGB_TX_TRYLOCK(txr)) {
844		err = igb_mq_start_locked(ifp, txr, m);
845		IGB_TX_UNLOCK(txr);
846	} else {
847		err = drbr_enqueue(ifp, txr->br, m);
848		taskqueue_enqueue(que->tq, &que->que_task);
849	}
850
851	return (err);
852}
853
854static int
855igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
856{
857	struct adapter  *adapter = txr->adapter;
858        struct mbuf     *next;
859        int             err = 0, enq;
860
861	IGB_TX_LOCK_ASSERT(txr);
862
863	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
864	    IFF_DRV_RUNNING || adapter->link_active == 0) {
865		if (m != NULL)
866			err = drbr_enqueue(ifp, txr->br, m);
867		return (err);
868	}
869
870	/* Call cleanup if number of TX descriptors low */
871	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
872		igb_txeof(txr);
873
874	enq = 0;
875	if (m == NULL) {
876		next = drbr_dequeue(ifp, txr->br);
877	} else if (drbr_needs_enqueue(ifp, txr->br)) {
878		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
879			return (err);
880		next = drbr_dequeue(ifp, txr->br);
881	} else
882		next = m;
883
884	/* Process the queue */
885	while (next != NULL) {
886		if ((err = igb_xmit(txr, &next)) != 0) {
887			if (next != NULL)
888				err = drbr_enqueue(ifp, txr->br, next);
889			break;
890		}
891		enq++;
892		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
893		ETHER_BPF_MTAP(ifp, next);
894		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
895			break;
896		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
897			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
898			break;
899		}
900		next = drbr_dequeue(ifp, txr->br);
901	}
902	if (enq > 0) {
903		/* Set the watchdog */
904		txr->watchdog_check = TRUE;
905		txr->watchdog_time = ticks;
906	}
907	return (err);
908}
909
910/*
911** Flush all ring buffers
912*/
913static void
914igb_qflush(struct ifnet *ifp)
915{
916	struct adapter	*adapter = ifp->if_softc;
917	struct tx_ring	*txr = adapter->tx_rings;
918	struct mbuf	*m;
919
920	for (int i = 0; i < adapter->num_queues; i++, txr++) {
921		IGB_TX_LOCK(txr);
922		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
923			m_freem(m);
924		IGB_TX_UNLOCK(txr);
925	}
926	if_qflush(ifp);
927}
928#endif /* __FreeBSD_version >= 800000 */
929
930/*********************************************************************
931 *  Ioctl entry point
932 *
933 *  igb_ioctl is called when the user wants to configure the
934 *  interface.
935 *
936 *  return 0 on success, positive on failure
937 **********************************************************************/
938
939static int
940igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
941{
942	struct adapter	*adapter = ifp->if_softc;
943	struct ifreq *ifr = (struct ifreq *)data;
944#ifdef INET
945	struct ifaddr *ifa = (struct ifaddr *)data;
946#endif
947	int error = 0;
948
949	if (adapter->in_detach)
950		return (error);
951
952	switch (command) {
953	case SIOCSIFADDR:
954#ifdef INET
955		if (ifa->ifa_addr->sa_family == AF_INET) {
956			/*
957			 * XXX
958			 * Since resetting hardware takes a very long time
959			 * and results in link renegotiation, we
960			 * initialize the hardware only when it is absolutely
961			 * required.
962			 */
963			ifp->if_flags |= IFF_UP;
964			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
965				IGB_CORE_LOCK(adapter);
966				igb_init_locked(adapter);
967				IGB_CORE_UNLOCK(adapter);
968			}
969			if (!(ifp->if_flags & IFF_NOARP))
970				arp_ifinit(ifp, ifa);
971		} else
972#endif
973			error = ether_ioctl(ifp, command, data);
974		break;
975	case SIOCSIFMTU:
976	    {
977		int max_frame_size;
978
979		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
980
981		IGB_CORE_LOCK(adapter);
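		/*
		 * The limit below is the largest frame size this driver
		 * accepts (jumbo frames); the requested MTU is validated
		 * against it.
		 */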
982		max_frame_size = 9234;
983		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
984		    ETHER_CRC_LEN) {
985			IGB_CORE_UNLOCK(adapter);
986			error = EINVAL;
987			break;
988		}
989
990		ifp->if_mtu = ifr->ifr_mtu;
991		adapter->max_frame_size =
992		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
993		igb_init_locked(adapter);
994		IGB_CORE_UNLOCK(adapter);
995		break;
996	    }
997	case SIOCSIFFLAGS:
998		IOCTL_DEBUGOUT("ioctl rcv'd:\
999		    SIOCSIFFLAGS (Set Interface Flags)");
1000		IGB_CORE_LOCK(adapter);
1001		if (ifp->if_flags & IFF_UP) {
1002			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1003				if ((ifp->if_flags ^ adapter->if_flags) &
1004				    (IFF_PROMISC | IFF_ALLMULTI)) {
1005					igb_disable_promisc(adapter);
1006					igb_set_promisc(adapter);
1007				}
1008			} else
1009				igb_init_locked(adapter);
1010		} else
1011			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1012				igb_stop(adapter);
1013		adapter->if_flags = ifp->if_flags;
1014		IGB_CORE_UNLOCK(adapter);
1015		break;
1016	case SIOCADDMULTI:
1017	case SIOCDELMULTI:
1018		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1019		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1020			IGB_CORE_LOCK(adapter);
1021			igb_disable_intr(adapter);
1022			igb_set_multi(adapter);
1023#ifdef DEVICE_POLLING
1024			if (!(ifp->if_capenable & IFCAP_POLLING))
1025#endif
1026				igb_enable_intr(adapter);
1027			IGB_CORE_UNLOCK(adapter);
1028		}
1029		break;
1030	case SIOCSIFMEDIA:
1031		/* Check SOL/IDER usage */
1032		IGB_CORE_LOCK(adapter);
1033		if (e1000_check_reset_block(&adapter->hw)) {
1034			IGB_CORE_UNLOCK(adapter);
1035			device_printf(adapter->dev, "Media change is"
1036			    " blocked due to SOL/IDER session.\n");
1037			break;
1038		}
1039		IGB_CORE_UNLOCK(adapter);
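		/* FALLTHROUGH to the shared media handling below */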
1040	case SIOCGIFMEDIA:
1041		IOCTL_DEBUGOUT("ioctl rcv'd: \
1042		    SIOCxIFMEDIA (Get/Set Interface Media)");
1043		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1044		break;
1045	case SIOCSIFCAP:
1046	    {
1047		int mask, reinit;
1048
1049		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1050		reinit = 0;
1051		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1052#ifdef DEVICE_POLLING
1053		if (mask & IFCAP_POLLING) {
1054			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1055				error = ether_poll_register(igb_poll, ifp);
1056				if (error)
1057					return (error);
1058				IGB_CORE_LOCK(adapter);
1059				igb_disable_intr(adapter);
1060				ifp->if_capenable |= IFCAP_POLLING;
1061				IGB_CORE_UNLOCK(adapter);
1062			} else {
1063				error = ether_poll_deregister(ifp);
1064				/* Enable interrupt even in error case */
1065				IGB_CORE_LOCK(adapter);
1066				igb_enable_intr(adapter);
1067				ifp->if_capenable &= ~IFCAP_POLLING;
1068				IGB_CORE_UNLOCK(adapter);
1069			}
1070		}
1071#endif
1072		if (mask & IFCAP_HWCSUM) {
1073			ifp->if_capenable ^= IFCAP_HWCSUM;
1074			reinit = 1;
1075		}
1076		if (mask & IFCAP_TSO4) {
1077			ifp->if_capenable ^= IFCAP_TSO4;
1078			reinit = 1;
1079		}
1080		if (mask & IFCAP_VLAN_HWTAGGING) {
1081			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1082			reinit = 1;
1083		}
1084		if (mask & IFCAP_VLAN_HWFILTER) {
1085			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1086			reinit = 1;
1087		}
1088		if (mask & IFCAP_LRO) {
1089			ifp->if_capenable ^= IFCAP_LRO;
1090			reinit = 1;
1091		}
1092		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1093			igb_init(adapter);
1094		VLAN_CAPABILITIES(ifp);
1095		break;
1096	    }
1097
1098	default:
1099		error = ether_ioctl(ifp, command, data);
1100		break;
1101	}
1102
1103	return (error);
1104}
1105
1106
1107/*********************************************************************
1108 *  Init entry point
1109 *
1110 *  This routine is used in two ways. It is used by the stack as
1111 *  the init entry point in the network interface structure. It is also used
1112 *  by the driver as a hw/sw initialization routine to get to a
1113 *  consistent state.
1114 *
1115 *  return 0 on success, positive on failure
1116 **********************************************************************/
1117
1118static void
1119igb_init_locked(struct adapter *adapter)
1120{
1121	struct ifnet	*ifp = adapter->ifp;
1122	device_t	dev = adapter->dev;
1123
1124	INIT_DEBUGOUT("igb_init: begin");
1125
1126	IGB_CORE_LOCK_ASSERT(adapter);
1127
1128	igb_disable_intr(adapter);
1129	callout_stop(&adapter->timer);
1130
1131	/* Get the latest mac address, User can use a LAA */
1132        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1133              ETHER_ADDR_LEN);
1134
1135	/* Put the address into the Receive Address Array */
1136	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1137
1138	igb_reset(adapter);
1139	igb_update_link_status(adapter);
1140
1141	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1142
1143        /* Use real VLAN Filter support? */
1144	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1145		if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1146			/* Use real VLAN Filter support */
1147			igb_setup_vlan_hw_support(adapter);
1148		else {
1149			u32 ctrl;
1150			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
1151			ctrl |= E1000_CTRL_VME;
1152			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
1153		}
1154	}
1155
1156	/* Set hardware offload abilities */
1157	ifp->if_hwassist = 0;
1158	if (ifp->if_capenable & IFCAP_TXCSUM) {
1159		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1160#if __FreeBSD_version >= 800000
1161		if (adapter->hw.mac.type == e1000_82576)
1162			ifp->if_hwassist |= CSUM_SCTP;
1163#endif
1164	}
1165
1166	if (ifp->if_capenable & IFCAP_TSO4)
1167		ifp->if_hwassist |= CSUM_TSO;
1168
1169	/* Configure for OS presence */
1170	igb_init_manageability(adapter);
1171
1172	/* Prepare transmit descriptors and buffers */
1173	igb_setup_transmit_structures(adapter);
1174	igb_initialize_transmit_units(adapter);
1175
1176	/* Setup Multicast table */
1177	igb_set_multi(adapter);
1178
1179	/*
1180	** Figure out the desired mbuf pool
1181	** for doing jumbo/packetsplit
1182	*/
1183	if (ifp->if_mtu > ETHERMTU)
1184		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1185	else
1186		adapter->rx_mbuf_sz = MCLBYTES;
1187
1188	/* Prepare receive descriptors and buffers */
1189	if (igb_setup_receive_structures(adapter)) {
1190		device_printf(dev, "Could not setup receive structures\n");
1191		return;
1192	}
1193	igb_initialize_receive_units(adapter);
1194
1195	/* Don't lose promiscuous settings */
1196	igb_set_promisc(adapter);
1197
1198	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1199	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1200
1201	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1202	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1203
1204	if (adapter->msix > 1) /* Set up queue routing */
1205		igb_configure_queues(adapter);
1206
1207	/* Set up VLAN tag offload and filter */
1208	igb_setup_vlan_hw_support(adapter);
1209
1210	/* this clears any pending interrupts */
1211	E1000_READ_REG(&adapter->hw, E1000_ICR);
1212#ifdef DEVICE_POLLING
1213	/*
1214	 * Only enable interrupts if we are not polling, make sure
1215	 * they are off otherwise.
1216	 */
1217	if (ifp->if_capenable & IFCAP_POLLING)
1218		igb_disable_intr(adapter);
1219	else
1220#endif /* DEVICE_POLLING */
1221	{
1222	igb_enable_intr(adapter);
1223	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1224	}
1225
1226	/* Don't reset the phy next time init gets called */
1227	adapter->hw.phy.reset_disable = TRUE;
1228}
1229
1230static void
1231igb_init(void *arg)
1232{
1233	struct adapter *adapter = arg;
1234
1235	IGB_CORE_LOCK(adapter);
1236	igb_init_locked(adapter);
1237	IGB_CORE_UNLOCK(adapter);
1238}
1239
1240
1241static void
1242igb_handle_que(void *context, int pending)
1243{
1244	struct igb_queue *que = context;
1245	struct adapter *adapter = que->adapter;
1246	struct tx_ring *txr = que->txr;
1247	struct ifnet	*ifp = adapter->ifp;
1248
1249	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1250		bool	more;
1251
1252		more = igb_rxeof(que, -1, NULL);
1253
1254		IGB_TX_LOCK(txr);
1255		if (igb_txeof(txr))
1256			more = TRUE;
1257#if __FreeBSD_version >= 800000
1258		if (!drbr_empty(ifp, txr->br))
1259			igb_mq_start_locked(ifp, txr, NULL);
1260#else
1261		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1262			igb_start_locked(txr, ifp);
1263#endif
1264		IGB_TX_UNLOCK(txr);
1265		if (more) {
1266			taskqueue_enqueue(que->tq, &que->que_task);
1267			return;
1268		}
1269	}
1270
1271#ifdef DEVICE_POLLING
1272	if (ifp->if_capenable & IFCAP_POLLING)
1273		return;
1274#endif
1275	/* Reenable this interrupt */
1276	if (que->eims)
1277		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1278	else
1279		igb_enable_intr(adapter);
1280}
1281
1282/* Deal with link in a sleepable context */
1283static void
1284igb_handle_link(void *context, int pending)
1285{
1286	struct adapter *adapter = context;
1287
1288	adapter->hw.mac.get_link_status = 1;
1289	igb_update_link_status(adapter);
1290}
1291
1292/*********************************************************************
1293 *
1294 *  MSI/Legacy Deferred
1295 *  Interrupt Service routine
1296 *
1297 *********************************************************************/
1298static int
1299igb_irq_fast(void *arg)
1300{
1301	struct adapter		*adapter = arg;
1302	struct igb_queue	*que = adapter->queues;
1303	u32			reg_icr;
1304
1305
1306	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1307
1308	/* Hot eject?  */
1309	if (reg_icr == 0xffffffff)
1310		return FILTER_STRAY;
1311
1312	/* Definitely not our interrupt.  */
1313	if (reg_icr == 0x0)
1314		return FILTER_STRAY;
1315
1316	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1317		return FILTER_STRAY;
1318
1319	/*
1320	 * Mask interrupts until the taskqueue is finished running.  This is
1321	 * cheap, just assume that it is needed.  This also works around the
1322	 * MSI message reordering errata on certain systems.
1323	 */
1324	igb_disable_intr(adapter);
1325	taskqueue_enqueue(que->tq, &que->que_task);
1326
1327	/* Link status change */
1328	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1329		taskqueue_enqueue(que->tq, &adapter->link_task);
1330
1331	if (reg_icr & E1000_ICR_RXO)
1332		adapter->rx_overruns++;
1333	return FILTER_HANDLED;
1334}
1335
1336#ifdef DEVICE_POLLING
1337/*********************************************************************
1338 *
1339 *  Legacy polling routine: if using this code you MUST be sure that
1340 *  multiqueue is not in use, i.e., set igb_num_queues to 1.
1341 *
1342 *********************************************************************/
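/*
 * Illustrative only: single-queue operation can be requested via the
 * hw.igb.num_queues loader tunable declared above, e.g.
 * hw.igb.num_queues="1" in /boot/loader.conf.
 */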
1343#if __FreeBSD_version >= 800000
1344#define POLL_RETURN_COUNT(a) (a)
1345static int
1346#else
1347#define POLL_RETURN_COUNT(a)
1348static void
1349#endif
1350igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1351{
1352	struct adapter		*adapter = ifp->if_softc;
1353	struct igb_queue	*que = adapter->queues;
1354	struct tx_ring		*txr = adapter->tx_rings;
1355	u32			reg_icr, rx_done = 0;
1356	u32			loop = IGB_MAX_LOOP;
1357	bool			more;
1358
1359	IGB_CORE_LOCK(adapter);
1360	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1361		IGB_CORE_UNLOCK(adapter);
1362		return POLL_RETURN_COUNT(rx_done);
1363	}
1364
1365	if (cmd == POLL_AND_CHECK_STATUS) {
1366		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1367		/* Link status change */
1368		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1369			igb_handle_link(adapter, 0);
1370
1371		if (reg_icr & E1000_ICR_RXO)
1372			adapter->rx_overruns++;
1373	}
1374	IGB_CORE_UNLOCK(adapter);
1375
1376	igb_rxeof(que, count, &rx_done);
1377
1378	IGB_TX_LOCK(txr);
1379	do {
1380		more = igb_txeof(txr);
1381	} while (loop-- && more);
1382#if __FreeBSD_version >= 800000
1383	if (!drbr_empty(ifp, txr->br))
1384		igb_mq_start_locked(ifp, txr, NULL);
1385#else
1386	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1387		igb_start_locked(txr, ifp);
1388#endif
1389	IGB_TX_UNLOCK(txr);
1390	return POLL_RETURN_COUNT(rx_done);
1391}
1392#endif /* DEVICE_POLLING */
1393
1394/*********************************************************************
1395 *
1396 *  MSIX TX Interrupt Service routine
1397 *
1398 **********************************************************************/
1399static void
1400igb_msix_que(void *arg)
1401{
1402	struct igb_queue *que = arg;
1403	struct adapter *adapter = que->adapter;
1404	struct tx_ring *txr = que->txr;
1405	struct rx_ring *rxr = que->rxr;
1406	u32		newitr = 0;
1407	bool		more_tx, more_rx;
1408
1409	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1410	++que->irqs;
1411
1412	IGB_TX_LOCK(txr);
1413	more_tx = igb_txeof(txr);
1414	IGB_TX_UNLOCK(txr);
1415
1416	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1417
1418	if (igb_enable_aim == FALSE)
1419		goto no_calc;
1420	/*
1421	** Do Adaptive Interrupt Moderation:
1422        **  - Write out last calculated setting
1423	**  - Calculate based on average size over
1424	**    the last interval.
1425	*/
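	/*
	 * Worked example of the computation below: at gigabit speed, an
	 * average frame of 600 bytes gives newitr = 600 + 24 = 624, which
	 * lands in the mid range (300..1200) and is divided by 3, yielding
	 * 208 before the final mask is applied.
	 */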
1426        if (que->eitr_setting)
1427                E1000_WRITE_REG(&adapter->hw,
1428                    E1000_EITR(que->msix), que->eitr_setting);
1429
1430        que->eitr_setting = 0;
1431
1432        /* Idle, do nothing */
1433        if ((txr->bytes == 0) && (rxr->bytes == 0))
1434                goto no_calc;
1435
1436        /* Use half the default if the link is sub-gigabit */
1437        if (adapter->link_speed != 1000)
1438                newitr = IGB_DEFAULT_ITR / 2;
1439        else {
1440		if ((txr->bytes) && (txr->packets))
1441                	newitr = txr->bytes/txr->packets;
1442		if ((rxr->bytes) && (rxr->packets))
1443			newitr = max(newitr,
1444			    (rxr->bytes / rxr->packets));
1445                newitr += 24; /* account for hardware frame, crc */
1446		/* set an upper boundary */
1447		newitr = min(newitr, 3000);
1448		/* Be nice to the mid range */
1449                if ((newitr > 300) && (newitr < 1200))
1450                        newitr = (newitr / 3);
1451                else
1452                        newitr = (newitr / 2);
1453        }
1454        newitr &= 0x7FFC;  /* Mask invalid bits */
1455        if (adapter->hw.mac.type == e1000_82575)
1456                newitr |= newitr << 16;
1457        else
1458                newitr |= E1000_EITR_CNT_IGNR;
1459
1460        /* save for next interrupt */
1461        que->eitr_setting = newitr;
1462
1463        /* Reset state */
1464        txr->bytes = 0;
1465        txr->packets = 0;
1466        rxr->bytes = 0;
1467        rxr->packets = 0;
1468
1469no_calc:
1470	/* Schedule a clean task if needed*/
1471	if (more_tx || more_rx)
1472		taskqueue_enqueue(que->tq, &que->que_task);
1473	else
1474		/* Reenable this interrupt */
1475		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1476	return;
1477}
1478
1479
1480/*********************************************************************
1481 *
1482 *  MSIX Link Interrupt Service routine
1483 *
1484 **********************************************************************/
1485
1486static void
1487igb_msix_link(void *arg)
1488{
1489	struct adapter	*adapter = arg;
1490	u32       	icr;
1491
1492	++adapter->link_irq;
1493	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1494	if (!(icr & E1000_ICR_LSC))
1495		goto spurious;
1496	igb_handle_link(adapter, 0);
1497
1498spurious:
1499	/* Rearm */
1500	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1501	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1502	return;
1503}
1504
1505
1506/*********************************************************************
1507 *
1508 *  Media Ioctl callback
1509 *
1510 *  This routine is called whenever the user queries the status of
1511 *  the interface using ifconfig.
1512 *
1513 **********************************************************************/
1514static void
1515igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1516{
1517	struct adapter *adapter = ifp->if_softc;
1518	u_char fiber_type = IFM_1000_SX;
1519
1520	INIT_DEBUGOUT("igb_media_status: begin");
1521
1522	IGB_CORE_LOCK(adapter);
1523	igb_update_link_status(adapter);
1524
1525	ifmr->ifm_status = IFM_AVALID;
1526	ifmr->ifm_active = IFM_ETHER;
1527
1528	if (!adapter->link_active) {
1529		IGB_CORE_UNLOCK(adapter);
1530		return;
1531	}
1532
1533	ifmr->ifm_status |= IFM_ACTIVE;
1534
1535	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1536	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1537		ifmr->ifm_active |= fiber_type | IFM_FDX;
1538	else {
1539		switch (adapter->link_speed) {
1540		case 10:
1541			ifmr->ifm_active |= IFM_10_T;
1542			break;
1543		case 100:
1544			ifmr->ifm_active |= IFM_100_TX;
1545			break;
1546		case 1000:
1547			ifmr->ifm_active |= IFM_1000_T;
1548			break;
1549		}
1550		if (adapter->link_duplex == FULL_DUPLEX)
1551			ifmr->ifm_active |= IFM_FDX;
1552		else
1553			ifmr->ifm_active |= IFM_HDX;
1554	}
1555	IGB_CORE_UNLOCK(adapter);
1556}
1557
1558/*********************************************************************
1559 *
1560 *  Media Ioctl callback
1561 *
1562 *  This routine is called when the user changes speed/duplex using
1563 *  media/mediaopt option with ifconfig.
1564 *
1565 **********************************************************************/
1566static int
1567igb_media_change(struct ifnet *ifp)
1568{
1569	struct adapter *adapter = ifp->if_softc;
1570	struct ifmedia  *ifm = &adapter->media;
1571
1572	INIT_DEBUGOUT("igb_media_change: begin");
1573
1574	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1575		return (EINVAL);
1576
1577	IGB_CORE_LOCK(adapter);
1578	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1579	case IFM_AUTO:
1580		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1581		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1582		break;
1583	case IFM_1000_LX:
1584	case IFM_1000_SX:
1585	case IFM_1000_T:
1586		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1587		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1588		break;
1589	case IFM_100_TX:
1590		adapter->hw.mac.autoneg = FALSE;
1591		adapter->hw.phy.autoneg_advertised = 0;
1592		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1593			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1594		else
1595			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1596		break;
1597	case IFM_10_T:
1598		adapter->hw.mac.autoneg = FALSE;
1599		adapter->hw.phy.autoneg_advertised = 0;
1600		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1601			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1602		else
1603			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1604		break;
1605	default:
1606		device_printf(adapter->dev, "Unsupported media type\n");
1607	}
1608
1609	/* As the speed/duplex settings may have changed, we need to
1610	 * reset the PHY.
1611	 */
1612	adapter->hw.phy.reset_disable = FALSE;
1613
1614	igb_init_locked(adapter);
1615	IGB_CORE_UNLOCK(adapter);
1616
1617	return (0);
1618}
1619
1620
1621/*********************************************************************
1622 *
1623 *  This routine maps the mbufs to Advanced TX descriptors,
1624 *  as used by the 82575-class adapters.
1625 *
1626 **********************************************************************/
1627
1628static int
1629igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1630{
1631	struct adapter		*adapter = txr->adapter;
1632	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1633	bus_dmamap_t		map;
1634	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1635	union e1000_adv_tx_desc	*txd = NULL;
1636	struct mbuf		*m_head;
1637	u32			olinfo_status = 0, cmd_type_len = 0;
1638	int			nsegs, i, j, error, first, last = 0;
1639	u32			hdrlen = 0;
1640
1641	m_head = *m_headp;
1642
1643
1644	/* Set basic descriptor constants */
1645	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1646	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1647	if (m_head->m_flags & M_VLANTAG)
1648		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1649
1650        /*
1651         * Force a cleanup if the number of available TX
1652         * descriptors hits the threshold.
1653         */
1654	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1655		igb_txeof(txr);
1656		/* Do we now have at least the minimum required? */
1657		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1658			txr->no_desc_avail++;
1659			return (ENOBUFS);
1660		}
1661	}
1662
1663	/*
1664         * Map the packet for DMA.
1665	 *
1666	 * Capture the first descriptor index,
1667	 * this descriptor will have the index
1668	 * of the EOP which is the only one that
1669	 * now gets a DONE bit writeback.
1670	 */
1671	first = txr->next_avail_desc;
1672	tx_buffer = &txr->tx_buffers[first];
1673	tx_buffer_mapped = tx_buffer;
1674	map = tx_buffer->map;
1675
1676	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1677	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1678
1679	if (error == EFBIG) {
1680		struct mbuf *m;
1681
1682		m = m_defrag(*m_headp, M_DONTWAIT);
1683		if (m == NULL) {
1684			adapter->mbuf_defrag_failed++;
1685			m_freem(*m_headp);
1686			*m_headp = NULL;
1687			return (ENOBUFS);
1688		}
1689		*m_headp = m;
1690
1691		/* Try it again */
1692		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1693		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1694
1695		if (error == ENOMEM) {
1696			adapter->no_tx_dma_setup++;
1697			return (error);
1698		} else if (error != 0) {
1699			adapter->no_tx_dma_setup++;
1700			m_freem(*m_headp);
1701			*m_headp = NULL;
1702			return (error);
1703		}
1704	} else if (error == ENOMEM) {
1705		adapter->no_tx_dma_setup++;
1706		return (error);
1707	} else if (error != 0) {
1708		adapter->no_tx_dma_setup++;
1709		m_freem(*m_headp);
1710		*m_headp = NULL;
1711		return (error);
1712	}
1713
1714	/* Check again to be sure we have enough descriptors */
1715        if (nsegs > (txr->tx_avail - 2)) {
1716                txr->no_desc_avail++;
1717		bus_dmamap_unload(txr->txtag, map);
1718		return (ENOBUFS);
1719        }
1720	m_head = *m_headp;
1721
1722        /*
1723         * Set up the context descriptor:
1724         * used when any hardware offload is done.
1725	 * This includes CSUM, VLAN, and TSO. It
1726	 * will use the first descriptor.
1727         */
1728        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1729		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1730			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1731			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1732			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1733		} else
1734			return (ENXIO);
1735	} else if (igb_tx_ctx_setup(txr, m_head))
1736		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1737
1738	/* Calculate payload length */
1739	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1740	    << E1000_ADVTXD_PAYLEN_SHIFT);
1741
1742	/* 82575 needs the queue index added */
1743	if (adapter->hw.mac.type == e1000_82575)
1744		olinfo_status |= txr->me << 4;
1745
1746	/* Set up our transmit descriptors */
1747	i = txr->next_avail_desc;
1748	for (j = 0; j < nsegs; j++) {
1749		bus_size_t seg_len;
1750		bus_addr_t seg_addr;
1751
1752		tx_buffer = &txr->tx_buffers[i];
1753		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1754		seg_addr = segs[j].ds_addr;
1755		seg_len  = segs[j].ds_len;
1756
1757		txd->read.buffer_addr = htole64(seg_addr);
1758		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1759		txd->read.olinfo_status = htole32(olinfo_status);
1760		last = i;
1761		if (++i == adapter->num_tx_desc)
1762			i = 0;
1763		tx_buffer->m_head = NULL;
1764		tx_buffer->next_eop = -1;
1765	}
1766
1767	txr->next_avail_desc = i;
1768	txr->tx_avail -= nsegs;
1769
1770        tx_buffer->m_head = m_head;
1771	tx_buffer_mapped->map = tx_buffer->map;
1772	tx_buffer->map = map;
1773        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1774
1775        /*
1776         * Last Descriptor of Packet
1777	 * needs End Of Packet (EOP)
1778	 * and Report Status (RS)
1779         */
1780        txd->read.cmd_type_len |=
1781	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1782	/*
1783	 * Keep track in the first buffer which
1784	 * descriptor will be written back
1785	 */
1786	tx_buffer = &txr->tx_buffers[first];
1787	tx_buffer->next_eop = last;
1788	txr->watchdog_time = ticks;
1789
1790	/*
1791	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1792	 * that this frame is available to transmit.
1793	 */
1794	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1795	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1796	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1797	++txr->tx_packets;
1798
1799	return (0);
1800
1801}
1802
1803static void
1804igb_set_promisc(struct adapter *adapter)
1805{
1806	struct ifnet	*ifp = adapter->ifp;
1807	struct e1000_hw *hw = &adapter->hw;
1808	u32		reg;
1809
1810	if (hw->mac.type == e1000_vfadapt) {
1811		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1812		return;
1813	}
1814
1815	reg = E1000_READ_REG(hw, E1000_RCTL);
1816	if (ifp->if_flags & IFF_PROMISC) {
1817		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1818		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1819	} else if (ifp->if_flags & IFF_ALLMULTI) {
1820		reg |= E1000_RCTL_MPE;
1821		reg &= ~E1000_RCTL_UPE;
1822		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1823	}
1824}
1825
1826static void
1827igb_disable_promisc(struct adapter *adapter)
1828{
1829	struct e1000_hw *hw = &adapter->hw;
1830	u32		reg;
1831
1832	if (hw->mac.type == e1000_vfadapt) {
1833		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1834		return;
1835	}
1836	reg = E1000_READ_REG(hw, E1000_RCTL);
1837	reg &=  (~E1000_RCTL_UPE);
1838	reg &=  (~E1000_RCTL_MPE);
1839	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1840}
1841
1842
1843/*********************************************************************
1844 *  Multicast Update
1845 *
1846 *  This routine is called whenever the multicast address list is updated.
1847 *
1848 **********************************************************************/
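/*
 * Note: the hardware multicast table holds at most
 * MAX_NUM_MULTICAST_ADDRESSES entries; if the interface joins more
 * groups than that, the routine below falls back to accepting all
 * multicast traffic (RCTL.MPE) instead of filtering in hardware.
 */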
1849
1850static void
1851igb_set_multi(struct adapter *adapter)
1852{
1853	struct ifnet	*ifp = adapter->ifp;
1854	struct ifmultiaddr *ifma;
1855	u32 reg_rctl = 0;
1856	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1857
1858	int mcnt = 0;
1859
1860	IOCTL_DEBUGOUT("igb_set_multi: begin");
1861
1862#if __FreeBSD_version < 800000
1863	IF_ADDR_LOCK(ifp);
1864#else
1865	if_maddr_rlock(ifp);
1866#endif
1867	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1868		if (ifma->ifma_addr->sa_family != AF_LINK)
1869			continue;
1870
1871		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1872			break;
1873
1874		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1875		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1876		mcnt++;
1877	}
1878#if __FreeBSD_version < 800000
1879	IF_ADDR_UNLOCK(ifp);
1880#else
1881	if_maddr_runlock(ifp);
1882#endif
1883
1884	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1885		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1886		reg_rctl |= E1000_RCTL_MPE;
1887		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1888	} else
1889		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1890}
1891
1892
1893/*********************************************************************
1894 *  Timer routine:
1895 *  	This routine checks the link status,
1896 *	updates statistics, and runs the per-queue TX watchdog.
1897 *
1898 **********************************************************************/
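/*
 * Note on the watchdog: each TX ring stamps watchdog_time whenever a
 * descriptor is queued or cleaned; if an armed ring has made no
 * progress for IGB_WATCHDOG ticks, the timer below logs the ring
 * state and reinitializes the adapter.
 */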
1899
1900static void
1901igb_local_timer(void *arg)
1902{
1903	struct adapter		*adapter = arg;
1904	device_t		dev = adapter->dev;
1905	struct tx_ring		*txr = adapter->tx_rings;
1906
1907
1908	IGB_CORE_LOCK_ASSERT(adapter);
1909
1910	igb_update_link_status(adapter);
1911	igb_update_stats_counters(adapter);
1912
1913        /*
1914        ** Watchdog: check for time since any descriptor was cleaned
1915        */
1916	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1917		if (txr->watchdog_check == FALSE)
1918			continue;
1919		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1920			goto timeout;
1921	}
1922
1923	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1924	return;
1925
1926timeout:
1927	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1928	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1929            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1930            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1931	device_printf(dev, "TX(%d) desc avail = %d, "
1932            "Next TX to Clean = %d\n",
1933            txr->me, txr->tx_avail, txr->next_to_clean);
1934	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1935	adapter->watchdog_events++;
1936	igb_init_locked(adapter);
1937}
1938
1939static void
1940igb_update_link_status(struct adapter *adapter)
1941{
1942	struct e1000_hw *hw = &adapter->hw;
1943	struct ifnet *ifp = adapter->ifp;
1944	device_t dev = adapter->dev;
1945	struct tx_ring *txr = adapter->tx_rings;
1946	u32 link_check = 0;
1947
1948	/* Get the cached link value or read for real */
1949        switch (hw->phy.media_type) {
1950        case e1000_media_type_copper:
1951                if (hw->mac.get_link_status) {
1952			/* Do the work to read phy */
1953                        e1000_check_for_link(hw);
1954                        link_check = !hw->mac.get_link_status;
1955                } else
1956                        link_check = TRUE;
1957                break;
1958        case e1000_media_type_fiber:
1959                e1000_check_for_link(hw);
1960                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1961                                 E1000_STATUS_LU);
1962                break;
1963        case e1000_media_type_internal_serdes:
1964                e1000_check_for_link(hw);
1965                link_check = adapter->hw.mac.serdes_has_link;
1966                break;
1967	/* VF device is type_unknown */
1968        case e1000_media_type_unknown:
1969                e1000_check_for_link(hw);
1970		link_check = !hw->mac.get_link_status;
1971		/* Fall thru */
1972        default:
1973                break;
1974        }
1975
1976	/* Now we check if a transition has happened */
1977	if (link_check && (adapter->link_active == 0)) {
1978		e1000_get_speed_and_duplex(&adapter->hw,
1979		    &adapter->link_speed, &adapter->link_duplex);
1980		if (bootverbose)
1981			device_printf(dev, "Link is up %d Mbps %s\n",
1982			    adapter->link_speed,
1983			    ((adapter->link_duplex == FULL_DUPLEX) ?
1984			    "Full Duplex" : "Half Duplex"));
1985		adapter->link_active = 1;
1986		ifp->if_baudrate = adapter->link_speed * 1000000;
1987		/* This can sleep */
1988		if_link_state_change(ifp, LINK_STATE_UP);
1989	} else if (!link_check && (adapter->link_active == 1)) {
1990		ifp->if_baudrate = adapter->link_speed = 0;
1991		adapter->link_duplex = 0;
1992		if (bootverbose)
1993			device_printf(dev, "Link is Down\n");
1994		adapter->link_active = 0;
1995		/* This can sleep */
1996		if_link_state_change(ifp, LINK_STATE_DOWN);
1997		/* Turn off watchdogs */
1998		for (int i = 0; i < adapter->num_queues; i++, txr++)
1999			txr->watchdog_check = FALSE;
2000	}
2001}
2002
2003/*********************************************************************
2004 *
2005 *  This routine disables all traffic on the adapter by issuing a
2006 *  global reset on the MAC and deallocates TX/RX buffers.
2007 *
2008 **********************************************************************/
2009
2010static void
2011igb_stop(void *arg)
2012{
2013	struct adapter	*adapter = arg;
2014	struct ifnet	*ifp = adapter->ifp;
2015	struct tx_ring *txr = adapter->tx_rings;
2016
2017	IGB_CORE_LOCK_ASSERT(adapter);
2018
2019	INIT_DEBUGOUT("igb_stop: begin");
2020
2021	igb_disable_intr(adapter);
2022
2023	callout_stop(&adapter->timer);
2024
2025	/* Tell the stack that the interface is no longer active */
2026	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2027
2028	/* Unarm watchdog timer. */
2029	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2030		IGB_TX_LOCK(txr);
2031		txr->watchdog_check = FALSE;
2032		IGB_TX_UNLOCK(txr);
2033	}
2034
2035	e1000_reset_hw(&adapter->hw);
2036	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2037
2038	e1000_led_off(&adapter->hw);
2039	e1000_cleanup_led(&adapter->hw);
2040}
2041
2042
2043/*********************************************************************
2044 *
2045 *  Determine hardware revision.
2046 *
2047 **********************************************************************/
2048static void
2049igb_identify_hardware(struct adapter *adapter)
2050{
2051	device_t dev = adapter->dev;
2052
2053	/* Make sure our PCI config space has the necessary stuff set */
2054	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2056	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2057		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2058		    "bits were not set!\n");
2059		adapter->hw.bus.pci_cmd_word |=
2060		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2061		pci_write_config(dev, PCIR_COMMAND,
2062		    adapter->hw.bus.pci_cmd_word, 2);
2063	}
2064
2065	/* Save off the information about this board */
2066	adapter->hw.vendor_id = pci_get_vendor(dev);
2067	adapter->hw.device_id = pci_get_device(dev);
2068	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2069	adapter->hw.subsystem_vendor_id =
2070	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2071	adapter->hw.subsystem_device_id =
2072	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2073
2074	/* Set MAC type early for PCI setup */
2075	e1000_set_mac_type(&adapter->hw);
2076}
2077
2078static int
2079igb_allocate_pci_resources(struct adapter *adapter)
2080{
2081	device_t	dev = adapter->dev;
2082	int		rid;
2083
2084	rid = PCIR_BAR(0);
2085	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2086	    &rid, RF_ACTIVE);
2087	if (adapter->pci_mem == NULL) {
2088		device_printf(dev, "Unable to allocate bus resource: memory\n");
2089		return (ENXIO);
2090	}
2091	adapter->osdep.mem_bus_space_tag =
2092	    rman_get_bustag(adapter->pci_mem);
2093	adapter->osdep.mem_bus_space_handle =
2094	    rman_get_bushandle(adapter->pci_mem);
2095	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2096
2097	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2098
2099	/* This will setup either MSI/X or MSI */
2100	adapter->msix = igb_setup_msix(adapter);
2101	adapter->hw.back = &adapter->osdep;
2102
2103	return (0);
2104}
2105
2106/*********************************************************************
2107 *
2108 *  Setup the Legacy or MSI Interrupt handler
2109 *
2110 **********************************************************************/
2111static int
2112igb_allocate_legacy(struct adapter *adapter)
2113{
2114	device_t		dev = adapter->dev;
2115	struct igb_queue	*que = adapter->queues;
2116	int			error, rid = 0;
2117
2118	/* Turn off all interrupts */
2119	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2120
2121	/* MSI RID is 1 */
2122	if (adapter->msix == 1)
2123		rid = 1;
2124
2125	/* We allocate a single interrupt resource */
2126	adapter->res = bus_alloc_resource_any(dev,
2127	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2128	if (adapter->res == NULL) {
2129		device_printf(dev, "Unable to allocate bus resource: "
2130		    "interrupt\n");
2131		return (ENXIO);
2132	}
2133
2134	/*
2135	 * Try allocating a fast interrupt and the associated deferred
2136	 * processing contexts.
2137	 */
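	/*
	 * igb_irq_fast is registered below as a filter (no ithread
	 * handler), so it is expected to do only minimal acknowledgement
	 * work and defer the real RX/TX and link processing to the
	 * que_task/link_task taskqueues created here.
	 */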
2138	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2139	/* Make tasklet for deferred link handling */
2140	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2141	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2142	    taskqueue_thread_enqueue, &que->tq);
2143	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2144	    device_get_nameunit(adapter->dev));
2145	if ((error = bus_setup_intr(dev, adapter->res,
2146	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2147	    adapter, &adapter->tag)) != 0) {
2148		device_printf(dev, "Failed to register fast interrupt "
2149			    "handler: %d\n", error);
2150		taskqueue_free(que->tq);
2151		que->tq = NULL;
2152		return (error);
2153	}
2154
2155	return (0);
2156}
2157
2158
2159/*********************************************************************
2160 *
2161 *  Setup the MSIX Queue Interrupt handlers:
2162 *
2163 **********************************************************************/
2164static int
2165igb_allocate_msix(struct adapter *adapter)
2166{
2167	device_t		dev = adapter->dev;
2168	struct igb_queue	*que = adapter->queues;
2169	int			error, rid, vector = 0;
2170
2171
2172	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2173		rid = vector + 1;
2174		que->res = bus_alloc_resource_any(dev,
2175		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2176		if (que->res == NULL) {
2177			device_printf(dev,
2178			    "Unable to allocate bus resource: "
2179			    "MSIX Queue Interrupt\n");
2180			return (ENXIO);
2181		}
2182		error = bus_setup_intr(dev, que->res,
2183	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2184		    igb_msix_que, que, &que->tag);
2185		if (error) {
2186			que->res = NULL;
2187			device_printf(dev, "Failed to register Queue handler\n");
2188			return (error);
2189		}
2190#if __FreeBSD_version >= 800504
2191		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2192#endif
2193		que->msix = vector;
2194		if (adapter->hw.mac.type == e1000_82575)
2195			que->eims = E1000_EICR_TX_QUEUE0 << i;
2196		else
2197			que->eims = 1 << vector;
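		/*
		 * que->eims is this queue's bit in the extended interrupt
		 * mask registers (EIMS/EICR): the 82575 keeps separate
		 * RX/TX queue bits, while later MACs simply use one bit
		 * per MSI-X vector.
		 */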
2198		/*
2199		** Bind the msix vector, and thus the
2200		** rings to the corresponding cpu.
2201		*/
2202		if (adapter->num_queues > 1)
2203			bus_bind_intr(dev, que->res, i);
2204		/* Make tasklet for deferred handling */
2205		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2206		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2207		    taskqueue_thread_enqueue, &que->tq);
2208		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2209		    device_get_nameunit(adapter->dev));
2210	}
2211
2212	/* And Link */
2213	rid = vector + 1;
2214	adapter->res = bus_alloc_resource_any(dev,
2215	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2216	if (adapter->res == NULL) {
2217		device_printf(dev,
2218		    "Unable to allocate bus resource: "
2219		    "MSIX Link Interrupt\n");
2220		return (ENXIO);
2221	}
2222	if ((error = bus_setup_intr(dev, adapter->res,
2223	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2224	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2225		device_printf(dev, "Failed to register Link handler\n");
2226		return (error);
2227	}
2228#if __FreeBSD_version >= 800504
2229	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2230#endif
2231	adapter->linkvec = vector;
2232
2233	return (0);
2234}
2235
2236
2237static void
2238igb_configure_queues(struct adapter *adapter)
2239{
2240	struct	e1000_hw	*hw = &adapter->hw;
2241	struct	igb_queue	*que;
2242	u32			tmp, ivar = 0;
2243	u32			newitr = IGB_DEFAULT_ITR;
2244
2245	/* First turn on RSS capability */
2246	if (adapter->hw.mac.type > e1000_82575)
2247		E1000_WRITE_REG(hw, E1000_GPIE,
2248		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2249		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
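	/*
	 * Note on the IVAR programming below: on the 82576/82580 each
	 * IVAR register holds four 8-bit entries (RX and TX for a pair
	 * of queues); an entry is the MSI-X vector number with
	 * E1000_IVAR_VALID set, which is why the code masks out one
	 * byte and ORs the new value into place.
	 */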
2250
2251	/* Turn on MSIX */
2252	switch (adapter->hw.mac.type) {
2253	case e1000_82580:
2254	case e1000_vfadapt:
2255		/* RX entries */
2256		for (int i = 0; i < adapter->num_queues; i++) {
2257			u32 index = i >> 1;
2258			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2259			que = &adapter->queues[i];
2260			if (i & 1) {
2261				ivar &= 0xFF00FFFF;
2262				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2263			} else {
2264				ivar &= 0xFFFFFF00;
2265				ivar |= que->msix | E1000_IVAR_VALID;
2266			}
2267			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2268		}
2269		/* TX entries */
2270		for (int i = 0; i < adapter->num_queues; i++) {
2271			u32 index = i >> 1;
2272			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2273			que = &adapter->queues[i];
2274			if (i & 1) {
2275				ivar &= 0x00FFFFFF;
2276				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2277			} else {
2278				ivar &= 0xFFFF00FF;
2279				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2280			}
2281			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2282			adapter->eims_mask |= que->eims;
2283		}
2284
2285		/* And for the link interrupt */
2286		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2287		adapter->link_mask = 1 << adapter->linkvec;
2288		adapter->eims_mask |= adapter->link_mask;
2289		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2290		break;
2291	case e1000_82576:
2292		/* RX entries */
2293		for (int i = 0; i < adapter->num_queues; i++) {
2294			u32 index = i & 0x7; /* Each IVAR has two entries */
2295			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2296			que = &adapter->queues[i];
2297			if (i < 8) {
2298				ivar &= 0xFFFFFF00;
2299				ivar |= que->msix | E1000_IVAR_VALID;
2300			} else {
2301				ivar &= 0xFF00FFFF;
2302				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2303			}
2304			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2305			adapter->eims_mask |= que->eims;
2306		}
2307		/* TX entries */
2308		for (int i = 0; i < adapter->num_queues; i++) {
2309			u32 index = i & 0x7; /* Each IVAR has two entries */
2310			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2311			que = &adapter->queues[i];
2312			if (i < 8) {
2313				ivar &= 0xFFFF00FF;
2314				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2315			} else {
2316				ivar &= 0x00FFFFFF;
2317				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2318			}
2319			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2320			adapter->eims_mask |= que->eims;
2321		}
2322
2323		/* And for the link interrupt */
2324		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2325		adapter->link_mask = 1 << adapter->linkvec;
2326		adapter->eims_mask |= adapter->link_mask;
2327		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2328		break;
2329
2330	case e1000_82575:
2331                /* Enable MSI-X support */
2332		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2333                tmp |= E1000_CTRL_EXT_PBA_CLR;
2334                /* Auto-Mask interrupts upon ICR read. */
2335                tmp |= E1000_CTRL_EXT_EIAME;
2336                tmp |= E1000_CTRL_EXT_IRCA;
2337                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2338
2339		/* Queues */
2340		for (int i = 0; i < adapter->num_queues; i++) {
2341			que = &adapter->queues[i];
2342			tmp = E1000_EICR_RX_QUEUE0 << i;
2343			tmp |= E1000_EICR_TX_QUEUE0 << i;
2344			que->eims = tmp;
2345			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2346			    i, que->eims);
2347			adapter->eims_mask |= que->eims;
2348		}
2349
2350		/* Link */
2351		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2352		    E1000_EIMS_OTHER);
2353		adapter->link_mask |= E1000_EIMS_OTHER;
2354		adapter->eims_mask |= adapter->link_mask;
2355	default:
2356		break;
2357	}
2358
2359	/* Set the starting interrupt rate */
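	/*
	 * The 82575 EITR apparently expects the interval replicated in
	 * the upper half of the register, hence the shift below; newer
	 * MACs take a single interval plus the counter-ignore bit
	 * (E1000_EITR_CNT_IGNR).
	 */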
2360        if (hw->mac.type == e1000_82575)
2361                newitr |= newitr << 16;
2362        else
2363                newitr |= E1000_EITR_CNT_IGNR;
2364
2365	for (int i = 0; i < adapter->num_queues; i++) {
2366		que = &adapter->queues[i];
2367		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2368	}
2369
2370	return;
2371}
2372
2373
2374static void
2375igb_free_pci_resources(struct adapter *adapter)
2376{
2377	struct		igb_queue *que = adapter->queues;
2378	device_t	dev = adapter->dev;
2379	int		rid;
2380
2381	/*
2382	** There is a slight possibility of a failure mode
2383	** in attach that will result in entering this function
2384	** before interrupt resources have been initialized, and
2385	** in that case we do not want to execute the loops below.
2386	** We can detect this reliably from the state of the adapter's
2387	** res pointer.
2388	*/
2389	if (adapter->res == NULL)
2390		goto mem;
2391
2392	/*
2393	 * First release all the interrupt resources:
2394	 */
2395	for (int i = 0; i < adapter->num_queues; i++, que++) {
2396		rid = que->msix + 1;
2397		if (que->tag != NULL) {
2398			bus_teardown_intr(dev, que->res, que->tag);
2399			que->tag = NULL;
2400		}
2401		if (que->res != NULL)
2402			bus_release_resource(dev,
2403			    SYS_RES_IRQ, rid, que->res);
2404	}
2405
2406	/* Clean the Legacy or Link interrupt last */
2407	if (adapter->linkvec) /* we are doing MSIX */
2408		rid = adapter->linkvec + 1;
2409	else
2410		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2411
2412	if (adapter->tag != NULL) {
2413		bus_teardown_intr(dev, adapter->res, adapter->tag);
2414		adapter->tag = NULL;
2415	}
2416	if (adapter->res != NULL)
2417		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2418
2419mem:
2420	if (adapter->msix)
2421		pci_release_msi(dev);
2422
2423	if (adapter->msix_mem != NULL)
2424		bus_release_resource(dev, SYS_RES_MEMORY,
2425		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2426
2427	if (adapter->pci_mem != NULL)
2428		bus_release_resource(dev, SYS_RES_MEMORY,
2429		    PCIR_BAR(0), adapter->pci_mem);
2430
2431}
2432
2433/*
2434 * Set up either MSI-X or MSI
2435 */
2436static int
2437igb_setup_msix(struct adapter *adapter)
2438{
2439	device_t dev = adapter->dev;
2440	int rid, want, queues, msgs;
2441
2442	/* tuneable override */
2443	if (igb_enable_msix == 0)
2444		goto msi;
2445
2446	/* First try MSI/X */
2447	rid = PCIR_BAR(IGB_MSIX_BAR);
2448	adapter->msix_mem = bus_alloc_resource_any(dev,
2449	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2450       	if (!adapter->msix_mem) {
2451		/* May not be enabled */
2452		device_printf(adapter->dev,
2453		    "Unable to map MSIX table \n");
2454		goto msi;
2455	}
2456
2457	msgs = pci_msix_count(dev);
2458	if (msgs == 0) { /* system has msix disabled */
2459		bus_release_resource(dev, SYS_RES_MEMORY,
2460		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2461		adapter->msix_mem = NULL;
2462		goto msi;
2463	}
2464
2465	/* Figure out a reasonable auto config value */
2466	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
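	/*
	 * Example (assuming a 4-CPU box with 10 MSI-X messages
	 * available): queues = 4, and with the extra link vector the
	 * driver asks for 5 messages below.
	 */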
2467
2468	/* Manual override */
2469	if (igb_num_queues != 0)
2470		queues = igb_num_queues;
2471
2472	/* Can have max of 4 queues on 82575 */
2473	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2474		queues = 4;
2475
2476	/* Limit the VF adapter to one queue */
2477	if ((adapter->hw.mac.type == e1000_vfadapt) && (queues > 2))
2478		queues = 1;
2479
2480	/*
2481	** One vector (RX/TX pair) per queue
2482	** plus an additional for Link interrupt
2483	*/
2484	want = queues + 1;
2485	if (msgs >= want)
2486		msgs = want;
2487	else {
2488               	device_printf(adapter->dev,
2489		    "MSIX Configuration Problem, "
2490		    "%d vectors configured, but %d queues wanted!\n",
2491		    msgs, want);
2492		return (ENXIO);
2493	}
2494	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2495               	device_printf(adapter->dev,
2496		    "Using MSIX interrupts with %d vectors\n", msgs);
2497		adapter->num_queues = queues;
2498		return (msgs);
2499	}
2500msi:
2501       	msgs = pci_msi_count(dev);
2502       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2503               	device_printf(adapter->dev,"Using MSI interrupt\n");
2504	return (msgs);
2505}
2506
2507/*********************************************************************
2508 *
2509 *  Set up a fresh starting state
2510 *
2511 **********************************************************************/
2512static void
2513igb_reset(struct adapter *adapter)
2514{
2515	device_t	dev = adapter->dev;
2516	struct e1000_hw *hw = &adapter->hw;
2517	struct e1000_fc_info *fc = &hw->fc;
2518	struct ifnet	*ifp = adapter->ifp;
2519	u32		pba = 0;
2520	u16		hwm;
2521
2522	INIT_DEBUGOUT("igb_reset: begin");
2523
2524	/* Let the firmware know the OS is in control */
2525	igb_get_hw_control(adapter);
2526
2527	/*
2528	 * Packet Buffer Allocation (PBA)
2529	 * Writing PBA sets the receive portion of the buffer;
2530	 * the remainder is used for the transmit buffer.
2531	 */
2532	switch (hw->mac.type) {
2533	case e1000_82575:
2534		pba = E1000_PBA_32K;
2535		break;
2536	case e1000_82576:
2537	case e1000_vfadapt:
2538		pba = E1000_PBA_64K;
2539		break;
2540	case e1000_82580:
2541		pba = E1000_PBA_35K;
2542	default:
2543		break;
2544	}
2545
2546	/* Special needs in case of Jumbo frames */
2547	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2548		u32 tx_space, min_tx, min_rx;
2549		pba = E1000_READ_REG(hw, E1000_PBA);
2550		tx_space = pba >> 16;
2551		pba &= 0xffff;
2552		min_tx = (adapter->max_frame_size +
2553		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2554		min_tx = roundup2(min_tx, 1024);
2555		min_tx >>= 10;
2556                min_rx = adapter->max_frame_size;
2557                min_rx = roundup2(min_rx, 1024);
2558                min_rx >>= 10;
2559		if (tx_space < min_tx &&
2560		    ((min_tx - tx_space) < pba)) {
2561			pba = pba - (min_tx - tx_space);
2562			/*
2563                         * if short on rx space, rx wins
2564                         * and must trump tx adjustment
2565			 */
2566                        if (pba < min_rx)
2567                                pba = min_rx;
2568		}
2569		E1000_WRITE_REG(hw, E1000_PBA, pba);
2570	}
2571
2572	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2573
2574	/*
2575	 * These parameters control the automatic generation (Tx) and
2576	 * response (Rx) to Ethernet PAUSE frames.
2577	 * - High water mark should allow for at least two frames to be
2578	 *   received after sending an XOFF.
2579	 * - Low water mark works best when it is very near the high water mark.
2580	 *   This allows the receiver to restart by sending XON when it has
2581	 *   drained a bit.
2582	 */
2583	hwm = min(((pba << 10) * 9 / 10),
2584	    ((pba << 10) - 2 * adapter->max_frame_size));
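	/*
	 * pba is in KB, so (pba << 10) is the RX buffer size in bytes.
	 * Example, assuming a 64 KB PBA and a 1522-byte max frame:
	 * hwm = min(58982, 65536 - 3044) = 58982 bytes before rounding.
	 */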
2585
2586	if (hw->mac.type < e1000_82576) {
2587		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2588		fc->low_water = fc->high_water - 8;
2589	} else {
2590		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2591		fc->low_water = fc->high_water - 16;
2592	}
2593
2594	fc->pause_time = IGB_FC_PAUSE_TIME;
2595	fc->send_xon = TRUE;
2596
2597	/* Set flow control; use the tunable value only if it is sane */
2598	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2599		fc->requested_mode = igb_fc_setting;
2600	else
2601		fc->requested_mode = e1000_fc_none;
2602
2603	fc->current_mode = fc->requested_mode;
2604
2605	/* Issue a global reset */
2606	e1000_reset_hw(hw);
2607	E1000_WRITE_REG(hw, E1000_WUC, 0);
2608
2609	if (e1000_init_hw(hw) < 0)
2610		device_printf(dev, "Hardware Initialization Failed\n");
2611
2612	if (hw->mac.type == e1000_82580) {
2613		u32 reg;
2614
2615		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2616		/*
2617		 * 0x80000000 - enable DMA COAL
2618		 * 0x10000000 - use L0s as low power
2619		 * 0x20000000 - use L1 as low power
2620		 * X << 16 - exit dma coal when rx data exceeds X kB
2621		 * Y - upper limit to stay in dma coal in units of 32usecs
2622		 */
2623		E1000_WRITE_REG(hw, E1000_DMACR,
2624		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
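		/*
		 * Decoding the constant against the table above:
		 * 0xA0000006 = enable DMA coalescing (0x80000000) |
		 * use L1 as the low power state (0x20000000) | an upper
		 * limit of 6 * 32us in coalescing, with the RX exit
		 * threshold derived from hwm ORed into bits 23:16.
		 */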
2625
2626		/* set hwm to PBA -  2 * max frame size */
2627		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2628		/*
2629		 * This sets the time to wait before requesting a transition to
2630		 * a low power state to the number of usecs needed to receive
2631		 * one 512-byte frame at gigabit line rate.
2632		 */
2633		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2634
2635		/* free space in tx packet buffer to wake from DMA coal */
2636		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2637		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2638
2639		/* make low power state decision controlled by DMA coal */
2640		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2641		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2642		    reg | E1000_PCIEMISC_LX_DECISION);
2643	}
2644
2645	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2646	e1000_get_phy_info(hw);
2647	e1000_check_for_link(hw);
2648	return;
2649}
2650
2651/*********************************************************************
2652 *
2653 *  Setup networking device structure and register an interface.
2654 *
2655 **********************************************************************/
2656static void
2657igb_setup_interface(device_t dev, struct adapter *adapter)
2658{
2659	struct ifnet   *ifp;
2660
2661	INIT_DEBUGOUT("igb_setup_interface: begin");
2662
2663	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2664	if (ifp == NULL)
2665		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2666	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2667	ifp->if_mtu = ETHERMTU;
2668	ifp->if_init =  igb_init;
2669	ifp->if_softc = adapter;
2670	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2671	ifp->if_ioctl = igb_ioctl;
2672	ifp->if_start = igb_start;
2673#if __FreeBSD_version >= 800000
2674	ifp->if_transmit = igb_mq_start;
2675	ifp->if_qflush = igb_qflush;
2676#endif
2677	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2678	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2679	IFQ_SET_READY(&ifp->if_snd);
2680
2681	ether_ifattach(ifp, adapter->hw.mac.addr);
2682
2683	ifp->if_capabilities = ifp->if_capenable = 0;
2684
2685	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2686	ifp->if_capabilities |= IFCAP_TSO4;
2687	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2688	if (igb_header_split)
2689		ifp->if_capabilities |= IFCAP_LRO;
2690
2691	ifp->if_capenable = ifp->if_capabilities;
2692#ifdef DEVICE_POLLING
2693	ifp->if_capabilities |= IFCAP_POLLING;
2694#endif
2695
2696	/*
2697	 * Tell the upper layer(s) we
2698	 * support full VLAN capability.
2699	 */
2700	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2701	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2702	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2703
2704	/*
2705	** Don't turn this on by default: if vlans are
2706	** created on another pseudo device (e.g. lagg),
2707	** then vlan events are not passed through, breaking
2708	** operation, but with HW FILTER off it works. If
2709	** you use vlans directly on the igb driver you can
2710	** enable this and get full hardware tag filtering.
2711	*/
2712	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2713
2714	/*
2715	 * Specify the media types supported by this adapter and register
2716	 * callbacks to update media and link information
2717	 */
2718	ifmedia_init(&adapter->media, IFM_IMASK,
2719	    igb_media_change, igb_media_status);
2720	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2721	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2722		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2723			    0, NULL);
2724		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2725	} else {
2726		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2727		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2728			    0, NULL);
2729		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2730			    0, NULL);
2731		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2732			    0, NULL);
2733		if (adapter->hw.phy.type != e1000_phy_ife) {
2734			ifmedia_add(&adapter->media,
2735				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2736			ifmedia_add(&adapter->media,
2737				IFM_ETHER | IFM_1000_T, 0, NULL);
2738		}
2739	}
2740	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2741	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2742}
2743
2744
2745/*
2746 * Manage DMA'able memory.
2747 */
2748static void
2749igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2750{
2751	if (error)
2752		return;
2753	*(bus_addr_t *) arg = segs[0].ds_addr;
2754}
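/*
 * Note: igb_dma_malloc() loads the map with BUS_DMA_NOWAIT, so the
 * mapping is never deferred; the callback above runs synchronously
 * from bus_dmamap_load() and simply records the single segment's bus
 * address in dma_paddr.
 */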
2755
2756static int
2757igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2758        struct igb_dma_alloc *dma, int mapflags)
2759{
2760	int error;
2761
2762	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2763				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2764				BUS_SPACE_MAXADDR,	/* lowaddr */
2765				BUS_SPACE_MAXADDR,	/* highaddr */
2766				NULL, NULL,		/* filter, filterarg */
2767				size,			/* maxsize */
2768				1,			/* nsegments */
2769				size,			/* maxsegsize */
2770				0,			/* flags */
2771				NULL,			/* lockfunc */
2772				NULL,			/* lockarg */
2773				&dma->dma_tag);
2774	if (error) {
2775		device_printf(adapter->dev,
2776		    "%s: bus_dma_tag_create failed: %d\n",
2777		    __func__, error);
2778		goto fail_0;
2779	}
2780
2781	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2782	    BUS_DMA_NOWAIT, &dma->dma_map);
2783	if (error) {
2784		device_printf(adapter->dev,
2785		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2786		    __func__, (uintmax_t)size, error);
2787		goto fail_2;
2788	}
2789
2790	dma->dma_paddr = 0;
2791	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2792	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2793	if (error || dma->dma_paddr == 0) {
2794		device_printf(adapter->dev,
2795		    "%s: bus_dmamap_load failed: %d\n",
2796		    __func__, error);
2797		goto fail_3;
2798	}
2799
2800	return (0);
2801
2802fail_3:
2803	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2804fail_2:
2805	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2806	bus_dma_tag_destroy(dma->dma_tag);
2807fail_0:
2808	dma->dma_map = NULL;
2809	dma->dma_tag = NULL;
2810
2811	return (error);
2812}
2813
2814static void
2815igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2816{
2817	if (dma->dma_tag == NULL)
2818		return;
2819	if (dma->dma_map != NULL) {
2820		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2821		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2822		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2823		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2824		dma->dma_map = NULL;
2825	}
2826	bus_dma_tag_destroy(dma->dma_tag);
2827	dma->dma_tag = NULL;
2828}
2829
2830
2831/*********************************************************************
2832 *
2833 *  Allocate memory for the transmit and receive rings, and then
2834 *  the descriptors associated with each, called only once at attach.
2835 *
2836 **********************************************************************/
2837static int
2838igb_allocate_queues(struct adapter *adapter)
2839{
2840	device_t dev = adapter->dev;
2841	struct igb_queue	*que = NULL;
2842	struct tx_ring		*txr = NULL;
2843	struct rx_ring		*rxr = NULL;
2844	int rsize, tsize, error = E1000_SUCCESS;
2845	int txconf = 0, rxconf = 0;
2846
2847	/* First allocate the top level queue structs */
2848	if (!(adapter->queues =
2849	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2850	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2851		device_printf(dev, "Unable to allocate queue memory\n");
2852		error = ENOMEM;
2853		goto fail;
2854	}
2855
2856	/* Next allocate the TX ring struct memory */
2857	if (!(adapter->tx_rings =
2858	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2859	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2860		device_printf(dev, "Unable to allocate TX ring memory\n");
2861		error = ENOMEM;
2862		goto tx_fail;
2863	}
2864
2865	/* Now allocate the RX */
2866	if (!(adapter->rx_rings =
2867	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2868	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2869		device_printf(dev, "Unable to allocate RX ring memory\n");
2870		error = ENOMEM;
2871		goto rx_fail;
2872	}
2873
2874	tsize = roundup2(adapter->num_tx_desc *
2875	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2876	/*
2877	 * Now set up the TX queues, txconf is needed to handle the
2878	 * possibility that things fail midcourse and we need to
2879	 * undo memory gracefully
2880	 */
2881	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2882		/* Set up some basics */
2883		txr = &adapter->tx_rings[i];
2884		txr->adapter = adapter;
2885		txr->me = i;
2886
2887		/* Initialize the TX lock */
2888		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2889		    device_get_nameunit(dev), txr->me);
2890		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2891
2892		if (igb_dma_malloc(adapter, tsize,
2893			&txr->txdma, BUS_DMA_NOWAIT)) {
2894			device_printf(dev,
2895			    "Unable to allocate TX Descriptor memory\n");
2896			error = ENOMEM;
2897			goto err_tx_desc;
2898		}
2899		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2900		bzero((void *)txr->tx_base, tsize);
2901
2902        	/* Now allocate transmit buffers for the ring */
2903        	if (igb_allocate_transmit_buffers(txr)) {
2904			device_printf(dev,
2905			    "Critical Failure setting up transmit buffers\n");
2906			error = ENOMEM;
2907			goto err_tx_desc;
2908        	}
2909#if __FreeBSD_version >= 800000
2910		/* Allocate a buf ring */
2911		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2912		    M_WAITOK, &txr->tx_mtx);
2913#endif
2914	}
2915
2916	/*
2917	 * Next the RX queues...
2918	 */
2919	rsize = roundup2(adapter->num_rx_desc *
2920	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2921	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2922		rxr = &adapter->rx_rings[i];
2923		rxr->adapter = adapter;
2924		rxr->me = i;
2925
2926		/* Initialize the RX lock */
2927		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2928		    device_get_nameunit(dev), rxr->me);
2929		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2930
2931		if (igb_dma_malloc(adapter, rsize,
2932			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2933			device_printf(dev,
2934			    "Unable to allocate RX Descriptor memory\n");
2935			error = ENOMEM;
2936			goto err_rx_desc;
2937		}
2938		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2939		bzero((void *)rxr->rx_base, rsize);
2940
2941        	/* Allocate receive buffers for the ring*/
2942		if (igb_allocate_receive_buffers(rxr)) {
2943			device_printf(dev,
2944			    "Critical Failure setting up receive buffers\n");
2945			error = ENOMEM;
2946			goto err_rx_desc;
2947		}
2948	}
2949
2950	/*
2951	** Finally set up the queue holding structs
2952	*/
2953	for (int i = 0; i < adapter->num_queues; i++) {
2954		que = &adapter->queues[i];
2955		que->adapter = adapter;
2956		que->txr = &adapter->tx_rings[i];
2957		que->rxr = &adapter->rx_rings[i];
2958	}
2959
2960	return (0);
2961
2962err_rx_desc:
2963	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2964		igb_dma_free(adapter, &rxr->rxdma);
2965err_tx_desc:
2966	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2967		igb_dma_free(adapter, &txr->txdma);
2968	free(adapter->rx_rings, M_DEVBUF);
2969rx_fail:
2970#if __FreeBSD_version >= 800000
2971	buf_ring_free(txr->br, M_DEVBUF);
2972#endif
2973	free(adapter->tx_rings, M_DEVBUF);
2974tx_fail:
2975	free(adapter->queues, M_DEVBUF);
2976fail:
2977	return (error);
2978}
2979
2980/*********************************************************************
2981 *
2982 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2983 *  the information needed to transmit a packet on the wire. This is
2984 *  called only once at attach, setup is done every reset.
2985 *
2986 **********************************************************************/
2987static int
2988igb_allocate_transmit_buffers(struct tx_ring *txr)
2989{
2990	struct adapter *adapter = txr->adapter;
2991	device_t dev = adapter->dev;
2992	struct igb_tx_buffer *txbuf;
2993	int error, i;
2994
2995	/*
2996	 * Setup DMA descriptor areas.
2997	 */
2998	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2999			       1, 0,			/* alignment, bounds */
3000			       BUS_SPACE_MAXADDR,	/* lowaddr */
3001			       BUS_SPACE_MAXADDR,	/* highaddr */
3002			       NULL, NULL,		/* filter, filterarg */
3003			       IGB_TSO_SIZE,		/* maxsize */
3004			       IGB_MAX_SCATTER,		/* nsegments */
3005			       PAGE_SIZE,		/* maxsegsize */
3006			       0,			/* flags */
3007			       NULL,			/* lockfunc */
3008			       NULL,			/* lockfuncarg */
3009			       &txr->txtag))) {
3010		device_printf(dev,"Unable to allocate TX DMA tag\n");
3011		goto fail;
3012	}
3013
3014	if (!(txr->tx_buffers =
3015	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3016	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3017		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3018		error = ENOMEM;
3019		goto fail;
3020	}
3021
3022        /* Create the descriptor buffer dma maps */
3023	txbuf = txr->tx_buffers;
3024	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3025		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3026		if (error != 0) {
3027			device_printf(dev, "Unable to create TX DMA map\n");
3028			goto fail;
3029		}
3030	}
3031
3032	return 0;
3033fail:
3034	/* We free all, it handles case where we are in the middle */
3035	igb_free_transmit_structures(adapter);
3036	return (error);
3037}
3038
3039/*********************************************************************
3040 *
3041 *  Initialize a transmit ring.
3042 *
3043 **********************************************************************/
3044static void
3045igb_setup_transmit_ring(struct tx_ring *txr)
3046{
3047	struct adapter *adapter = txr->adapter;
3048	struct igb_tx_buffer *txbuf;
3049	int i;
3050
3051	/* Clear the old descriptor contents */
3052	IGB_TX_LOCK(txr);
3053	bzero((void *)txr->tx_base,
3054	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3055	/* Reset indices */
3056	txr->next_avail_desc = 0;
3057	txr->next_to_clean = 0;
3058
3059	/* Free any existing tx buffers. */
3060        txbuf = txr->tx_buffers;
3061	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3062		if (txbuf->m_head != NULL) {
3063			bus_dmamap_sync(txr->txtag, txbuf->map,
3064			    BUS_DMASYNC_POSTWRITE);
3065			bus_dmamap_unload(txr->txtag, txbuf->map);
3066			m_freem(txbuf->m_head);
3067			txbuf->m_head = NULL;
3068		}
3069		/* clear the watch index */
3070		txbuf->next_eop = -1;
3071        }
3072
3073	/* Set number of descriptors available */
3074	txr->tx_avail = adapter->num_tx_desc;
3075
3076	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3077	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3078	IGB_TX_UNLOCK(txr);
3079}
3080
3081/*********************************************************************
3082 *
3083 *  Initialize all transmit rings.
3084 *
3085 **********************************************************************/
3086static void
3087igb_setup_transmit_structures(struct adapter *adapter)
3088{
3089	struct tx_ring *txr = adapter->tx_rings;
3090
3091	for (int i = 0; i < adapter->num_queues; i++, txr++)
3092		igb_setup_transmit_ring(txr);
3093
3094	return;
3095}
3096
3097/*********************************************************************
3098 *
3099 *  Enable transmit unit.
3100 *
3101 **********************************************************************/
3102static void
3103igb_initialize_transmit_units(struct adapter *adapter)
3104{
3105	struct tx_ring	*txr = adapter->tx_rings;
3106	struct e1000_hw *hw = &adapter->hw;
3107	u32		tctl, txdctl;
3108
3109	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3110	tctl = txdctl = 0;
3111
3112	/* Setup the Tx Descriptor Rings */
3113	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3114		u64 bus_addr = txr->txdma.dma_paddr;
3115
3116		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3117		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3118		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3119		    (uint32_t)(bus_addr >> 32));
3120		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3121		    (uint32_t)bus_addr);
3122
3123		/* Setup the HW Tx Head and Tail descriptor pointers */
3124		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3125		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3126
3127		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3128		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3129		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3130
3131		txr->watchdog_check = FALSE;
3132
3133		txdctl |= IGB_TX_PTHRESH;
3134		txdctl |= IGB_TX_HTHRESH << 8;
3135		txdctl |= IGB_TX_WTHRESH << 16;
3136		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3137		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3138	}
3139
3140	if (adapter->hw.mac.type == e1000_vfadapt)
3141		return;
3142
3143	/* Program the Transmit Control Register */
3144	tctl = E1000_READ_REG(hw, E1000_TCTL);
3145	tctl &= ~E1000_TCTL_CT;
3146	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3147		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3148
3149	e1000_config_collision_dist(hw);
3150
3151	/* This write will effectively turn on the transmit unit. */
3152	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3153}
3154
3155/*********************************************************************
3156 *
3157 *  Free all transmit rings.
3158 *
3159 **********************************************************************/
3160static void
3161igb_free_transmit_structures(struct adapter *adapter)
3162{
3163	struct tx_ring *txr = adapter->tx_rings;
3164
3165	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3166		IGB_TX_LOCK(txr);
3167		igb_free_transmit_buffers(txr);
3168		igb_dma_free(adapter, &txr->txdma);
3169		IGB_TX_UNLOCK(txr);
3170		IGB_TX_LOCK_DESTROY(txr);
3171	}
3172	free(adapter->tx_rings, M_DEVBUF);
3173}
3174
3175/*********************************************************************
3176 *
3177 *  Free transmit ring related data structures.
3178 *
3179 **********************************************************************/
3180static void
3181igb_free_transmit_buffers(struct tx_ring *txr)
3182{
3183	struct adapter *adapter = txr->adapter;
3184	struct igb_tx_buffer *tx_buffer;
3185	int             i;
3186
3187	INIT_DEBUGOUT("free_transmit_ring: begin");
3188
3189	if (txr->tx_buffers == NULL)
3190		return;
3191
3192	tx_buffer = txr->tx_buffers;
3193	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3194		if (tx_buffer->m_head != NULL) {
3195			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3196			    BUS_DMASYNC_POSTWRITE);
3197			bus_dmamap_unload(txr->txtag,
3198			    tx_buffer->map);
3199			m_freem(tx_buffer->m_head);
3200			tx_buffer->m_head = NULL;
3201			if (tx_buffer->map != NULL) {
3202				bus_dmamap_destroy(txr->txtag,
3203				    tx_buffer->map);
3204				tx_buffer->map = NULL;
3205			}
3206		} else if (tx_buffer->map != NULL) {
3207			bus_dmamap_unload(txr->txtag,
3208			    tx_buffer->map);
3209			bus_dmamap_destroy(txr->txtag,
3210			    tx_buffer->map);
3211			tx_buffer->map = NULL;
3212		}
3213	}
3214#if __FreeBSD_version >= 800000
3215	if (txr->br != NULL)
3216		buf_ring_free(txr->br, M_DEVBUF);
3217#endif
3218	if (txr->tx_buffers != NULL) {
3219		free(txr->tx_buffers, M_DEVBUF);
3220		txr->tx_buffers = NULL;
3221	}
3222	if (txr->txtag != NULL) {
3223		bus_dma_tag_destroy(txr->txtag);
3224		txr->txtag = NULL;
3225	}
3226	return;
3227}
3228
3229/**********************************************************************
3230 *
3231 *  Setup work for hardware segmentation offload (TSO)
3232 *
3233 **********************************************************************/
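/*
 * The routine below builds an advanced "context" descriptor: the MAC,
 * IP and TCP header lengths go into vlan_macip_lens, the segment size
 * and TCP header length into mss_l4len_idx, and the hardware then
 * replicates the headers and recomputes the checksums for each
 * segment it carves out of the payload.
 */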
3234static boolean_t
3235igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3236{
3237	struct adapter *adapter = txr->adapter;
3238	struct e1000_adv_tx_context_desc *TXD;
3239	struct igb_tx_buffer        *tx_buffer;
3240	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3241	u32 mss_l4len_idx = 0;
3242	u16 vtag = 0;
3243	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3244	struct ether_vlan_header *eh;
3245	struct ip *ip;
3246	struct tcphdr *th;
3247
3248
3249	/*
3250	 * Determine where frame payload starts.
3251	 * Jump over vlan headers if already present
3252	 */
3253	eh = mtod(mp, struct ether_vlan_header *);
3254	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3255		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3256	else
3257		ehdrlen = ETHER_HDR_LEN;
3258
3259	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3260	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3261		return FALSE;
3262
3263	/* Only supports IPV4 for now */
3264	ctxd = txr->next_avail_desc;
3265	tx_buffer = &txr->tx_buffers[ctxd];
3266	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3267
3268	ip = (struct ip *)(mp->m_data + ehdrlen);
3269	if (ip->ip_p != IPPROTO_TCP)
3270                return FALSE;   /* 0 */
3271	ip->ip_sum = 0;
3272	ip_hlen = ip->ip_hl << 2;
3273	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3274	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3275	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3276	tcp_hlen = th->th_off << 2;
3277	/*
3278	 * Calculate header length, this is used
3279	 * in the transmit desc in igb_xmit
3280	 */
3281	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3282
3283	/* VLAN MACLEN IPLEN */
3284	if (mp->m_flags & M_VLANTAG) {
3285		vtag = htole16(mp->m_pkthdr.ether_vtag);
3286		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3287	}
3288
3289	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3290	vlan_macip_lens |= ip_hlen;
3291	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3292
3293	/* ADV DTYPE TUCMD */
3294	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3295	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3296	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3297	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3298
3299	/* MSS L4LEN IDX */
3300	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3301	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3302	/* 82575 needs the queue index added */
3303	if (adapter->hw.mac.type == e1000_82575)
3304		mss_l4len_idx |= txr->me << 4;
3305	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3306
3307	TXD->seqnum_seed = htole32(0);
3308	tx_buffer->m_head = NULL;
3309	tx_buffer->next_eop = -1;
3310
3311	if (++ctxd == adapter->num_tx_desc)
3312		ctxd = 0;
3313
3314	txr->tx_avail--;
3315	txr->next_avail_desc = ctxd;
3316	return TRUE;
3317}
3318
3319
3320/*********************************************************************
3321 *
3322 *  Context Descriptor setup for VLAN or CSUM
3323 *
3324 **********************************************************************/
3325
3326static bool
3327igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3328{
3329	struct adapter *adapter = txr->adapter;
3330	struct e1000_adv_tx_context_desc *TXD;
3331	struct igb_tx_buffer        *tx_buffer;
3332	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3333	struct ether_vlan_header *eh;
3334	struct ip *ip = NULL;
3335	struct ip6_hdr *ip6;
3336	int  ehdrlen, ctxd, ip_hlen = 0;
3337	u16	etype, vtag = 0;
3338	u8	ipproto = 0;
3339	bool	offload = TRUE;
3340
3341	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3342		offload = FALSE;
3343
3344	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3345	ctxd = txr->next_avail_desc;
3346	tx_buffer = &txr->tx_buffers[ctxd];
3347	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3348
3349	/*
3350	** In advanced descriptors the vlan tag must
3351	** be placed into the context descriptor, thus
3352	** we need to be here just for that setup.
3353	*/
3354	if (mp->m_flags & M_VLANTAG) {
3355		vtag = htole16(mp->m_pkthdr.ether_vtag);
3356		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3357	} else if (offload == FALSE)
3358		return FALSE;
3359
3360	/*
3361	 * Determine where frame payload starts.
3362	 * Jump over vlan headers if already present,
3363	 * helpful for QinQ too.
3364	 */
3365	eh = mtod(mp, struct ether_vlan_header *);
3366	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3367		etype = ntohs(eh->evl_proto);
3368		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3369	} else {
3370		etype = ntohs(eh->evl_encap_proto);
3371		ehdrlen = ETHER_HDR_LEN;
3372	}
3373
3374	/* Set the ether header length */
3375	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3376
3377	switch (etype) {
3378		case ETHERTYPE_IP:
3379			ip = (struct ip *)(mp->m_data + ehdrlen);
3380			ip_hlen = ip->ip_hl << 2;
3381			if (mp->m_len < ehdrlen + ip_hlen) {
3382				offload = FALSE;
3383				break;
3384			}
3385			ipproto = ip->ip_p;
3386			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3387			break;
3388		case ETHERTYPE_IPV6:
3389			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3390			ip_hlen = sizeof(struct ip6_hdr);
3391			if (mp->m_len < ehdrlen + ip_hlen)
3392				return (FALSE);
3393			ipproto = ip6->ip6_nxt;
3394			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3395			break;
3396		default:
3397			offload = FALSE;
3398			break;
3399	}
3400
3401	vlan_macip_lens |= ip_hlen;
3402	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3403
3404	switch (ipproto) {
3405		case IPPROTO_TCP:
3406			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3407				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3408			break;
3409		case IPPROTO_UDP:
3410			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3411				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3412			break;
3413#if __FreeBSD_version >= 800000
3414		case IPPROTO_SCTP:
3415			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3416				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3417			break;
3418#endif
3419		default:
3420			offload = FALSE;
3421			break;
3422	}
3423
3424	/* 82575 needs the queue index added */
3425	if (adapter->hw.mac.type == e1000_82575)
3426		mss_l4len_idx = txr->me << 4;
3427
3428	/* Now copy bits into descriptor */
3429	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3430	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3431	TXD->seqnum_seed = htole32(0);
3432	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3433
3434	tx_buffer->m_head = NULL;
3435	tx_buffer->next_eop = -1;
3436
3437	/* We've consumed the first desc, adjust counters */
3438	if (++ctxd == adapter->num_tx_desc)
3439		ctxd = 0;
3440	txr->next_avail_desc = ctxd;
3441	--txr->tx_avail;
3442
3443        return (offload);
3444}
3445
3446
3447/**********************************************************************
3448 *
3449 *  Examine each tx_buffer in the used queue. If the hardware is done
3450 *  processing the packet then free associated resources. The
3451 *  tx_buffer is put back on the free queue.
3452 *
3453 *  A TRUE return means there's more work in the ring to clean; FALSE means it's empty.
3454 **********************************************************************/
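/*
 * Sketch of the algorithm: starting at next_to_clean, wait for the
 * DD (descriptor done) bit in the write-back status of the packet's
 * EOP descriptor, release every buffer up to and including it, and
 * repeat using the next_eop index saved in each packet's first buffer.
 */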
3455static bool
3456igb_txeof(struct tx_ring *txr)
3457{
3458	struct adapter	*adapter = txr->adapter;
3459        int first, last, done;
3460        struct igb_tx_buffer *tx_buffer;
3461        struct e1000_tx_desc   *tx_desc, *eop_desc;
3462	struct ifnet   *ifp = adapter->ifp;
3463
3464	IGB_TX_LOCK_ASSERT(txr);
3465
3466        if (txr->tx_avail == adapter->num_tx_desc)
3467                return FALSE;
3468
3469        first = txr->next_to_clean;
3470        tx_desc = &txr->tx_base[first];
3471        tx_buffer = &txr->tx_buffers[first];
3472	last = tx_buffer->next_eop;
3473        eop_desc = &txr->tx_base[last];
3474
3475	/*
3476	 * What this does is get the index of the
3477	 * first descriptor AFTER the EOP of the
3478	 * first packet, that way we can do the
3479	 * simple comparison on the inner while loop.
3480	 */
3481	if (++last == adapter->num_tx_desc)
3482 		last = 0;
3483	done = last;
3484
3485        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3486            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3487
3488        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3489		/* We clean the range of the packet */
3490		while (first != done) {
3491                	tx_desc->upper.data = 0;
3492                	tx_desc->lower.data = 0;
3493                	tx_desc->buffer_addr = 0;
3494                	++txr->tx_avail;
3495
3496			if (tx_buffer->m_head) {
3497				txr->bytes +=
3498				    tx_buffer->m_head->m_pkthdr.len;
3499				bus_dmamap_sync(txr->txtag,
3500				    tx_buffer->map,
3501				    BUS_DMASYNC_POSTWRITE);
3502				bus_dmamap_unload(txr->txtag,
3503				    tx_buffer->map);
3504
3505                        	m_freem(tx_buffer->m_head);
3506                        	tx_buffer->m_head = NULL;
3507                	}
3508			tx_buffer->next_eop = -1;
3509			txr->watchdog_time = ticks;
3510
3511	                if (++first == adapter->num_tx_desc)
3512				first = 0;
3513
3514	                tx_buffer = &txr->tx_buffers[first];
3515			tx_desc = &txr->tx_base[first];
3516		}
3517		++txr->packets;
3518		++ifp->if_opackets;
3519		/* See if we can continue to the next packet */
3520		last = tx_buffer->next_eop;
3521		if (last != -1) {
3522        		eop_desc = &txr->tx_base[last];
3523			/* Get new done point */
3524			if (++last == adapter->num_tx_desc) last = 0;
3525			done = last;
3526		} else
3527			break;
3528        }
3529        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3530            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3531
3532        txr->next_to_clean = first;
3533
3534        /*
3535         * If we have enough room, clear IFF_DRV_OACTIVE
3536         * to tell the stack that it is OK to send packets.
3537         */
3538        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3539                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3540		/* All clean, turn off the watchdog */
3541                if (txr->tx_avail == adapter->num_tx_desc) {
3542			txr->watchdog_check = FALSE;
3543			return (FALSE);
3544		}
3545        }
3546
3547	return (TRUE);
3548}
3549
3550
3551/*********************************************************************
3552 *
3553 *  Refresh mbuf buffers for RX descriptor rings
3554 *   - now keeps its own state so discards due to resource
3555 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3556 *     it just returns, keeping its placeholder, so it can simply
3557 *     be called again later to retry.
3558 *
3559 **********************************************************************/
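/*
 * With header split enabled each slot carries two mbufs: a small
 * header buffer (hmap, MHLEN) and a larger packet buffer (pmap,
 * rx_mbuf_sz); the receive tail (RDT) is only advanced when at least
 * one slot was actually refreshed.
 */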
3560static void
3561igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3562{
3563	struct adapter		*adapter = rxr->adapter;
3564	bus_dma_segment_t	hseg[1];
3565	bus_dma_segment_t	pseg[1];
3566	struct igb_rx_buf	*rxbuf;
3567	struct mbuf		*mh, *mp;
3568	int			i, nsegs, error, cleaned;
3569
3570	i = rxr->next_to_refresh;
3571	cleaned = -1; /* Signify no completions */
3572	while (i != limit) {
3573		rxbuf = &rxr->rx_buffers[i];
3574		if ((rxbuf->m_head == NULL) && (rxr->hdr_split)) {
3575			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3576			if (mh == NULL)
3577				goto update;
3578			mh->m_pkthdr.len = mh->m_len = MHLEN;
3580			mh->m_flags |= M_PKTHDR;
3581			m_adj(mh, ETHER_ALIGN);
3582			/* Get the memory mapping */
3583			error = bus_dmamap_load_mbuf_sg(rxr->htag,
3584			    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3585			if (error != 0) {
3586				printf("GET BUF: dmamap load"
3587				    " failure - %d\n", error);
3588				m_free(mh);
3589				goto update;
3590			}
3591			rxbuf->m_head = mh;
3592			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3593			    BUS_DMASYNC_PREREAD);
3594			rxr->rx_base[i].read.hdr_addr =
3595			    htole64(hseg[0].ds_addr);
3596		}
3597
3598		if (rxbuf->m_pack == NULL) {
3599			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3600			    M_PKTHDR, adapter->rx_mbuf_sz);
3601			if (mp == NULL)
3602				goto update;
3603			mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3604			/* Get the memory mapping */
3605			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3606			    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3607			if (error != 0) {
3608				printf("GET BUF: dmamap load"
3609				    " failure - %d\n", error);
3610				m_free(mp);
3611				goto update;
3612			}
3613			rxbuf->m_pack = mp;
3614			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3615			    BUS_DMASYNC_PREREAD);
3616			rxr->rx_base[i].read.pkt_addr =
3617			    htole64(pseg[0].ds_addr);
3618		}
3619
3620		cleaned = i;
3621		/* Calculate next index */
3622		if (++i == adapter->num_rx_desc)
3623			i = 0;
3624		/* This is the work marker for refresh */
3625		rxr->next_to_refresh = i;
3626	}
3627update:
3628	if (cleaned != -1) /* If we refreshed some, bump tail */
3629		E1000_WRITE_REG(&adapter->hw,
3630		    E1000_RDT(rxr->me), cleaned);
3631	return;
3632}
3633
3634
3635/*********************************************************************
3636 *
3637 *  Allocate memory for rx_buffer structures. Since we use one
3638 *  rx_buffer per received packet, the maximum number of rx_buffer's
3639 *  that we'll need is equal to the number of receive descriptors
3640 *  that we've allocated.
3641 *
3642 **********************************************************************/
3643static int
3644igb_allocate_receive_buffers(struct rx_ring *rxr)
3645{
3646	struct	adapter 	*adapter = rxr->adapter;
3647	device_t 		dev = adapter->dev;
3648	struct igb_rx_buf	*rxbuf;
3649	int             	i, bsize, error;
3650
3651	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3652	if (!(rxr->rx_buffers =
3653	    (struct igb_rx_buf *) malloc(bsize,
3654	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3655		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3656		error = ENOMEM;
3657		goto fail;
3658	}
3659
3660	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3661				   1, 0,		/* alignment, bounds */
3662				   BUS_SPACE_MAXADDR,	/* lowaddr */
3663				   BUS_SPACE_MAXADDR,	/* highaddr */
3664				   NULL, NULL,		/* filter, filterarg */
3665				   MSIZE,		/* maxsize */
3666				   1,			/* nsegments */
3667				   MSIZE,		/* maxsegsize */
3668				   0,			/* flags */
3669				   NULL,		/* lockfunc */
3670				   NULL,		/* lockfuncarg */
3671				   &rxr->htag))) {
3672		device_printf(dev, "Unable to create RX DMA tag\n");
3673		goto fail;
3674	}
3675
3676	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3677				   1, 0,		/* alignment, bounds */
3678				   BUS_SPACE_MAXADDR,	/* lowaddr */
3679				   BUS_SPACE_MAXADDR,	/* highaddr */
3680				   NULL, NULL,		/* filter, filterarg */
3681				   MJUMPAGESIZE,	/* maxsize */
3682				   1,			/* nsegments */
3683				   MJUMPAGESIZE,	/* maxsegsize */
3684				   0,			/* flags */
3685				   NULL,		/* lockfunc */
3686				   NULL,		/* lockfuncarg */
3687				   &rxr->ptag))) {
3688		device_printf(dev, "Unable to create RX payload DMA tag\n");
3689		goto fail;
3690	}
3691
3692	for (i = 0; i < adapter->num_rx_desc; i++) {
3693		rxbuf = &rxr->rx_buffers[i];
3694		error = bus_dmamap_create(rxr->htag,
3695		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3696		if (error) {
3697			device_printf(dev,
3698			    "Unable to create RX head DMA maps\n");
3699			goto fail;
3700		}
3701		error = bus_dmamap_create(rxr->ptag,
3702		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3703		if (error) {
3704			device_printf(dev,
3705			    "Unable to create RX packet DMA maps\n");
3706			goto fail;
3707		}
3708	}
3709
3710	return (0);
3711
3712fail:
3713	/* Frees all, but can handle partial completion */
3714	igb_free_receive_structures(adapter);
3715	return (error);
3716}
3717
3718
3719static void
3720igb_free_receive_ring(struct rx_ring *rxr)
3721{
3722	struct	adapter		*adapter;
3723	struct igb_rx_buf	*rxbuf;
3724	int i;
3725
3726	adapter = rxr->adapter;
3727	for (i = 0; i < adapter->num_rx_desc; i++) {
3728		rxbuf = &rxr->rx_buffers[i];
3729		if (rxbuf->m_head != NULL) {
3730			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3731			    BUS_DMASYNC_POSTREAD);
3732			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3733			rxbuf->m_head->m_flags |= M_PKTHDR;
3734			m_freem(rxbuf->m_head);
3735		}
3736		if (rxbuf->m_pack != NULL) {
3737			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3738			    BUS_DMASYNC_POSTREAD);
3739			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3740			rxbuf->m_pack->m_flags |= M_PKTHDR;
3741			m_freem(rxbuf->m_pack);
3742		}
3743		rxbuf->m_head = NULL;
3744		rxbuf->m_pack = NULL;
3745	}
3746}
3747
3748
3749/*********************************************************************
3750 *
3751 *  Initialize a receive ring and its buffers.
3752 *
3753 **********************************************************************/
3754static int
3755igb_setup_receive_ring(struct rx_ring *rxr)
3756{
3757	struct	adapter		*adapter;
3758	struct  ifnet		*ifp;
3759	device_t		dev;
3760	struct igb_rx_buf	*rxbuf;
3761	bus_dma_segment_t	pseg[1], hseg[1];
3762	struct lro_ctrl		*lro = &rxr->lro;
3763	int			rsize, nsegs, error = 0;
3764
3765	adapter = rxr->adapter;
3766	dev = adapter->dev;
3767	ifp = adapter->ifp;
3768
3769	/* Clear the ring contents */
3770	IGB_RX_LOCK(rxr);
3771	rsize = roundup2(adapter->num_rx_desc *
3772	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3773	bzero((void *)rxr->rx_base, rsize);
3774
3775	/*
3776	** Free current RX buffer structures and their mbufs
3777	*/
3778	igb_free_receive_ring(rxr);
3779
3780	/* Configure for header split? */
3781	if (igb_header_split)
3782		rxr->hdr_split = TRUE;
3783
3784        /* Now replenish the ring mbufs */
3785	for (int j = 0; j != adapter->num_rx_desc; ++j) {
3786		struct mbuf	*mh, *mp;
3787
3788		rxbuf = &rxr->rx_buffers[j];
3789		if (rxr->hdr_split == FALSE)
3790			goto skip_head;
3791
3792		/* First the header */
3793		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3794		if (rxbuf->m_head == NULL) {
3795			error = ENOBUFS;
3796                        goto fail;
3797		}
3798		m_adj(rxbuf->m_head, ETHER_ALIGN);
3799		mh = rxbuf->m_head;
3800		mh->m_len = mh->m_pkthdr.len = MHLEN;
3801		mh->m_flags |= M_PKTHDR;
3802		/* Get the memory mapping */
3803		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3804		    rxbuf->hmap, rxbuf->m_head, hseg,
3805		    &nsegs, BUS_DMA_NOWAIT);
3806		if (error != 0) /* Nothing elegant to do here */
3807                        goto fail;
3808		bus_dmamap_sync(rxr->htag,
3809		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3810		/* Update descriptor */
3811		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3812
3813skip_head:
3814		/* Now the payload cluster */
3815		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3816		    M_PKTHDR, adapter->rx_mbuf_sz);
3817		if (rxbuf->m_pack == NULL) {
3818			error = ENOBUFS;
3819                        goto fail;
3820		}
3821		mp = rxbuf->m_pack;
3822		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3823		/* Get the memory mapping */
3824		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3825		    rxbuf->pmap, mp, pseg,
3826		    &nsegs, BUS_DMA_NOWAIT);
3827		if (error != 0)
3828                        goto fail;
3829		bus_dmamap_sync(rxr->ptag,
3830		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3831		/* Update descriptor */
3832		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3833        }
3834
3835	/* Setup our descriptor indices */
3836	rxr->next_to_check = 0;
3837	rxr->next_to_refresh = 0;
3838	rxr->lro_enabled = FALSE;
3839	rxr->rx_split_packets = 0;
3840	rxr->rx_bytes = 0;
3841
3842	rxr->fmp = NULL;
3843	rxr->lmp = NULL;
3844	rxr->discard = FALSE;
3845
3846	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3847	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3848
3849	/*
3850	** Now set up the LRO interface; we
3851	** also only do header split when LRO
3852	** is enabled, since so often they are
3853	** undesirable in similar setups.
3854	*/
3855	if (ifp->if_capenable & IFCAP_LRO) {
3856		int err = tcp_lro_init(lro);
3857		if (err) {
3858			device_printf(dev, "LRO Initialization failed!\n");
3859			goto fail;
3860		}
3861		INIT_DEBUGOUT("RX LRO Initialized\n");
3862		rxr->lro_enabled = TRUE;
3863		lro->ifp = adapter->ifp;
3864	}
3865
3866	IGB_RX_UNLOCK(rxr);
3867	return (0);
3868
3869fail:
3870	igb_free_receive_ring(rxr);
3871	IGB_RX_UNLOCK(rxr);
3872	return (error);
3873}
3874
3875/*********************************************************************
3876 *
3877 *  Initialize all receive rings.
3878 *
3879 **********************************************************************/
3880static int
3881igb_setup_receive_structures(struct adapter *adapter)
3882{
3883	struct rx_ring *rxr = adapter->rx_rings;
3884	int i;
3885
3886	for (i = 0; i < adapter->num_queues; i++, rxr++)
3887		if (igb_setup_receive_ring(rxr))
3888			goto fail;
3889
3890	return (0);
3891fail:
3892	/*
3893	 * Free RX buffers allocated so far; we will only handle
3894	 * the rings that completed, the failing case will have
3895	 * cleaned up for itself. 'i' is the endpoint.
3896	 */
3897	for (int j = 0; j < i; ++j) {
3898		rxr = &adapter->rx_rings[j];
3899		igb_free_receive_ring(rxr);
3900	}
3901
3902	return (ENOBUFS);
3903}
3904
3905/*********************************************************************
3906 *
3907 *  Enable receive unit.
3908 *
3909 **********************************************************************/
3910static void
3911igb_initialize_receive_units(struct adapter *adapter)
3912{
3913	struct rx_ring	*rxr = adapter->rx_rings;
3914	struct ifnet	*ifp = adapter->ifp;
3915	struct e1000_hw *hw = &adapter->hw;
3916	u32		rctl, rxcsum, psize, srrctl = 0;
3917
3918	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3919
3920	/*
3921	 * Make sure receives are disabled while setting
3922	 * up the descriptor ring
3923	 */
3924	rctl = E1000_READ_REG(hw, E1000_RCTL);
3925	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3926
3927	/*
3928	** Set up for header split
3929	*/
3930	if (rxr->hdr_split) {
3931		/* Use a standard mbuf for the header */
3932		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3933		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3934	} else
3935		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3936
3937	/*
3938	** Set up for jumbo frames
3939	*/
3940	if (ifp->if_mtu > ETHERMTU) {
3941		rctl |= E1000_RCTL_LPE;
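		/*
		** The SRRCTL packet buffer size field is in 1KB
		** units, so this selects 4KB payload buffers when
		** jumbo frames are enabled.
		*/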
3942		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3943		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3944
3945		/* Set maximum packet len */
3946		psize = adapter->max_frame_size;
3947		/* are we on a vlan? */
3948		if (adapter->ifp->if_vlantrunk != NULL)
3949			psize += VLAN_TAG_SIZE;
3950		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3951	} else {
3952		rctl &= ~E1000_RCTL_LPE;
3953		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3954		rctl |= E1000_RCTL_SZ_2048;
3955	}
3956
3957	/* Setup the Base and Length of the Rx Descriptor Rings */
3958	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3959		u64 bus_addr = rxr->rxdma.dma_paddr;
3960		u32 rxdctl;
3961
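		/*
		** Note: the legacy and advanced RX descriptors are
		** both 16 bytes, so sizing RDLEN with the legacy
		** struct still yields the correct ring length.
		*/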
3962		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3963		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3964		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3965		    (uint32_t)(bus_addr >> 32));
3966		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3967		    (uint32_t)bus_addr);
3968		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3969		/* Enable this Queue */
3970		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3971		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3972		rxdctl &= 0xFFF00000;
3973		rxdctl |= IGB_RX_PTHRESH;
3974		rxdctl |= IGB_RX_HTHRESH << 8;
3975		rxdctl |= IGB_RX_WTHRESH << 16;
3976		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3977	}
3978
3979	/*
3980	** Setup for RX MultiQueue
3981	*/
3982	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3983	if (adapter->num_queues > 1) {
3984		u32 random[10], mrqc, shift = 0;
3985		union igb_reta {
3986			u32 dword;
3987			u8  bytes[4];
3988		} reta;
3989
3990		arc4rand(&random, sizeof(random), 0);
3991		if (adapter->hw.mac.type == e1000_82575)
3992			shift = 6;
3993		/* Warning FM follows */
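		/*
		** The 128-entry redirection table is written four
		** one-byte entries per 32-bit register, spreading
		** flows round-robin over the queues; the 82575
		** expects the queue index in the upper bits of
		** each entry, hence the shift above.
		*/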
3994		for (int i = 0; i < 128; i++) {
3995			reta.bytes[i & 3] =
3996			    (i % adapter->num_queues) << shift;
3997			if ((i & 3) == 3)
3998				E1000_WRITE_REG(hw,
3999				    E1000_RETA(i >> 2), reta.dword);
4000		}
4001		/* Now fill in hash table */
4002		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4003		for (int i = 0; i < 10; i++)
4004			E1000_WRITE_REG_ARRAY(hw,
4005			    E1000_RSSRK(0), i, random[i]);
4006
4007		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4008		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4009		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4010		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4011		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4012		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4013		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4014		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4015
4016		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4017
4018		/*
4019		** NOTE: Receive Full-Packet Checksum Offload
4020		** is mutually exclusive with Multiqueue. However,
4021		** this is not the same as TCP/IP checksums, which
4022		** still work.
4023		*/
4024		rxcsum |= E1000_RXCSUM_PCSD;
4025#if __FreeBSD_version >= 800000
4026		/* For SCTP Offload */
4027		if ((hw->mac.type == e1000_82576)
4028		    && (ifp->if_capenable & IFCAP_RXCSUM))
4029			rxcsum |= E1000_RXCSUM_CRCOFL;
4030#endif
4031	} else {
4032		/* Non RSS setup */
4033		if (ifp->if_capenable & IFCAP_RXCSUM) {
4034			rxcsum |= E1000_RXCSUM_IPPCSE;
4035#if __FreeBSD_version >= 800000
4036			if (adapter->hw.mac.type == e1000_82576)
4037				rxcsum |= E1000_RXCSUM_CRCOFL;
4038#endif
4039		} else
4040			rxcsum &= ~E1000_RXCSUM_TUOFL;
4041	}
4042	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4043
4044	/* Setup the Receive Control Register */
4045	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4046	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4047		   E1000_RCTL_RDMTS_HALF |
4048		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4049	/* Strip CRC bytes. */
4050	rctl |= E1000_RCTL_SECRC;
4051	/* Make sure VLAN Filters are off */
4052	rctl &= ~E1000_RCTL_VFE;
4053	/* Don't store bad packets */
4054	rctl &= ~E1000_RCTL_SBP;
4055
4056	/* Enable Receives */
4057	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4058
4059	/*
4060	 * Setup the HW Rx Head and Tail Descriptor Pointers
4061	 *   - needs to be after enable
4062	 */
4063	for (int i = 0; i < adapter->num_queues; i++) {
4064		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
4065		E1000_WRITE_REG(hw, E1000_RDT(i),
4066		     adapter->num_rx_desc - 1);
4067	}
4068	return;
4069}
4070
4071/*********************************************************************
4072 *
4073 *  Free receive rings.
4074 *
4075 **********************************************************************/
4076static void
4077igb_free_receive_structures(struct adapter *adapter)
4078{
4079	struct rx_ring *rxr = adapter->rx_rings;
4080
4081	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4082		struct lro_ctrl	*lro = &rxr->lro;
4083		igb_free_receive_buffers(rxr);
4084		tcp_lro_free(lro);
4085		igb_dma_free(adapter, &rxr->rxdma);
4086	}
4087
4088	free(adapter->rx_rings, M_DEVBUF);
4089}
4090
4091/*********************************************************************
4092 *
4093 *  Free receive ring data structures.
4094 *
4095 **********************************************************************/
4096static void
4097igb_free_receive_buffers(struct rx_ring *rxr)
4098{
4099	struct adapter		*adapter = rxr->adapter;
4100	struct igb_rx_buf	*rxbuf;
4101	int i;
4102
4103	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4104
4105	/* Cleanup any existing buffers */
4106	if (rxr->rx_buffers != NULL) {
4107		for (i = 0; i < adapter->num_rx_desc; i++) {
4108			rxbuf = &rxr->rx_buffers[i];
4109			if (rxbuf->m_head != NULL) {
4110				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4111				    BUS_DMASYNC_POSTREAD);
4112				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4113				rxbuf->m_head->m_flags |= M_PKTHDR;
4114				m_freem(rxbuf->m_head);
4115			}
4116			if (rxbuf->m_pack != NULL) {
4117				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4118				    BUS_DMASYNC_POSTREAD);
4119				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4120				rxbuf->m_pack->m_flags |= M_PKTHDR;
4121				m_freem(rxbuf->m_pack);
4122			}
4123			rxbuf->m_head = NULL;
4124			rxbuf->m_pack = NULL;
4125			if (rxbuf->hmap != NULL) {
4126				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4127				rxbuf->hmap = NULL;
4128			}
4129			if (rxbuf->pmap != NULL) {
4130				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4131				rxbuf->pmap = NULL;
4132			}
4133		}
4134		if (rxr->rx_buffers != NULL) {
4135			free(rxr->rx_buffers, M_DEVBUF);
4136			rxr->rx_buffers = NULL;
4137		}
4138	}
4139
4140	if (rxr->htag != NULL) {
4141		bus_dma_tag_destroy(rxr->htag);
4142		rxr->htag = NULL;
4143	}
4144	if (rxr->ptag != NULL) {
4145		bus_dma_tag_destroy(rxr->ptag);
4146		rxr->ptag = NULL;
4147	}
4148}
4149
4150static __inline void
4151igb_rx_discard(struct rx_ring *rxr, int i)
4152{
4153	struct adapter		*adapter = rxr->adapter;
4154	struct igb_rx_buf	*rbuf;
4155	struct mbuf             *mh, *mp;
4156
4157	rbuf = &rxr->rx_buffers[i];
4158	if (rxr->fmp != NULL) {
4159		rxr->fmp->m_flags |= M_PKTHDR;
4160		m_freem(rxr->fmp);
4161		rxr->fmp = NULL;
4162		rxr->lmp = NULL;
4163	}
4164
4165	mh = rbuf->m_head;
4166	mp = rbuf->m_pack;
4167
4168	/* Reuse loaded DMA map and just update mbuf chain */
	if (mh != NULL) {	/* NULL when header split is disabled */
4169		mh->m_len = MHLEN;
4170		mh->m_flags |= M_PKTHDR;
4171		mh->m_next = NULL;
	}
4172
4173	mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4174	mp->m_data = mp->m_ext.ext_buf;
4175	mp->m_next = NULL;
4176	return;
4177}
4178
4179static __inline void
4180igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4181{
4182
4183	/*
4184	 * At the moment LRO is only done for IPv4/TCP packets whose TCP
4185	 * checksum has been verified by hardware and which carry no VLAN
4186	 * tag in the ethernet header.
4187	 */
4188	if (rxr->lro_enabled &&
4189	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4190	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4191	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4192	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4193	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4194	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4195		/*
4196		 * Send to the stack if:
4197		 *  - LRO not enabled, or
4198		 *  - no LRO resources, or
4199		 *  - lro enqueue fails
4200		 */
4201		if (rxr->lro.lro_cnt != 0)
4202			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4203				return;
4204	}
4205	IGB_RX_UNLOCK(rxr);
4206	(*ifp->if_input)(ifp, m);
4207	IGB_RX_LOCK(rxr);
4208}
4209
4210/*********************************************************************
4211 *
4212 *  This routine executes in interrupt context. It replenishes
4213 *  the mbufs in the descriptor ring and sends data which has
4214 *  been dma'ed into host memory to the upper layer.
4215 *
4216 *  We loop at most count times if count is > 0, or until done if
4217 *  count < 0.
4218 *
4219 *  Return TRUE if more to clean, FALSE otherwise
4220 *********************************************************************/
4221static bool
4222igb_rxeof(struct igb_queue *que, int count, int *done)
4223{
4224	struct adapter		*adapter = que->adapter;
4225	struct rx_ring		*rxr = que->rxr;
4226	struct ifnet		*ifp = adapter->ifp;
4227	struct lro_ctrl		*lro = &rxr->lro;
4228	struct lro_entry	*queued;
4229	int			i, processed = 0, rxdone = 0;
4230	u32			ptype, staterr = 0;
4231	union e1000_adv_rx_desc	*cur;
4232
4233	IGB_RX_LOCK(rxr);
4234	/* Sync the ring. */
4235	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4236	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4237
4238	/* Main clean loop */
4239	for (i = rxr->next_to_check; count != 0;) {
4240		struct mbuf		*sendmp, *mh, *mp;
4241		struct igb_rx_buf	*rxbuf;
4242		u16			hlen, plen, hdr, vtag;
4243		bool			eop = FALSE;
4244
4245		cur = &rxr->rx_base[i];
4246		staterr = le32toh(cur->wb.upper.status_error);
4247		if ((staterr & E1000_RXD_STAT_DD) == 0)
4248			break;
4249		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4250			break;
4251		count--;
4252		sendmp = mh = mp = NULL;
4253		cur->wb.upper.status_error = 0;
4254		rxbuf = &rxr->rx_buffers[i];
4255		plen = le16toh(cur->wb.upper.length);
4256		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4257		vtag = le16toh(cur->wb.upper.vlan);
4258		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4259		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4260
4261		/* Make sure all segments of a bad packet are discarded */
4262		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4263		    (rxr->discard)) {
4264			ifp->if_ierrors++;
4265			++rxr->rx_discarded;
4266			if (!eop) /* Catch subsequent segs */
4267				rxr->discard = TRUE;
4268			else
4269				rxr->discard = FALSE;
4270			igb_rx_discard(rxr, i);
4271			goto next_desc;
4272		}
4273
4274		/*
4275		** The way the hardware is configured to
4276		** split, it will ONLY use the header buffer
4277		** when header split is enabled, otherwise we
4278		** get normal behavior, ie, both header and
4279		** payload are DMA'd into the payload buffer.
4280		**
4281		** The fmp test is to catch the case where a
4282		** packet spans multiple descriptors, in that
4283		** case only the first header is valid.
4284		*/
4285		if (rxr->hdr_split && rxr->fmp == NULL) {
4286			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4287			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4288			if (hlen > IGB_HDR_BUF)
4289				hlen = IGB_HDR_BUF;
4290			/* Handle the header mbuf */
4291			mh = rxr->rx_buffers[i].m_head;
4292			mh->m_len = hlen;
4293			/* clear buf info for refresh */
4294			rxbuf->m_head = NULL;
4295			/*
4296			** Get the payload length; this
4297			** could be zero if it's a small
4298			** packet.
4299			*/
4300			if (plen > 0) {
4301				mp = rxr->rx_buffers[i].m_pack;
4302				mp->m_len = plen;
4303				mh->m_next = mp;
4304				/* clear buf info for refresh */
4305				rxbuf->m_pack = NULL;
4306				rxr->rx_split_packets++;
4307			}
4308		} else {
4309			/*
4310			** Either no header split, or a
4311			** secondary piece of a fragmented
4312			** split packet.
4313			*/
4314			mh = rxr->rx_buffers[i].m_pack;
4315			mh->m_len = plen;
4316			/* clear buf info for refresh */
4317			rxbuf->m_pack = NULL;
4318		}
4319
4320		++processed; /* So we know when to refresh */
4321
4322		/* Initial frame - setup */
4323		if (rxr->fmp == NULL) {
4324			mh->m_pkthdr.len = mh->m_len;
4325			/* Store the first mbuf */
4326			rxr->fmp = mh;
4327			rxr->lmp = mh;
4328			if (mp != NULL) {
4329				/* Add payload if split */
4330				mh->m_pkthdr.len += mp->m_len;
4331				rxr->lmp = mh->m_next;
4332			}
4333		} else {
4334			/* Chain mbuf's together */
4335			rxr->lmp->m_next = mh;
4336			rxr->lmp = rxr->lmp->m_next;
4337			rxr->fmp->m_pkthdr.len += mh->m_len;
4338		}
4339
4340		if (eop) {
4341			rxr->fmp->m_pkthdr.rcvif = ifp;
4342			ifp->if_ipackets++;
4343			rxr->rx_packets++;
4344			/* capture data for AIM */
4345			rxr->packets++;
4346			rxr->bytes += rxr->fmp->m_pkthdr.len;
4347			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4348
4349			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4350				igb_rx_checksum(staterr, rxr->fmp, ptype);
4351
4352			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4353			    (staterr & E1000_RXD_STAT_VP) != 0) {
4354				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4355				rxr->fmp->m_flags |= M_VLANTAG;
4356			}
4357#if __FreeBSD_version >= 800000
4358			rxr->fmp->m_pkthdr.flowid = que->msix;
4359			rxr->fmp->m_flags |= M_FLOWID;
4360#endif
4361			sendmp = rxr->fmp;
4362			/* Make sure to set M_PKTHDR. */
4363			sendmp->m_flags |= M_PKTHDR;
4364			rxr->fmp = NULL;
4365			rxr->lmp = NULL;
4366		}
4367
4368next_desc:
4369		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4370		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4371
4372		/* Advance our pointers to the next descriptor. */
4373		if (++i == adapter->num_rx_desc)
4374			i = 0;
4375		/*
4376		** Send to the stack or LRO
4377		*/
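		/*
		** Note: igb_rx_input() drops and reacquires the RX
		** lock around if_input(), so the ring index is
		** staged in next_to_check across the call.
		*/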
4378		if (sendmp != NULL) {
4379			rxr->next_to_check = i;
4380			igb_rx_input(rxr, ifp, sendmp, ptype);
4381			i = rxr->next_to_check;
4382			rxdone++;
4383		}
4384
4385		/* Every 8 descriptors we go to refresh mbufs */
4386		if (processed == 8) {
4387                        igb_refresh_mbufs(rxr, i);
4388                        processed = 0;
4389		}
4390	}
4391
4392	/* Catch any remainders */
4393	if (processed != 0) {
4394		igb_refresh_mbufs(rxr, i);
4395		processed = 0;
4396	}
4397
4398	rxr->next_to_check = i;
4399
4400	/*
4401	 * Flush any outstanding LRO work
4402	 */
4403	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4404		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4405		tcp_lro_flush(lro, queued);
4406	}
4407
4408	IGB_RX_UNLOCK(rxr);
4409
4410	if (done != NULL)
4411		*done = rxdone;
4412
4413	/*
4414	** We still have cleaning to do?
4415	** Schedule another interrupt if so.
4416	*/
4417	if ((staterr & E1000_RXD_STAT_DD) != 0)
4418		return (TRUE);
4419
4420	return (FALSE);
4421}
4422
4423/*********************************************************************
4424 *
4425 *  Verify that the hardware indicated that the checksum is valid.
4426 *  Inform the stack about the status of the checksum so that the
4427 *  stack doesn't spend time verifying it.
4428 *
4429 *********************************************************************/
4430static void
4431igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4432{
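	/*
	** The low 16 bits of staterr are treated as the status
	** field and bits 31:24 as the error field.
	*/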
4433	u16 status = (u16)staterr;
4434	u8  errors = (u8) (staterr >> 24);
4435	int sctp;
4436
4437	/* Ignore Checksum bit is set */
4438	if (status & E1000_RXD_STAT_IXSM) {
4439		mp->m_pkthdr.csum_flags = 0;
4440		return;
4441	}
4442
4443	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4444	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4445		sctp = 1;
4446	else
4447		sctp = 0;
4448	if (status & E1000_RXD_STAT_IPCS) {
4449		/* Did it pass? */
4450		if (!(errors & E1000_RXD_ERR_IPE)) {
4451			/* IP Checksum Good */
4452			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4453			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4454		} else
4455			mp->m_pkthdr.csum_flags = 0;
4456	}
4457
4458	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4459		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4460#if __FreeBSD_version >= 800000
4461		if (sctp) /* reassign */
4462			type = CSUM_SCTP_VALID;
4463#endif
4464		/* Did it pass? */
4465		if (!(errors & E1000_RXD_ERR_TCPE)) {
4466			mp->m_pkthdr.csum_flags |= type;
4467			if (sctp == 0)
4468				mp->m_pkthdr.csum_data = htons(0xffff);
4469		}
4470	}
4471	return;
4472}
4473
4474/*
4475 * This routine is run via a vlan
4476 * config EVENT
4477 */
4478static void
4479igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4480{
4481	struct adapter	*adapter = ifp->if_softc;
4482	u32		index, bit;
4483
4484	if (ifp->if_softc !=  arg)   /* Not our event */
4485		return;
4486
4487	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4488                return;
4489
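	/*
	** The VFTA is 128 32-bit registers, one bit per possible
	** VLAN ID: 'index' selects the register and 'bit' the
	** position within it.
	*/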
4490	index = (vtag >> 5) & 0x7F;
4491	bit = vtag & 0x1F;
4492	igb_shadow_vfta[index] |= (1 << bit);
4493	++adapter->num_vlans;
4494	/* Re-init to load the changes */
4495	igb_init(adapter);
4496}
4497
4498/*
4499 * This routine is run via a vlan
4500 * unconfig EVENT
4501 */
4502static void
4503igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4504{
4505	struct adapter	*adapter = ifp->if_softc;
4506	u32		index, bit;
4507
4508	if (ifp->if_softc !=  arg)
4509		return;
4510
4511	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4512                return;
4513
4514	index = (vtag >> 5) & 0x7F;
4515	bit = vtag & 0x1F;
4516	igb_shadow_vfta[index] &= ~(1 << bit);
4517	--adapter->num_vlans;
4518	/* Re-init to load the changes */
4519	igb_init(adapter);
4520}
4521
4522static void
4523igb_setup_vlan_hw_support(struct adapter *adapter)
4524{
4525	struct e1000_hw *hw = &adapter->hw;
4526	u32             reg;
4527
4528	/*
4529	** We get here thru init_locked, meaning
4530	** a soft reset; this has already cleared
4531	** the VFTA and other state, so if no vlans
4532	** have been registered, do nothing.
4533	*/
4534	if (adapter->num_vlans == 0)
4535                return;
4536
4537	/*
4538	** A soft reset zeroes out the VFTA, so
4539	** we need to repopulate it now.
4540	*/
4541	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4542                if (igb_shadow_vfta[i] != 0) {
4543			if (hw->mac.type == e1000_vfadapt)
4544				e1000_vfta_set_vf(hw, igb_shadow_vfta[i], TRUE);
4545			else
4546				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4547                           	 i, igb_shadow_vfta[i]);
4548		}
4549
4550	if (hw->mac.type == e1000_vfadapt)
4551		e1000_rlpml_set_vf(hw,
4552		    adapter->max_frame_size + VLAN_TAG_SIZE);
4553	else {
4554		reg = E1000_READ_REG(hw, E1000_CTRL);
4555		reg |= E1000_CTRL_VME;
4556		E1000_WRITE_REG(hw, E1000_CTRL, reg);
4557
4558		/* Enable the Filter Table */
4559		reg = E1000_READ_REG(hw, E1000_RCTL);
4560		reg &= ~E1000_RCTL_CFIEN;
4561		reg |= E1000_RCTL_VFE;
4562		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4563
4564		/* Update the frame size */
4565		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4566		    adapter->max_frame_size + VLAN_TAG_SIZE);
4567	}
4568}
4569
4570static void
4571igb_enable_intr(struct adapter *adapter)
4572{
4573	/* With RSS set up what to auto clear */
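	/*
	** In MSIX mode the queue interrupt bits are written to
	** EIAC (auto-clear), EIAM (auto-mask) and EIMS (enable),
	** while the legacy IMS is used only for the link (LSC)
	** interrupt.
	*/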
4574	if (adapter->msix_mem) {
4575		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4576		    adapter->eims_mask);
4577		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4578		    adapter->eims_mask);
4579		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4580		    adapter->eims_mask);
4581		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4582		    E1000_IMS_LSC);
4583	} else {
4584		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4585		    IMS_ENABLE_MASK);
4586	}
4587	E1000_WRITE_FLUSH(&adapter->hw);
4588
4589	return;
4590}
4591
4592static void
4593igb_disable_intr(struct adapter *adapter)
4594{
4595	if (adapter->msix_mem) {
4596		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4597		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4598	}
4599	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4600	E1000_WRITE_FLUSH(&adapter->hw);
4601	return;
4602}
4603
4604/*
4605 * Bit of a misnomer: what this really means is
4606 * to enable OS management of the system, i.e.
4607 * to disable special hardware management features.
4608 */
4609static void
4610igb_init_manageability(struct adapter *adapter)
4611{
4612	if (adapter->has_manage) {
4613		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4614		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4615
4616		/* disable hardware interception of ARP */
4617		manc &= ~(E1000_MANC_ARP_EN);
4618
4619                /* enable receiving management packets to the host */
4620		manc |= E1000_MANC_EN_MNG2HOST;
4621		manc2h |= 1 << 5;  /* Mng Port 623 */
4622		manc2h |= 1 << 6;  /* Mng Port 664 */
4623		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4624		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4625	}
4626}
4627
4628/*
4629 * Give control back to hardware management
4630 * controller if there is one.
4631 */
4632static void
4633igb_release_manageability(struct adapter *adapter)
4634{
4635	if (adapter->has_manage) {
4636		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4637
4638		/* re-enable hardware interception of ARP */
4639		manc |= E1000_MANC_ARP_EN;
4640		manc &= ~E1000_MANC_EN_MNG2HOST;
4641
4642		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4643	}
4644}
4645
4646/*
4647 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4648 * For ASF and Pass Through versions of f/w this means that
4649 * the driver is loaded.
4650 *
4651 */
4652static void
4653igb_get_hw_control(struct adapter *adapter)
4654{
4655	u32 ctrl_ext;
4656
4657	if (adapter->hw.mac.type == e1000_vfadapt)
4658		return;
4659
4660	/* Let firmware know the driver has taken over */
4661	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4662	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4663	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4664}
4665
4666/*
4667 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4668 * For ASF and Pass Through versions of f/w this means that the
4669 * driver is no longer loaded.
4670 *
4671 */
4672static void
4673igb_release_hw_control(struct adapter *adapter)
4674{
4675	u32 ctrl_ext;
4676
4677	if (adapter->hw.mac.type == e1000_vfadapt)
4678		return;
4679
4680	/* Let firmware take over control of h/w */
4681	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4682	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4683	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4684}
4685
4686static int
4687igb_is_valid_ether_addr(uint8_t *addr)
4688{
4689	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4690
4691	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4692		return (FALSE);
4693	}
4694
4695	return (TRUE);
4696}
4697
4698
4699/*
4700 * Enable PCI Wake On Lan capability
4701 */
4702static void
4703igb_enable_wakeup(device_t dev)
4704{
4705	u16     cap, status;
4706	u8      id;
4707
4708	/* First find the capabilities pointer*/
4709	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4710	/* Read the PM Capabilities */
4711	id = pci_read_config(dev, cap, 1);
4712	if (id != PCIY_PMG)     /* Something wrong */
4713		return;
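	/*
	** Note: only the first entry in the capability list is
	** examined here; the PM capability is assumed to be the
	** first one present.
	*/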
4714	/* OK, we have the power capabilities, so
4715	   now get the status register */
4716	cap += PCIR_POWER_STATUS;
4717	status = pci_read_config(dev, cap, 2);
4718	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4719	pci_write_config(dev, cap, status, 2);
4720	return;
4721}
4722
4723static void
4724igb_led_func(void *arg, int onoff)
4725{
4726	struct adapter	*adapter = arg;
4727
4728	IGB_CORE_LOCK(adapter);
4729	if (onoff) {
4730		e1000_setup_led(&adapter->hw);
4731		e1000_led_on(&adapter->hw);
4732	} else {
4733		e1000_led_off(&adapter->hw);
4734		e1000_cleanup_led(&adapter->hw);
4735	}
4736	IGB_CORE_UNLOCK(adapter);
4737}
4738
4739/**********************************************************************
4740 *
4741 *  Update the board statistics counters.
4742 *
4743 **********************************************************************/
4744static void
4745igb_update_stats_counters(struct adapter *adapter)
4746{
4747	struct ifnet		*ifp;
4748        struct e1000_hw		*hw = &adapter->hw;
4749	struct e1000_hw_stats	*stats;
4750
4751	/*
4752	** The virtual function adapter has only a
4753	** small controlled set of stats, do only
4754	** those and return.
4755	*/
4756	if (adapter->hw.mac.type == e1000_vfadapt) {
4757		igb_update_vf_stats_counters(adapter);
4758		return;
4759	}
4760
4761	stats = (struct e1000_hw_stats	*)adapter->stats;
4762
4763	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4764	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4765		stats->symerrs +=
4766		    E1000_READ_REG(hw,E1000_SYMERRS);
4767		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4768	}
4769
4770	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4771	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4772	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4773	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4774
4775	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4776	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4777	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4778	stats->dc += E1000_READ_REG(hw, E1000_DC);
4779	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4780	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4781	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4782	stats->xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC);
4783	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4784	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4785	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4786	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4787	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4788	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4789	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4790	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4791	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4792	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4793	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4794	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4795
4796	/* For the 64-bit byte counters the low dword must be read first. */
4797	/* Both registers clear on the read of the high dword */
4798
4799	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4800	  ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4801	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4802	  ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32) ;
4803
4804	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4805	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4806	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4807	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4808	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4809
4810	stats->tor += E1000_READ_REG(hw, E1000_TORH);
4811	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
4812
4813	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4814	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4815	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4816	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4817	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4818	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4819	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4820	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4821	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4822	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4823
4824	/* Interrupt Counts */
4825
4826	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4827	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4828	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4829	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4830	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4831	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4832	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4833	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4834	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4835
4836	/* Host to Card Statistics */
4837
4838	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4839	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4840	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4841	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4842	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4843	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4844	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4845	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4846	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4847	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4848	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4849	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4850	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4851	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4852
4853	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4854	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4855	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4856	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4857	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4858	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4859
4860	ifp = adapter->ifp;
4861	ifp->if_collisions = stats->colc;
4862
4863	/* Rx Errors */
4864	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
4865	    stats->crcerrs + stats->algnerrc +
4866	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
4867
4868	/* Tx Errors */
4869	ifp->if_oerrors = stats->ecol +
4870	    stats->latecol + adapter->watchdog_events;
4871
4872	/* Driver specific counters */
4873	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
4874	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
4875	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
4876	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
4877	adapter->packet_buf_alloc_tx =
4878	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
4879	adapter->packet_buf_alloc_rx =
4880	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
4881}
4882
4883
4884/**********************************************************************
4885 *
4886 *  Initialize the VF board statistics counters.
4887 *
4888 **********************************************************************/
4889static void
4890igb_vf_init_stats(struct adapter *adapter)
4891{
4892        struct e1000_hw *hw = &adapter->hw;
4893	struct e1000_vf_stats	*stats;
4894
4895	stats = (struct e1000_vf_stats	*)adapter->stats;
4896
4897        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
4898        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
4899        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
4900        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
4901        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
4902}
4903
4904/**********************************************************************
4905 *
4906 *  Update the VF board statistics counters.
4907 *
4908 **********************************************************************/
4909static void
4910igb_update_vf_stats_counters(struct adapter *adapter)
4911{
4912	struct e1000_hw *hw = &adapter->hw;
4913	struct e1000_vf_stats	*stats;
4914
4915	if (adapter->link_speed == 0)
4916		return;
4917
4918	stats = (struct e1000_vf_stats	*)adapter->stats;
4919
4920	UPDATE_VF_REG(E1000_VFGPRC,
4921	    stats->last_gprc, stats->gprc);
4922	UPDATE_VF_REG(E1000_VFGORC,
4923	    stats->last_gorc, stats->gorc);
4924	UPDATE_VF_REG(E1000_VFGPTC,
4925	    stats->last_gptc, stats->gptc);
4926	UPDATE_VF_REG(E1000_VFGOTC,
4927	    stats->last_gotc, stats->gotc);
4928	UPDATE_VF_REG(E1000_VFMPRC,
4929	    stats->last_mprc, stats->mprc);
4930}
4931
4932
4933/** igb_sysctl_tdh_handler - Handler function
4934 *  Retrieves the TDH value from the hardware
4935 */
4936static int
4937igb_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
4938{
4939	int error;
4940
4941	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
4942	if (!txr) return 0;
4943
4944	unsigned val = E1000_READ_REG(&txr->adapter->hw, E1000_TDH(txr->me));
4945	error = sysctl_handle_int(oidp, &val, 0, req);
4946	if (error || !req->newptr)
4947		return error;
4948	return 0;
4949}
4950
4951/** igb_sysctl_tdt_handler - Handler function
4952 *  Retrieves the TDT value from the hardware
4953 */
4954static int
4955igb_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
4956{
4957	int error;
4958
4959	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
4960	if (!txr) return 0;
4961
4962	unsigned val = E1000_READ_REG(&txr->adapter->hw, E1000_TDT(txr->me));
4963	error = sysctl_handle_int(oidp, &val, 0, req);
4964	if (error || !req->newptr)
4965		return error;
4966	return 0;
4967}
4968
4969/** igb_sysctl_rdh_handler - Handler function
4970 *  Retrieves the RDH value from the hardware
4971 */
4972static int
4973igb_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
4974{
4975	int error;
4976
4977	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
4978	if (!rxr) return 0;
4979
4980	unsigned val = E1000_READ_REG(&rxr->adapter->hw, E1000_RDH(rxr->me));
4981	error = sysctl_handle_int(oidp, &val, 0, req);
4982	if (error || !req->newptr)
4983		return error;
4984	return 0;
4985}
4986
4987/** igb_sysctl_rdt_handler - Handler function
4988 *  Retrieves the RDT value from the hardware
4989 */
4990static int
4991igb_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
4992{
4993	int error;
4994
4995	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
4996	if (!rxr) return 0;
4997
4998	unsigned val = E1000_READ_REG(&rxr->adapter->hw, E1000_RDT(rxr->me));
4999	error = sysctl_handle_int(oidp, &val, 0, req);
5000	if (error || !req->newptr)
5001		return error;
5002	return 0;
5003}
5004
5005/*
5006 * Add sysctl variables, one per statistic, to the system.
5007 */
5008static void
5009igb_add_hw_stats(struct adapter *adapter)
5010{
5011
5012	device_t dev = adapter->dev;
5013
5014	struct tx_ring *txr = adapter->tx_rings;
5015	struct rx_ring *rxr = adapter->rx_rings;
5016
5017	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5018	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5019	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5020	struct e1000_hw_stats *stats = adapter->stats;
5021
5022	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5023	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5024
5025#define QUEUE_NAME_LEN 32
5026	char namebuf[QUEUE_NAME_LEN];
5027
5028	/* Driver Statistics */
5029	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5030			CTLFLAG_RD, &adapter->link_irq, 0,
5031			"Link MSIX IRQ Handled");
5032	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5033			CTLFLAG_RD, &adapter->dropped_pkts,
5034			"Driver dropped packets");
5035	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5036			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5037			"Driver tx dma failure in xmit");
5038
5039	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5040			CTLFLAG_RD, &adapter->device_control,
5041			"Device Control Register");
5042	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5043			CTLFLAG_RD, &adapter->rx_control,
5044			"Receiver Control Register");
5045	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5046			CTLFLAG_RD, &adapter->int_mask,
5047			"Interrupt Mask");
5048	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5049			CTLFLAG_RD, &adapter->eint_mask,
5050			"Extended Interrupt Mask");
5051	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5052			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5053			"Transmit Buffer Packet Allocation");
5054	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5055			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5056			"Receive Buffer Packet Allocation");
5057	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5058			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5059			"Flow Control High Watermark");
5060	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5061			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5062			"Flow Control Low Watermark");
5063
5064	for (int i = 0; i < adapter->num_queues; i++, txr++) {
5065		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5066		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5067					    CTLFLAG_RD, NULL, "Queue Name");
5068		queue_list = SYSCTL_CHILDREN(queue_node);
5069
5070		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5071				CTLFLAG_RD, txr, sizeof(txr),
5072				igb_sysctl_tdh_handler, "IU",
5073 				"Transmit Descriptor Head");
5074		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5075				CTLFLAG_RD, txr, sizeof(txr),
5076				igb_sysctl_tdt_handler, "IU",
5077 				"Transmit Descriptor Tail");
5078		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5079				CTLFLAG_RD, &txr->no_desc_avail,
5080				"Queue No Descriptor Available");
5081		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5082				CTLFLAG_RD, &txr->tx_packets,
5083				"Queue Packets Transmitted");
5084	}
5085
5086	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5092		struct lro_ctrl *lro = &rxr->lro;
5093
5094		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5095		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5096					    CTLFLAG_RD, NULL, "Queue Name");
5097		queue_list = SYSCTL_CHILDREN(queue_node);
5098
5099		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5100				CTLFLAG_RD, rxr, sizeof(rxr),
5101				igb_sysctl_rdh_handler, "IU",
5102				"Receive Descriptor Head");
5103		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5104				CTLFLAG_RD, rxr, sizeof(rxr),
5105				igb_sysctl_rdt_handler, "IU",
5106				"Receive Descriptor Tail");
5107		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5108				CTLFLAG_RD, &rxr->rx_packets,
5109				"Queue Packets Received");
5110		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5111				CTLFLAG_RD, &rxr->rx_bytes,
5112				"Queue Bytes Received");
5113		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5114				CTLFLAG_RD, &lro->lro_queued, 0,
5115				"LRO Queued");
5116		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5117				CTLFLAG_RD, &lro->lro_flushed, 0,
5118				"LRO Flushed");
5119	}
5120
5121	/* MAC stats get their own sub node */
5122
5123	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5124				    CTLFLAG_RD, NULL, "MAC Statistics");
5125	stat_list = SYSCTL_CHILDREN(stat_node);
5126
5127	/*
5128	** VF adapter has a very limited set of stats
5129	** since it's not managing the metal, so to speak.
5130	*/
5131	if (adapter->hw.mac.type == e1000_vfadapt) {
5132	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5133			CTLFLAG_RD, &stats->gprc,
5134			"Good Packets Received");
5135	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5136			CTLFLAG_RD, &stats->gptc,
5137			"Good Packets Transmitted");
5138 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5139 			CTLFLAG_RD, &stats->gorc,
5140 			"Good Octets Received");
5141 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octest_txd",
5142 			CTLFLAG_RD, &stats->gotc,
5143 			"Good Octest Transmitted");
5144	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5145			CTLFLAG_RD, &stats->mprc,
5146			"Multicast Packets Received");
5147		return;
5148	}
5149
5150	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5151			CTLFLAG_RD, &stats->ecol,
5152			"Excessive collisions");
5153	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5154			CTLFLAG_RD, &stats->scc,
5155			"Single collisions");
5156	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5157			CTLFLAG_RD, &stats->mcc,
5158			"Multiple collisions");
5159	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5160			CTLFLAG_RD, &stats->latecol,
5161			"Late collisions");
5162	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5163			CTLFLAG_RD, &stats->colc,
5164			"Collision Count");
5165	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5166			CTLFLAG_RD, &stats->symerrs,
5167			"Symbol Errors");
5168	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5169			CTLFLAG_RD, &stats->sec,
5170			"Sequence Errors");
5171	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5172			CTLFLAG_RD, &stats->dc,
5173			"Defer Count");
5174	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5175			CTLFLAG_RD, &stats->mpc,
5176			"Missed Packets");
5177	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5178			CTLFLAG_RD, &stats->rnbc,
5179			"Receive No Buffers");
5180	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5181			CTLFLAG_RD, &stats->ruc,
5182			"Receive Undersize");
5183	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5184			CTLFLAG_RD, &stats->rfc,
5185			"Fragmented Packets Received ");
5186	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5187			CTLFLAG_RD, &stats->roc,
5188			"Oversized Packets Received");
5189	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5190			CTLFLAG_RD, &stats->rjc,
5191			"Recevied Jabber");
5192	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5193			CTLFLAG_RD, &stats->rxerrc,
5194			"Receive Errors");
5195	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5196			CTLFLAG_RD, &stats->crcerrs,
5197			"CRC errors");
5198	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5199			CTLFLAG_RD, &stats->algnerrc,
5200			"Alignment Errors");
5201	/* On 82575 these are collision counts */
5202	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5203			CTLFLAG_RD, &stats->cexterr,
5204			"Collision/Carrier extension errors");
5205	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_overruns",
5206			CTLFLAG_RD, &adapter->rx_overruns,
5207			"RX overruns");
5208	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "watchdog_timeouts",
5209			CTLFLAG_RD, &adapter->watchdog_events,
5210			"Watchdog timeouts");
5211	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5212			CTLFLAG_RD, &stats->xonrxc,
5213			"XON Received");
5214	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5215			CTLFLAG_RD, &stats->xontxc,
5216			"XON Transmitted");
5217	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5218			CTLFLAG_RD, &stats->xoffrxc,
5219			"XOFF Received");
5220	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5221			CTLFLAG_RD, &stats->xofftxc,
5222			"XOFF Transmitted");
5223	/* Packet Reception Stats */
5224	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5225			CTLFLAG_RD, &stats->tpr,
5226			"Total Packets Received ");
5227	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5228			CTLFLAG_RD, &stats->gprc,
5229			"Good Packets Received");
5230	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5231			CTLFLAG_RD, &stats->bprc,
5232			"Broadcast Packets Received");
5233	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5234			CTLFLAG_RD, &stats->mprc,
5235			"Multicast Packets Received");
5236	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5237			CTLFLAG_RD, &stats->prc64,
5238			"64 byte frames received ");
5239	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5240			CTLFLAG_RD, &stats->prc127,
5241			"65-127 byte frames received");
5242	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5243			CTLFLAG_RD, &stats->prc255,
5244			"128-255 byte frames received");
5245	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5246			CTLFLAG_RD, &stats->prc511,
5247			"256-511 byte frames received");
5248	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5249			CTLFLAG_RD, &stats->prc1023,
5250			"512-1023 byte frames received");
5251	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5252			CTLFLAG_RD, &stats->prc1522,
5253			"1023-1522 byte frames received");
5254 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5255 			CTLFLAG_RD, &stats->gorc,
5256 			"Good Octets Received");
5257
5258	/* Packet Transmission Stats */
5259 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octest_txd",
5260 			CTLFLAG_RD, &stats->gotc,
5261 			"Good Octest Transmitted");
5262	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5263			CTLFLAG_RD, &stats->tpt,
5264			"Total Packets Transmitted");
5265	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5266			CTLFLAG_RD, &stats->gptc,
5267			"Good Packets Transmitted");
5268	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5269			CTLFLAG_RD, &stats->bptc,
5270			"Broadcast Packets Transmitted");
5271	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5272			CTLFLAG_RD, &stats->mptc,
5273			"Multicast Packets Transmitted");
5274	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5275			CTLFLAG_RD, &stats->ptc64,
5276			"64 byte frames transmitted ");
5277	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5278			CTLFLAG_RD, &stats->ptc127,
5279			"65-127 byte frames transmitted");
5280	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5281			CTLFLAG_RD, &stats->ptc255,
5282			"128-255 byte frames transmitted");
5283	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5284			CTLFLAG_RD, &stats->ptc511,
5285			"256-511 byte frames transmitted");
5286	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5287			CTLFLAG_RD, &stats->ptc1023,
5288			"512-1023 byte frames transmitted");
5289	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5290			CTLFLAG_RD, &stats->ptc1522,
5291			"1024-1522 byte frames transmitted");
5292	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5293			CTLFLAG_RD, &stats->tsctc,
5294			"TSO Contexts Transmitted");
5295	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5296			CTLFLAG_RD, &stats->tsctfc,
5297			"TSO Contexts Failed");
5298
5299
5300	/* Interrupt Stats */
5301
5302	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5303				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5304	int_list = SYSCTL_CHILDREN(int_node);
5305
5306	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5307			CTLFLAG_RD, &stats->iac,
5308			"Interrupt Assertion Count");
5309
5310	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5311			CTLFLAG_RD, &stats->icrxptc,
5312			"Interrupt Cause Rx Pkt Timer Expire Count");
5313
5314	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5315			CTLFLAG_RD, &stats->icrxatc,
5316			"Interrupt Cause Rx Abs Timer Expire Count");
5317
5318	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5319			CTLFLAG_RD, &stats->ictxptc,
5320			"Interrupt Cause Tx Pkt Timer Expire Count");
5321
5322	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5323			CTLFLAG_RD, &stats->ictxatc,
5324			"Interrupt Cause Tx Abs Timer Expire Count");
5325
5326	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5327			CTLFLAG_RD, &stats->ictxqec,
5328			"Interrupt Cause Tx Queue Empty Count");
5329
5330	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5331			CTLFLAG_RD, &stats->ictxqmtc,
5332			"Interrupt Cause Tx Queue Min Thresh Count");
5333
5334	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5335			CTLFLAG_RD, &stats->icrxdmtc,
5336			"Interrupt Cause Rx Desc Min Thresh Count");
5337
5338	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5339			CTLFLAG_RD, &stats->icrxoc,
5340			"Interrupt Cause Receiver Overrun Count");
5341
5342	/* Host to Card Stats */
5343
5344	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5345				    CTLFLAG_RD, NULL,
5346				    "Host to Card Statistics");
5347
5348	host_list = SYSCTL_CHILDREN(host_node);
5349
5350	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5351			CTLFLAG_RD, &stats->cbtmpc,
5352			"Circuit Breaker Tx Packet Count");
5353
5354	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5355			CTLFLAG_RD, &stats->htdpmc,
5356			"Host Transmit Discarded Packets");
5357
5358	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5359			CTLFLAG_RD, &stats->rpthc,
5360			"Rx Packets To Host");
5361
5362	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5363			CTLFLAG_RD, &stats->cbrmpc,
5364			"Circuit Breaker Rx Packet Count");
5365
5366	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5367			CTLFLAG_RD, &stats->cbrdpc,
5368			"Circuit Breaker Rx Dropped Count");
5369
5370	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5371			CTLFLAG_RD, &stats->hgptc,
5372			"Host Good Packets Tx Count");
5373
5374	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5375			CTLFLAG_RD, &stats->htcbdpc,
5376			"Host Tx Circuit Breaker Dropped Count");
5377
5378	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5379			CTLFLAG_RD, &stats->hgorc,
5380			"Host Good Octets Received Count");
5381
5382	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5383			CTLFLAG_RD, &stats->hgotc,
5384			"Host Good Octets Transmit Count");
5385
5386	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5387			CTLFLAG_RD, &stats->lenerrs,
5388			"Length Errors");
5389
5390	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5391			CTLFLAG_RD, &stats->scvpc,
5392			"SerDes/SGMII Code Violation Pkt Count");
5393
5394	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5395			CTLFLAG_RD, &stats->hrmpc,
5396			"Header Redirection Missed Packet Count");
5397}
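/*
 * Note: the "interrupts" and "host" nodes registered above hang off the
 * device's sysctl tree, so once the interface is attached they can be read
 * from userland.  An illustrative sketch (assuming unit 0; the prefix
 * depends on the actual unit number):
 *
 *	sysctl dev.igb.0.interrupts.asserts
 *	sysctl dev.igb.0.host.rx_good_bytes
 */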
5398
5399
5400/**********************************************************************
5401 *
5402 *  This routine provides a way to dump out the adapter eeprom,
5403 *  often a useful debug/service tool. It dumps only the first
5404 *  32 words; the data that matters lies within that range.
5405 *
5406 **********************************************************************/
5407static int
5408igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5409{
5410	struct adapter *adapter;
5411	int error;
5412	int result;
5413
5414	result = -1;
5415	error = sysctl_handle_int(oidp, &result, 0, req);
5416
5417	if (error || !req->newptr)
5418		return (error);
5419
5420	/*
5421	 * This value will cause a hex dump of the
5422	 * first 32 16-bit words of the EEPROM to
5423	 * the screen.
5424	 */
5425	if (result == 1) {
5426		adapter = (struct adapter *)arg1;
5427		igb_print_nvm_info(adapter);
5428	}
5429
5430	return (error);
5431}
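/*
 * Usage sketch (assuming this handler is registered as a read/write proc
 * node under the device's sysctl tree, e.g. named "nvm"): writing 1 to the
 * node triggers the hex dump on the console, for example:
 *
 *	sysctl dev.igb.0.nvm=1
 *
 * A plain read, or writing any other value, returns without dumping.
 */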
5432
5433static void
5434igb_print_nvm_info(struct adapter *adapter)
5435{
5436	u16	eeprom_data;
5437	int	i, j, row = 0;
5438
5439	/* It's a bit crude, but it gets the job done */
5440	printf("\nInterface EEPROM Dump:\n");
5441	printf("Offset\n0x0000  ");
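	/* Dump 8 words per row; subsequent rows are labeled 0x0010, 0x0020, ... */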
5442	for (i = 0, j = 0; i < 32; i++, j++) {
5443		if (j == 8) { /* Make the offset block */
5444			j = 0; ++row;
5445			printf("\n0x00%x0  ", row);
5446		}
5447		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5448		printf("%04x ", eeprom_data);
5449	}
5450	printf("\n");
5451}
5452
5453static void
5454igb_add_rx_process_limit(struct adapter *adapter, const char *name,
5455	const char *description, int *limit, int value)
5456{
5457	*limit = value;
5458	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5459	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5460	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5461}
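/*
 * Example (a sketch of how attach-time code typically uses this helper; the
 * tunable name, target variable and default shown here are illustrative):
 *
 *	igb_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, igb_rx_process_limit);
 *
 * This seeds *limit with the default value and exposes it as a read/write
 * integer under the device's sysctl tree.
 */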
5462