if_igb.c revision 219753
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 219753 2011-03-18 18:54:00Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_altq.h"
40#endif
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#if __FreeBSD_version >= 800000
45#include <sys/buf_ring.h>
46#endif
47#include <sys/bus.h>
48#include <sys/endian.h>
49#include <sys/kernel.h>
50#include <sys/kthread.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/module.h>
54#include <sys/rman.h>
55#include <sys/socket.h>
56#include <sys/sockio.h>
57#include <sys/sysctl.h>
58#include <sys/taskqueue.h>
59#include <sys/eventhandler.h>
60#include <sys/pcpu.h>
61#include <sys/smp.h>
62#include <machine/smp.h>
63#include <machine/bus.h>
64#include <machine/resource.h>
65
66#include <net/bpf.h>
67#include <net/ethernet.h>
68#include <net/if.h>
69#include <net/if_arp.h>
70#include <net/if_dl.h>
71#include <net/if_media.h>
72
73#include <net/if_types.h>
74#include <net/if_vlan_var.h>
75
76#include <netinet/in_systm.h>
77#include <netinet/in.h>
78#include <netinet/if_ether.h>
79#include <netinet/ip.h>
80#include <netinet/ip6.h>
81#include <netinet/tcp.h>
82#include <netinet/tcp_lro.h>
83#include <netinet/udp.h>
84
85#include <machine/in_cksum.h>
86#include <dev/led/led.h>
87#include <dev/pci/pcivar.h>
88#include <dev/pci/pcireg.h>
89
90#include "e1000_api.h"
91#include "e1000_82575.h"
92#include "if_igb.h"
93
94/*********************************************************************
95 *  Set this to one to display debug statistics
96 *********************************************************************/
97int	igb_display_debug_stats = 0;
98
99/*********************************************************************
100 *  Driver version:
101 *********************************************************************/
102char igb_driver_version[] = "version - 2.1.7";
103
104
105/*********************************************************************
106 *  PCI Device ID Table
107 *
108 *  Used by probe to select the devices to load on.
109 *  The last field stores an index into igb_strings.
110 *  The last entry must be all 0s.
111 *
112 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
113 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
146						PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
152	/* required last entry */
153	{ 0, 0, 0, 0, 0}
154};
155
156/*********************************************************************
157 *  Table of branding strings for all supported NICs.
158 *********************************************************************/
159
160static char *igb_strings[] = {
161	"Intel(R) PRO/1000 Network Connection"
162};
163
164/*********************************************************************
165 *  Function prototypes
166 *********************************************************************/
167static int	igb_probe(device_t);
168static int	igb_attach(device_t);
169static int	igb_detach(device_t);
170static int	igb_shutdown(device_t);
171static int	igb_suspend(device_t);
172static int	igb_resume(device_t);
173static void	igb_start(struct ifnet *);
174static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
175#if __FreeBSD_version >= 800000
176static int	igb_mq_start(struct ifnet *, struct mbuf *);
177static int	igb_mq_start_locked(struct ifnet *,
178		    struct tx_ring *, struct mbuf *);
179static void	igb_qflush(struct ifnet *);
180#endif
181static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
182static void	igb_init(void *);
183static void	igb_init_locked(struct adapter *);
184static void	igb_stop(void *);
185static void	igb_media_status(struct ifnet *, struct ifmediareq *);
186static int	igb_media_change(struct ifnet *);
187static void	igb_identify_hardware(struct adapter *);
188static int	igb_allocate_pci_resources(struct adapter *);
189static int	igb_allocate_msix(struct adapter *);
190static int	igb_allocate_legacy(struct adapter *);
191static int	igb_setup_msix(struct adapter *);
192static void	igb_free_pci_resources(struct adapter *);
193static void	igb_local_timer(void *);
194static void	igb_reset(struct adapter *);
195static int	igb_setup_interface(device_t, struct adapter *);
196static int	igb_allocate_queues(struct adapter *);
197static void	igb_configure_queues(struct adapter *);
198
199static int	igb_allocate_transmit_buffers(struct tx_ring *);
200static void	igb_setup_transmit_structures(struct adapter *);
201static void	igb_setup_transmit_ring(struct tx_ring *);
202static void	igb_initialize_transmit_units(struct adapter *);
203static void	igb_free_transmit_structures(struct adapter *);
204static void	igb_free_transmit_buffers(struct tx_ring *);
205
206static int	igb_allocate_receive_buffers(struct rx_ring *);
207static int	igb_setup_receive_structures(struct adapter *);
208static int	igb_setup_receive_ring(struct rx_ring *);
209static void	igb_initialize_receive_units(struct adapter *);
210static void	igb_free_receive_structures(struct adapter *);
211static void	igb_free_receive_buffers(struct rx_ring *);
212static void	igb_free_receive_ring(struct rx_ring *);
213
214static void	igb_enable_intr(struct adapter *);
215static void	igb_disable_intr(struct adapter *);
216static void	igb_update_stats_counters(struct adapter *);
217static bool	igb_txeof(struct tx_ring *);
218
219static __inline	void igb_rx_discard(struct rx_ring *, int);
220static __inline void igb_rx_input(struct rx_ring *,
221		    struct ifnet *, struct mbuf *, u32);
222
223static bool	igb_rxeof(struct igb_queue *, int, int *);
224static void	igb_rx_checksum(u32, struct mbuf *, u32);
225static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
226static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
227static void	igb_set_promisc(struct adapter *);
228static void	igb_disable_promisc(struct adapter *);
229static void	igb_set_multi(struct adapter *);
230static void	igb_update_link_status(struct adapter *);
231static void	igb_refresh_mbufs(struct rx_ring *, int);
232
233static void	igb_register_vlan(void *, struct ifnet *, u16);
234static void	igb_unregister_vlan(void *, struct ifnet *, u16);
235static void	igb_setup_vlan_hw_support(struct adapter *);
236
237static int	igb_xmit(struct tx_ring *, struct mbuf **);
238static int	igb_dma_malloc(struct adapter *, bus_size_t,
239		    struct igb_dma_alloc *, int);
240static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
241static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
242static void	igb_print_nvm_info(struct adapter *);
243static int 	igb_is_valid_ether_addr(u8 *);
244static void     igb_add_hw_stats(struct adapter *);
245
246static void	igb_vf_init_stats(struct adapter *);
247static void	igb_update_vf_stats_counters(struct adapter *);
248
249/* Management and WOL Support */
250static void	igb_init_manageability(struct adapter *);
251static void	igb_release_manageability(struct adapter *);
252static void     igb_get_hw_control(struct adapter *);
253static void     igb_release_hw_control(struct adapter *);
254static void     igb_enable_wakeup(device_t);
255static void     igb_led_func(void *, int);
256
257static int	igb_irq_fast(void *);
258static void	igb_msix_que(void *);
259static void	igb_msix_link(void *);
260static void	igb_handle_que(void *context, int pending);
261static void	igb_handle_link(void *context, int pending);
262
263static void	igb_set_sysctl_value(struct adapter *, const char *,
264		    const char *, int *, int);
265
266#ifdef DEVICE_POLLING
267static poll_handler_t igb_poll;
268#endif /* DEVICE_POLLING */
269
270/*********************************************************************
271 *  FreeBSD Device Interface Entry Points
272 *********************************************************************/
273
274static device_method_t igb_methods[] = {
275	/* Device interface */
276	DEVMETHOD(device_probe, igb_probe),
277	DEVMETHOD(device_attach, igb_attach),
278	DEVMETHOD(device_detach, igb_detach),
279	DEVMETHOD(device_shutdown, igb_shutdown),
280	DEVMETHOD(device_suspend, igb_suspend),
281	DEVMETHOD(device_resume, igb_resume),
282	{0, 0}
283};
284
285static driver_t igb_driver = {
286	"igb", igb_methods, sizeof(struct adapter),
287};
288
289static devclass_t igb_devclass;
290DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
291MODULE_DEPEND(igb, pci, 1, 1, 1);
292MODULE_DEPEND(igb, ether, 1, 1, 1);
293
294/*********************************************************************
295 *  Tunable default values.
296 *********************************************************************/
297
298/* Descriptor defaults */
299static int igb_rxd = IGB_DEFAULT_RXD;
300static int igb_txd = IGB_DEFAULT_TXD;
301TUNABLE_INT("hw.igb.rxd", &igb_rxd);
302TUNABLE_INT("hw.igb.txd", &igb_txd);
303
304/*
305** AIM: Adaptive Interrupt Moderation,
306** which means that the interrupt rate
307** is varied over time based on the
308** traffic for that interrupt vector.
309*/
310static int igb_enable_aim = TRUE;
311TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
312
313/*
314 * MSIX should be the default for best performance,
315 * but this allows it to be forced off for testing.
316 */
317static int igb_enable_msix = 1;
318TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
319
320/*
321** Tunable interrupt rate
322*/
323static int igb_max_interrupt_rate = 8000;
324TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
325
326/*
327** Header split causes the packet header to
328** be DMA'd to a separate mbuf from the payload.
329** This can have memory alignment benefits. But
330** another plus is that small packets often fit
331** into the header and thus use no cluster. It's
332** a very workload-dependent feature.
333*/
334static bool igb_header_split = FALSE;
335TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
336
337/*
338** This will autoconfigure based on
339** the number of CPUs if left at 0.
340*/
341static int igb_num_queues = 0;
342TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
343
344/* How many packets rxeof tries to clean at a time */
345static int igb_rx_process_limit = 100;
346TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
347
348/* Flow control setting - default to FULL */
349static int igb_fc_setting = e1000_fc_full;
350TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
351
352/* Energy Efficient Ethernet - default to off */
353static int igb_eee_setting = FALSE;
354TUNABLE_INT("hw.igb.ee_setting", &igb_eee_setting);
355
356/*
357** DMA Coalescing, only for the i350 - default to off;
358** this feature is for power savings.
359*/
360static int igb_dma_coalesce = FALSE;
361TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);
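/*
** The tunables above are boot-time loader tunables; as an example
** (values illustrative only) they could be set from /boot/loader.conf:
**   hw.igb.rxd="2048"
**   hw.igb.num_queues="2"
**   hw.igb.enable_aim="0"
*/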
362
363/*********************************************************************
364 *  Device identification routine
365 *
366 *  igb_probe determines if the driver should be loaded on the
367 *  adapter, based on the PCI vendor/device ID of the adapter.
368 *
369 *  return BUS_PROBE_DEFAULT on success, positive on failure
370 *********************************************************************/
371
372static int
373igb_probe(device_t dev)
374{
375	char		adapter_name[60];
376	uint16_t	pci_vendor_id = 0;
377	uint16_t	pci_device_id = 0;
378	uint16_t	pci_subvendor_id = 0;
379	uint16_t	pci_subdevice_id = 0;
380	igb_vendor_info_t *ent;
381
382	INIT_DEBUGOUT("igb_probe: begin");
383
384	pci_vendor_id = pci_get_vendor(dev);
385	if (pci_vendor_id != IGB_VENDOR_ID)
386		return (ENXIO);
387
388	pci_device_id = pci_get_device(dev);
389	pci_subvendor_id = pci_get_subvendor(dev);
390	pci_subdevice_id = pci_get_subdevice(dev);
391
392	ent = igb_vendor_info_array;
393	while (ent->vendor_id != 0) {
394		if ((pci_vendor_id == ent->vendor_id) &&
395		    (pci_device_id == ent->device_id) &&
396
397		    ((pci_subvendor_id == ent->subvendor_id) ||
398		    (ent->subvendor_id == PCI_ANY_ID)) &&
399
400		    ((pci_subdevice_id == ent->subdevice_id) ||
401		    (ent->subdevice_id == PCI_ANY_ID))) {
402			sprintf(adapter_name, "%s %s",
403				igb_strings[ent->index],
404				igb_driver_version);
405			device_set_desc_copy(dev, adapter_name);
406			return (BUS_PROBE_DEFAULT);
407		}
408		ent++;
409	}
410
411	return (ENXIO);
412}
413
414/*********************************************************************
415 *  Device initialization routine
416 *
417 *  The attach entry point is called when the driver is being loaded.
418 *  This routine identifies the type of hardware, allocates all resources
419 *  and initializes the hardware.
420 *
421 *  return 0 on success, positive on failure
422 *********************************************************************/
423
424static int
425igb_attach(device_t dev)
426{
427	struct adapter	*adapter;
428	int		error = 0;
429	u16		eeprom_data;
430
431	INIT_DEBUGOUT("igb_attach: begin");
432
433	adapter = device_get_softc(dev);
434	adapter->dev = adapter->osdep.dev = dev;
435	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
436
437	/* SYSCTL stuff */
438	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
439	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
440	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
441	    igb_sysctl_nvm_info, "I", "NVM Information");
442
443	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
444	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
445	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
446	    &igb_enable_aim, 1, "Interrupt Moderation");
447
448	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
449
450	/* Determine hardware and mac info */
451	igb_identify_hardware(adapter);
452
453	/* Setup PCI resources */
454	if (igb_allocate_pci_resources(adapter)) {
455		device_printf(dev, "Allocation of PCI resources failed\n");
456		error = ENXIO;
457		goto err_pci;
458	}
459
460	/* Do Shared Code initialization */
461	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
462		device_printf(dev, "Setup of Shared code failed\n");
463		error = ENXIO;
464		goto err_pci;
465	}
466
467	e1000_get_bus_info(&adapter->hw);
468
469	/* Sysctl for limiting the amount of work done in the taskqueue */
470	igb_set_sysctl_value(adapter, "rx_processing_limit",
471	    "max number of rx packets to process", &adapter->rx_process_limit,
472	    igb_rx_process_limit);
473
474	/* Sysctl for setting the interface flow control */
475	igb_set_sysctl_value(adapter, "flow_control",
476	    "configure flow control",
477	    &adapter->fc_setting, igb_fc_setting);
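	/*
	** Note: these per-device sysctls appear under the device's sysctl
	** tree, e.g. (assuming unit 0) dev.igb.0.rx_processing_limit and
	** dev.igb.0.flow_control can be read or set with sysctl(8).
	*/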
478
479	/*
480	 * Validate the number of transmit and receive descriptors. It
481	 * must not exceed the hardware maximum, and must be a multiple
482	 * of IGB_DBA_ALIGN.
483	 */
484	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
485	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
486		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
487		    IGB_DEFAULT_TXD, igb_txd);
488		adapter->num_tx_desc = IGB_DEFAULT_TXD;
489	} else
490		adapter->num_tx_desc = igb_txd;
491	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
492	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
493		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
494		    IGB_DEFAULT_RXD, igb_rxd);
495		adapter->num_rx_desc = IGB_DEFAULT_RXD;
496	} else
497		adapter->num_rx_desc = igb_rxd;
498
499	adapter->hw.mac.autoneg = DO_AUTO_NEG;
500	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
501	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
502
503	/* Copper options */
504	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
505		adapter->hw.phy.mdix = AUTO_ALL_MODES;
506		adapter->hw.phy.disable_polarity_correction = FALSE;
507		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
508	}
509
510	/*
511	 * Set the frame limits assuming
512	 * standard ethernet sized frames.
513	 */
514	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
515	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
516
517	/*
518	** Allocate and Setup Queues
519	*/
520	if (igb_allocate_queues(adapter)) {
521		error = ENOMEM;
522		goto err_pci;
523	}
524
525	/* Allocate the appropriate stats memory */
526	if (adapter->vf_ifp) {
527		adapter->stats =
528		    (struct e1000_vf_stats *)malloc(sizeof \
529		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
530		igb_vf_init_stats(adapter);
531	} else
532		adapter->stats =
533		    (struct e1000_hw_stats *)malloc(sizeof \
534		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
535	if (adapter->stats == NULL) {
536		device_printf(dev, "Can not allocate stats memory\n");
537		error = ENOMEM;
538		goto err_late;
539	}
540
541	/* Allocate multicast array memory. */
542	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
543	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
544	if (adapter->mta == NULL) {
545		device_printf(dev, "Can not allocate multicast setup array\n");
546		error = ENOMEM;
547		goto err_late;
548	}
549
550	/* Some adapter-specific advanced features */
551	if (adapter->hw.mac.type >= e1000_i350) {
552		igb_set_sysctl_value(adapter, "dma_coalesce",
553		    "configure dma coalesce",
554		    &adapter->dma_coalesce, igb_dma_coalesce);
555		igb_set_sysctl_value(adapter, "eee_control",
556		    "enable Energy Efficient Ethernet",
557		    &adapter->hw.dev_spec._82575.eee_disable,
558		    igb_eee_setting);
559		e1000_set_eee_i350(&adapter->hw);
560	}
561
562	/*
563	** Start from a known state; this is
564	** important for reading the NVM and
565	** MAC address from it.
566	*/
567	e1000_reset_hw(&adapter->hw);
568
569	/* Make sure we have a good EEPROM before we read from it */
570	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
571		/*
572		** Some PCI-E parts fail the first check due to
573		** the link being in a sleep state; call it again,
574		** and if it fails a second time it's a real issue.
575		*/
576		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577			device_printf(dev,
578			    "The EEPROM Checksum Is Not Valid\n");
579			error = EIO;
580			goto err_late;
581		}
582	}
583
584	/*
585	** Copy the permanent MAC address out of the EEPROM
586	*/
587	if (e1000_read_mac_addr(&adapter->hw) < 0) {
588		device_printf(dev, "EEPROM read error while reading MAC"
589		    " address\n");
590		error = EIO;
591		goto err_late;
592	}
593	/* Check its sanity */
594	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595		device_printf(dev, "Invalid MAC address\n");
596		error = EIO;
597		goto err_late;
598	}
599
600	/*
601	** Configure Interrupts
602	*/
603	if ((adapter->msix > 1) && (igb_enable_msix))
604		error = igb_allocate_msix(adapter);
605	else /* MSI or Legacy */
606		error = igb_allocate_legacy(adapter);
607	if (error)
608		goto err_late;
609
610	/* Setup OS specific network interface */
611	if (igb_setup_interface(dev, adapter) != 0)
612		goto err_late;
613
614	/* Now get a good starting state */
615	igb_reset(adapter);
616
617	/* Initialize statistics */
618	igb_update_stats_counters(adapter);
619
620	adapter->hw.mac.get_link_status = 1;
621	igb_update_link_status(adapter);
622
623	/* Indicate SOL/IDER usage */
624	if (e1000_check_reset_block(&adapter->hw))
625		device_printf(dev,
626		    "PHY reset is blocked due to SOL/IDER session.\n");
627
628	/* Determine if we have to control management hardware */
629	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
630
631	/*
632	 * Setup Wake-on-Lan
633	 */
634	/* APME bit in EEPROM is mapped to WUC.APME */
635	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
636	if (eeprom_data)
637		adapter->wol = E1000_WUFC_MAG;
638
639	/* Register for VLAN events */
640	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
641	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
642	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
643	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
644
645	igb_add_hw_stats(adapter);
646
647	/* Tell the stack that the interface is not active */
648	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
649
650	adapter->led_dev = led_create(igb_led_func, adapter,
651	    device_get_nameunit(dev));
652
653	INIT_DEBUGOUT("igb_attach: end");
654
655	return (0);
656
657err_late:
658	igb_free_transmit_structures(adapter);
659	igb_free_receive_structures(adapter);
660	igb_release_hw_control(adapter);
661	if (adapter->ifp != NULL)
662		if_free(adapter->ifp);
663err_pci:
664	igb_free_pci_resources(adapter);
665	free(adapter->mta, M_DEVBUF);
666	IGB_CORE_LOCK_DESTROY(adapter);
667
668	return (error);
669}
670
671/*********************************************************************
672 *  Device removal routine
673 *
674 *  The detach entry point is called when the driver is being removed.
675 *  This routine stops the adapter and deallocates all the resources
676 *  that were allocated for driver operation.
677 *
678 *  return 0 on success, positive on failure
679 *********************************************************************/
680
681static int
682igb_detach(device_t dev)
683{
684	struct adapter	*adapter = device_get_softc(dev);
685	struct ifnet	*ifp = adapter->ifp;
686
687	INIT_DEBUGOUT("igb_detach: begin");
688
689	/* Make sure VLANs are not using the driver */
690	if (adapter->ifp->if_vlantrunk != NULL) {
691		device_printf(dev,"Vlan in use, detach first\n");
692		return (EBUSY);
693	}
694
695	if (adapter->led_dev != NULL)
696		led_destroy(adapter->led_dev);
697
698#ifdef DEVICE_POLLING
699	if (ifp->if_capenable & IFCAP_POLLING)
700		ether_poll_deregister(ifp);
701#endif
702
703	IGB_CORE_LOCK(adapter);
704	adapter->in_detach = 1;
705	igb_stop(adapter);
706	IGB_CORE_UNLOCK(adapter);
707
708	e1000_phy_hw_reset(&adapter->hw);
709
710	/* Give control back to firmware */
711	igb_release_manageability(adapter);
712	igb_release_hw_control(adapter);
713
714	if (adapter->wol) {
715		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
716		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
717		igb_enable_wakeup(dev);
718	}
719
720	/* Unregister VLAN events */
721	if (adapter->vlan_attach != NULL)
722		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
723	if (adapter->vlan_detach != NULL)
724		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
725
726	ether_ifdetach(adapter->ifp);
727
728	callout_drain(&adapter->timer);
729
730	igb_free_pci_resources(adapter);
731	bus_generic_detach(dev);
732	if_free(ifp);
733
734	igb_free_transmit_structures(adapter);
735	igb_free_receive_structures(adapter);
736	free(adapter->mta, M_DEVBUF);
737
738	IGB_CORE_LOCK_DESTROY(adapter);
739
740	return (0);
741}
742
743/*********************************************************************
744 *
745 *  Shutdown entry point
746 *
747 **********************************************************************/
748
749static int
750igb_shutdown(device_t dev)
751{
752	return igb_suspend(dev);
753}
754
755/*
756 * Suspend/resume device methods.
757 */
758static int
759igb_suspend(device_t dev)
760{
761	struct adapter *adapter = device_get_softc(dev);
762
763	IGB_CORE_LOCK(adapter);
764
765	igb_stop(adapter);
766
767        igb_release_manageability(adapter);
768	igb_release_hw_control(adapter);
769
770        if (adapter->wol) {
771                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
772                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
773                igb_enable_wakeup(dev);
774        }
775
776	IGB_CORE_UNLOCK(adapter);
777
778	return bus_generic_suspend(dev);
779}
780
781static int
782igb_resume(device_t dev)
783{
784	struct adapter *adapter = device_get_softc(dev);
785	struct ifnet *ifp = adapter->ifp;
786
787	IGB_CORE_LOCK(adapter);
788	igb_init_locked(adapter);
789	igb_init_manageability(adapter);
790
791	if ((ifp->if_flags & IFF_UP) &&
792	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
793		igb_start(ifp);
794
795	IGB_CORE_UNLOCK(adapter);
796
797	return bus_generic_resume(dev);
798}
799
800
801/*********************************************************************
802 *  Transmit entry point
803 *
804 *  igb_start is called by the stack to initiate a transmit.
805 *  The driver will remain in this routine as long as there are
806 *  packets to transmit and transmit resources are available.
807 *  In case resources are not available stack is notified and
808 *  the packet is requeued.
809 **********************************************************************/
810
811static void
812igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
813{
814	struct adapter	*adapter = ifp->if_softc;
815	struct mbuf	*m_head;
816
817	IGB_TX_LOCK_ASSERT(txr);
818
819	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
820	    IFF_DRV_RUNNING)
821		return;
822	if (!adapter->link_active)
823		return;
824
825	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
826		/* Cleanup if TX descriptors are low */
827		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
828			igb_txeof(txr);
829		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
830			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
831			break;
832		}
833		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
834		if (m_head == NULL)
835			break;
836		/*
837		 *  Encapsulation can modify our pointer, and/or make it
838		 *  NULL on failure.  In that event, we can't requeue.
839		 */
840		if (igb_xmit(txr, &m_head)) {
841			if (m_head == NULL)
842				break;
843			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
844			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
845			break;
846		}
847
848		/* Send a copy of the frame to the BPF listener */
849		ETHER_BPF_MTAP(ifp, m_head);
850
851		/* Set watchdog on */
852		txr->watchdog_time = ticks;
853		txr->queue_status = IGB_QUEUE_WORKING;
854	}
855}
856
857/*
858 * Legacy TX driver routine, called from the
859 * stack, always uses tx[0], and spins for it.
860 * Should not be used with multiqueue TX.
861 */
862static void
863igb_start(struct ifnet *ifp)
864{
865	struct adapter	*adapter = ifp->if_softc;
866	struct tx_ring	*txr = adapter->tx_rings;
867
868	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
869		IGB_TX_LOCK(txr);
870		igb_start_locked(txr, ifp);
871		IGB_TX_UNLOCK(txr);
872	}
873	return;
874}
875
876#if __FreeBSD_version >= 800000
877/*
878** Multiqueue Transmit driver
879**
880*/
881static int
882igb_mq_start(struct ifnet *ifp, struct mbuf *m)
883{
884	struct adapter		*adapter = ifp->if_softc;
885	struct igb_queue	*que;
886	struct tx_ring		*txr;
887	int 			i = 0, err = 0;
888
889	/* Which queue to use */
890	if ((m->m_flags & M_FLOWID) != 0)
891		i = m->m_pkthdr.flowid % adapter->num_queues;
892
893	txr = &adapter->tx_rings[i];
894	que = &adapter->queues[i];
895
896	if (IGB_TX_TRYLOCK(txr)) {
897		err = igb_mq_start_locked(ifp, txr, m);
898		IGB_TX_UNLOCK(txr);
899	} else {
900		err = drbr_enqueue(ifp, txr->br, m);
901		taskqueue_enqueue(que->tq, &que->que_task);
902	}
903
904	return (err);
905}
906
907static int
908igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
909{
910	struct adapter  *adapter = txr->adapter;
911        struct mbuf     *next;
912        int             err = 0, enq;
913
914	IGB_TX_LOCK_ASSERT(txr);
915
916	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
917	    IFF_DRV_RUNNING || adapter->link_active == 0) {
918		if (m != NULL)
919			err = drbr_enqueue(ifp, txr->br, m);
920		return (err);
921	}
922
923	enq = 0;
924	if (m == NULL) {
925		next = drbr_dequeue(ifp, txr->br);
926	} else if (drbr_needs_enqueue(ifp, txr->br)) {
927		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
928			return (err);
929		next = drbr_dequeue(ifp, txr->br);
930	} else
931		next = m;
932
933	/* Process the queue */
934	while (next != NULL) {
935		/* Call cleanup if number of TX descriptors low */
936		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
937			igb_txeof(txr);
938		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
939			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
940			break;
941		}
942		if ((err = igb_xmit(txr, &next)) != 0) {
943			if (next != NULL)
944				err = drbr_enqueue(ifp, txr->br, next);
945			break;
946		}
947		enq++;
948		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
949		ETHER_BPF_MTAP(ifp, next);
950		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
951			break;
952		next = drbr_dequeue(ifp, txr->br);
953	}
954	if (enq > 0) {
955		/* Set the watchdog */
956		txr->queue_status = IGB_QUEUE_WORKING;
957		txr->watchdog_time = ticks;
958	}
959	return (err);
960}
961
962/*
963** Flush all ring buffers
964*/
965static void
966igb_qflush(struct ifnet *ifp)
967{
968	struct adapter	*adapter = ifp->if_softc;
969	struct tx_ring	*txr = adapter->tx_rings;
970	struct mbuf	*m;
971
972	for (int i = 0; i < adapter->num_queues; i++, txr++) {
973		IGB_TX_LOCK(txr);
974		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
975			m_freem(m);
976		IGB_TX_UNLOCK(txr);
977	}
978	if_qflush(ifp);
979}
980#endif /* __FreeBSD_version >= 800000 */
981
982/*********************************************************************
983 *  Ioctl entry point
984 *
985 *  igb_ioctl is called when the user wants to configure the
986 *  interface.
987 *
988 *  return 0 on success, positive on failure
989 **********************************************************************/
990
991static int
992igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
993{
994	struct adapter	*adapter = ifp->if_softc;
995	struct ifreq *ifr = (struct ifreq *)data;
996#ifdef INET
997	struct ifaddr *ifa = (struct ifaddr *)data;
998#endif
999	int error = 0;
1000
1001	if (adapter->in_detach)
1002		return (error);
1003
1004	switch (command) {
1005	case SIOCSIFADDR:
1006#ifdef INET
1007		if (ifa->ifa_addr->sa_family == AF_INET) {
1008			/*
1009			 * XXX
1010			 * Since resetting hardware takes a very long time
1011			 * and results in link renegotiation, we only
1012			 * initialize the hardware when it is absolutely
1013			 * required.
1014			 */
1015			ifp->if_flags |= IFF_UP;
1016			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1017				IGB_CORE_LOCK(adapter);
1018				igb_init_locked(adapter);
1019				IGB_CORE_UNLOCK(adapter);
1020			}
1021			if (!(ifp->if_flags & IFF_NOARP))
1022				arp_ifinit(ifp, ifa);
1023		} else
1024#endif
1025			error = ether_ioctl(ifp, command, data);
1026		break;
1027	case SIOCSIFMTU:
1028	    {
1029		int max_frame_size;
1030
1031		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1032
1033		IGB_CORE_LOCK(adapter);
1034		max_frame_size = 9234;
1035		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1036		    ETHER_CRC_LEN) {
1037			IGB_CORE_UNLOCK(adapter);
1038			error = EINVAL;
1039			break;
1040		}
1041
1042		ifp->if_mtu = ifr->ifr_mtu;
1043		adapter->max_frame_size =
1044		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1045		igb_init_locked(adapter);
1046		IGB_CORE_UNLOCK(adapter);
1047		break;
1048	    }
1049	case SIOCSIFFLAGS:
1050		IOCTL_DEBUGOUT("ioctl rcv'd:\
1051		    SIOCSIFFLAGS (Set Interface Flags)");
1052		IGB_CORE_LOCK(adapter);
1053		if (ifp->if_flags & IFF_UP) {
1054			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1055				if ((ifp->if_flags ^ adapter->if_flags) &
1056				    (IFF_PROMISC | IFF_ALLMULTI)) {
1057					igb_disable_promisc(adapter);
1058					igb_set_promisc(adapter);
1059				}
1060			} else
1061				igb_init_locked(adapter);
1062		} else
1063			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1064				igb_stop(adapter);
1065		adapter->if_flags = ifp->if_flags;
1066		IGB_CORE_UNLOCK(adapter);
1067		break;
1068	case SIOCADDMULTI:
1069	case SIOCDELMULTI:
1070		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1071		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1072			IGB_CORE_LOCK(adapter);
1073			igb_disable_intr(adapter);
1074			igb_set_multi(adapter);
1075#ifdef DEVICE_POLLING
1076			if (!(ifp->if_capenable & IFCAP_POLLING))
1077#endif
1078				igb_enable_intr(adapter);
1079			IGB_CORE_UNLOCK(adapter);
1080		}
1081		break;
1082	case SIOCSIFMEDIA:
1083		/*
1084		** As the speed/duplex settings are being
1085		** changed, we need to reset the PHY.
1086		*/
1087		adapter->hw.phy.reset_disable = FALSE;
1088		/* Check SOL/IDER usage */
1089		IGB_CORE_LOCK(adapter);
1090		if (e1000_check_reset_block(&adapter->hw)) {
1091			IGB_CORE_UNLOCK(adapter);
1092			device_printf(adapter->dev, "Media change is"
1093			    " blocked due to SOL/IDER session.\n");
1094			break;
1095		}
1096		IGB_CORE_UNLOCK(adapter);
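		/* FALLTHROUGH */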
1097	case SIOCGIFMEDIA:
1098		IOCTL_DEBUGOUT("ioctl rcv'd: \
1099		    SIOCxIFMEDIA (Get/Set Interface Media)");
1100		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1101		break;
1102	case SIOCSIFCAP:
1103	    {
1104		int mask, reinit;
1105
1106		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1107		reinit = 0;
1108		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1109#ifdef DEVICE_POLLING
1110		if (mask & IFCAP_POLLING) {
1111			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1112				error = ether_poll_register(igb_poll, ifp);
1113				if (error)
1114					return (error);
1115				IGB_CORE_LOCK(adapter);
1116				igb_disable_intr(adapter);
1117				ifp->if_capenable |= IFCAP_POLLING;
1118				IGB_CORE_UNLOCK(adapter);
1119			} else {
1120				error = ether_poll_deregister(ifp);
1121				/* Enable interrupt even in error case */
1122				IGB_CORE_LOCK(adapter);
1123				igb_enable_intr(adapter);
1124				ifp->if_capenable &= ~IFCAP_POLLING;
1125				IGB_CORE_UNLOCK(adapter);
1126			}
1127		}
1128#endif
1129		if (mask & IFCAP_HWCSUM) {
1130			ifp->if_capenable ^= IFCAP_HWCSUM;
1131			reinit = 1;
1132		}
1133		if (mask & IFCAP_TSO4) {
1134			ifp->if_capenable ^= IFCAP_TSO4;
1135			reinit = 1;
1136		}
1137		if (mask & IFCAP_VLAN_HWTAGGING) {
1138			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1139			reinit = 1;
1140		}
1141		if (mask & IFCAP_VLAN_HWFILTER) {
1142			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1143			reinit = 1;
1144		}
1145		if (mask & IFCAP_LRO) {
1146			ifp->if_capenable ^= IFCAP_LRO;
1147			reinit = 1;
1148		}
1149		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1150			igb_init(adapter);
1151		VLAN_CAPABILITIES(ifp);
1152		break;
1153	    }
1154
1155	default:
1156		error = ether_ioctl(ifp, command, data);
1157		break;
1158	}
1159
1160	return (error);
1161}
1162
1163
1164/*********************************************************************
1165 *  Init entry point
1166 *
1167 *  This routine is used in two ways. It is used by the stack as
1168 *  the init entry point in the network interface structure. It is also used
1169 *  by the driver as a hw/sw initialization routine to get to a
1170 *  consistent state.
1171 *
1172 *  return 0 on success, positive on failure
1173 **********************************************************************/
1174
1175static void
1176igb_init_locked(struct adapter *adapter)
1177{
1178	struct ifnet	*ifp = adapter->ifp;
1179	device_t	dev = adapter->dev;
1180
1181	INIT_DEBUGOUT("igb_init: begin");
1182
1183	IGB_CORE_LOCK_ASSERT(adapter);
1184
1185	igb_disable_intr(adapter);
1186	callout_stop(&adapter->timer);
1187
1188	/* Get the latest MAC address; user can use a LAA */
1189        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1190              ETHER_ADDR_LEN);
1191
1192	/* Put the address into the Receive Address Array */
1193	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1194
1195	igb_reset(adapter);
1196	igb_update_link_status(adapter);
1197
1198	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1199
1200	/* Set hardware offload abilities */
1201	ifp->if_hwassist = 0;
1202	if (ifp->if_capenable & IFCAP_TXCSUM) {
1203		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1204#if __FreeBSD_version >= 800000
1205		if (adapter->hw.mac.type == e1000_82576)
1206			ifp->if_hwassist |= CSUM_SCTP;
1207#endif
1208	}
1209
1210	if (ifp->if_capenable & IFCAP_TSO4)
1211		ifp->if_hwassist |= CSUM_TSO;
1212
1213	/* Configure for OS presence */
1214	igb_init_manageability(adapter);
1215
1216	/* Prepare transmit descriptors and buffers */
1217	igb_setup_transmit_structures(adapter);
1218	igb_initialize_transmit_units(adapter);
1219
1220	/* Setup Multicast table */
1221	igb_set_multi(adapter);
1222
1223	/*
1224	** Figure out the desired mbuf pool
1225	** for doing jumbo/packetsplit
1226	*/
1227	if (adapter->max_frame_size <= 2048)
1228		adapter->rx_mbuf_sz = MCLBYTES;
1229	else if (adapter->max_frame_size <= 4096)
1230		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1231	else
1232		adapter->rx_mbuf_sz = MJUM9BYTES;
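	/* (MCLBYTES is 2K, MJUMPAGESIZE is one page, MJUM9BYTES is 9K) */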
1233
1234	/* Prepare receive descriptors and buffers */
1235	if (igb_setup_receive_structures(adapter)) {
1236		device_printf(dev, "Could not setup receive structures\n");
1237		return;
1238	}
1239	igb_initialize_receive_units(adapter);
1240
1241        /* Enable VLAN support */
1242	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1243		igb_setup_vlan_hw_support(adapter);
1244
1245	/* Don't lose promiscuous settings */
1246	igb_set_promisc(adapter);
1247
1248	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1249	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1250
1251	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1252	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1253
1254	if (adapter->msix > 1) /* Set up queue routing */
1255		igb_configure_queues(adapter);
1256
1257	/* this clears any pending interrupts */
1258	E1000_READ_REG(&adapter->hw, E1000_ICR);
1259#ifdef DEVICE_POLLING
1260	/*
1261	 * Only enable interrupts if we are not polling; make sure
1262	 * they are off otherwise.
1263	 */
1264	if (ifp->if_capenable & IFCAP_POLLING)
1265		igb_disable_intr(adapter);
1266	else
1267#endif /* DEVICE_POLLING */
1268	{
1269	igb_enable_intr(adapter);
1270	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1271	}
1272
1273	/* Don't reset the phy next time init gets called */
1274	adapter->hw.phy.reset_disable = TRUE;
1275}
1276
1277static void
1278igb_init(void *arg)
1279{
1280	struct adapter *adapter = arg;
1281
1282	IGB_CORE_LOCK(adapter);
1283	igb_init_locked(adapter);
1284	IGB_CORE_UNLOCK(adapter);
1285}
1286
1287
1288static void
1289igb_handle_que(void *context, int pending)
1290{
1291	struct igb_queue *que = context;
1292	struct adapter *adapter = que->adapter;
1293	struct tx_ring *txr = que->txr;
1294	struct ifnet	*ifp = adapter->ifp;
1295
1296	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1297		bool	more;
1298
1299		more = igb_rxeof(que, -1, NULL);
1300
1301		IGB_TX_LOCK(txr);
1302		if (igb_txeof(txr))
1303			more = TRUE;
1304#if __FreeBSD_version >= 800000
1305		if (!drbr_empty(ifp, txr->br))
1306			igb_mq_start_locked(ifp, txr, NULL);
1307#else
1308		igb_start_locked(txr, ifp);
1309#endif
1310		IGB_TX_UNLOCK(txr);
1311		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1312			taskqueue_enqueue(que->tq, &que->que_task);
1313			return;
1314		}
1315	}
1316
1317#ifdef DEVICE_POLLING
1318	if (ifp->if_capenable & IFCAP_POLLING)
1319		return;
1320#endif
1321	/* Reenable this interrupt */
1322	if (que->eims)
1323		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1324	else
1325		igb_enable_intr(adapter);
1326}
1327
1328/* Deal with link in a sleepable context */
1329static void
1330igb_handle_link(void *context, int pending)
1331{
1332	struct adapter *adapter = context;
1333
1334	adapter->hw.mac.get_link_status = 1;
1335	igb_update_link_status(adapter);
1336}
1337
1338/*********************************************************************
1339 *
1340 *  MSI/Legacy Deferred
1341 *  Interrupt Service routine
1342 *
1343 *********************************************************************/
1344static int
1345igb_irq_fast(void *arg)
1346{
1347	struct adapter		*adapter = arg;
1348	struct igb_queue	*que = adapter->queues;
1349	u32			reg_icr;
1350
1351
1352	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1353
1354	/* Hot eject?  */
1355	if (reg_icr == 0xffffffff)
1356		return FILTER_STRAY;
1357
1358	/* Definitely not our interrupt.  */
1359	if (reg_icr == 0x0)
1360		return FILTER_STRAY;
1361
1362	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1363		return FILTER_STRAY;
1364
1365	/*
1366	 * Mask interrupts until the taskqueue is finished running.  This is
1367	 * cheap, just assume that it is needed.  This also works around the
1368	 * MSI message reordering errata on certain systems.
1369	 */
1370	igb_disable_intr(adapter);
1371	taskqueue_enqueue(que->tq, &que->que_task);
1372
1373	/* Link status change */
1374	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1375		taskqueue_enqueue(que->tq, &adapter->link_task);
1376
1377	if (reg_icr & E1000_ICR_RXO)
1378		adapter->rx_overruns++;
1379	return FILTER_HANDLED;
1380}
1381
1382#ifdef DEVICE_POLLING
1383/*********************************************************************
1384 *
1385 *  Legacy polling routine: if using this code you MUST be sure that
1386 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
1387 *
1388 *********************************************************************/
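/*
 * (For example: build the kernel with "options DEVICE_POLLING" and enable
 *  polling per interface with "ifconfig igb0 polling"; unit name illustrative.)
 */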
1389#if __FreeBSD_version >= 800000
1390#define POLL_RETURN_COUNT(a) (a)
1391static int
1392#else
1393#define POLL_RETURN_COUNT(a)
1394static void
1395#endif
1396igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1397{
1398	struct adapter		*adapter = ifp->if_softc;
1399	struct igb_queue	*que = adapter->queues;
1400	struct tx_ring		*txr = adapter->tx_rings;
1401	u32			reg_icr, rx_done = 0;
1402	u32			loop = IGB_MAX_LOOP;
1403	bool			more;
1404
1405	IGB_CORE_LOCK(adapter);
1406	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1407		IGB_CORE_UNLOCK(adapter);
1408		return POLL_RETURN_COUNT(rx_done);
1409	}
1410
1411	if (cmd == POLL_AND_CHECK_STATUS) {
1412		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1413		/* Link status change */
1414		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1415			igb_handle_link(adapter, 0);
1416
1417		if (reg_icr & E1000_ICR_RXO)
1418			adapter->rx_overruns++;
1419	}
1420	IGB_CORE_UNLOCK(adapter);
1421
1422	igb_rxeof(que, count, &rx_done);
1423
1424	IGB_TX_LOCK(txr);
1425	do {
1426		more = igb_txeof(txr);
1427	} while (loop-- && more);
1428#if __FreeBSD_version >= 800000
1429	if (!drbr_empty(ifp, txr->br))
1430		igb_mq_start_locked(ifp, txr, NULL);
1431#else
1432	igb_start_locked(txr, ifp);
1433#endif
1434	IGB_TX_UNLOCK(txr);
1435	return POLL_RETURN_COUNT(rx_done);
1436}
1437#endif /* DEVICE_POLLING */
1438
1439/*********************************************************************
1440 *
1441 *  MSIX Queue Interrupt Service routine
1442 *
1443 **********************************************************************/
1444static void
1445igb_msix_que(void *arg)
1446{
1447	struct igb_queue *que = arg;
1448	struct adapter *adapter = que->adapter;
1449	struct tx_ring *txr = que->txr;
1450	struct rx_ring *rxr = que->rxr;
1451	u32		newitr = 0;
1452	bool		more_tx, more_rx;
1453
1454	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1455	++que->irqs;
1456
1457	IGB_TX_LOCK(txr);
1458	more_tx = igb_txeof(txr);
1459	IGB_TX_UNLOCK(txr);
1460
1461	/* If RX ring is depleted do refresh first */
1462	if (rxr->next_to_check == rxr->next_to_refresh)
1463		igb_refresh_mbufs(rxr, rxr->next_to_check);
1464
1465	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1466
1467	if (igb_enable_aim == FALSE)
1468		goto no_calc;
1469	/*
1470	** Do Adaptive Interrupt Moderation:
1471        **  - Write out last calculated setting
1472	**  - Calculate based on average size over
1473	**    the last interval.
1474	*/
1475        if (que->eitr_setting)
1476                E1000_WRITE_REG(&adapter->hw,
1477                    E1000_EITR(que->msix), que->eitr_setting);
1478
1479        que->eitr_setting = 0;
1480
1481        /* Idle, do nothing */
1482        if ((txr->bytes == 0) && (rxr->bytes == 0))
1483                goto no_calc;
1484
1485        /* Use half the default if sub-gig */
1486        if (adapter->link_speed != 1000)
1487                newitr = IGB_DEFAULT_ITR / 2;
1488        else {
1489		if ((txr->bytes) && (txr->packets))
1490                	newitr = txr->bytes/txr->packets;
1491		if ((rxr->bytes) && (rxr->packets))
1492			newitr = max(newitr,
1493			    (rxr->bytes / rxr->packets));
1494                newitr += 24; /* account for hardware frame, crc */
1495		/* set an upper boundary */
1496		newitr = min(newitr, 3000);
1497		/* Be nice to the mid range */
1498                if ((newitr > 300) && (newitr < 1200))
1499                        newitr = (newitr / 3);
1500                else
1501                        newitr = (newitr / 2);
1502        }
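        /*
         * Example (gigabit link): an average frame of ~900 bytes works out
         * to 924 above, which the mid-range scaling reduces to 308 before
         * the mask below.
         */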
1503        newitr &= 0x7FFC;  /* Mask invalid bits */
1504        if (adapter->hw.mac.type == e1000_82575)
1505                newitr |= newitr << 16;
1506        else
1507                newitr |= E1000_EITR_CNT_IGNR;
1508
1509        /* save for next interrupt */
1510        que->eitr_setting = newitr;
1511
1512        /* Reset state */
1513        txr->bytes = 0;
1514        txr->packets = 0;
1515        rxr->bytes = 0;
1516        rxr->packets = 0;
1517
1518no_calc:
1519	/* Schedule a clean task if needed */
1520	if (more_tx || more_rx ||
1521	    (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
1522		taskqueue_enqueue(que->tq, &que->que_task);
1523	else
1524		/* Reenable this interrupt */
1525		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1526	return;
1527}
1528
1529
1530/*********************************************************************
1531 *
1532 *  MSIX Link Interrupt Service routine
1533 *
1534 **********************************************************************/
1535
1536static void
1537igb_msix_link(void *arg)
1538{
1539	struct adapter	*adapter = arg;
1540	u32       	icr;
1541
1542	++adapter->link_irq;
1543	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1544	if (!(icr & E1000_ICR_LSC))
1545		goto spurious;
1546	igb_handle_link(adapter, 0);
1547
1548spurious:
1549	/* Rearm */
1550	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1551	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1552	return;
1553}
1554
1555
1556/*********************************************************************
1557 *
1558 *  Media Ioctl callback
1559 *
1560 *  This routine is called whenever the user queries the status of
1561 *  the interface using ifconfig.
1562 *
1563 **********************************************************************/
1564static void
1565igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1566{
1567	struct adapter *adapter = ifp->if_softc;
1568	u_char fiber_type = IFM_1000_SX;
1569
1570	INIT_DEBUGOUT("igb_media_status: begin");
1571
1572	IGB_CORE_LOCK(adapter);
1573	igb_update_link_status(adapter);
1574
1575	ifmr->ifm_status = IFM_AVALID;
1576	ifmr->ifm_active = IFM_ETHER;
1577
1578	if (!adapter->link_active) {
1579		IGB_CORE_UNLOCK(adapter);
1580		return;
1581	}
1582
1583	ifmr->ifm_status |= IFM_ACTIVE;
1584
1585	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1586	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1587		ifmr->ifm_active |= fiber_type | IFM_FDX;
1588	else {
1589		switch (adapter->link_speed) {
1590		case 10:
1591			ifmr->ifm_active |= IFM_10_T;
1592			break;
1593		case 100:
1594			ifmr->ifm_active |= IFM_100_TX;
1595			break;
1596		case 1000:
1597			ifmr->ifm_active |= IFM_1000_T;
1598			break;
1599		}
1600		if (adapter->link_duplex == FULL_DUPLEX)
1601			ifmr->ifm_active |= IFM_FDX;
1602		else
1603			ifmr->ifm_active |= IFM_HDX;
1604	}
1605	IGB_CORE_UNLOCK(adapter);
1606}
1607
1608/*********************************************************************
1609 *
1610 *  Media Ioctl callback
1611 *
1612 *  This routine is called when the user changes speed/duplex using
1613 *  the media/mediaopt options with ifconfig.
1614 *
1615 **********************************************************************/
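/* For example (unit name illustrative):
 *	ifconfig igb0 media 100baseTX mediaopt full-duplex
 */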
1616static int
1617igb_media_change(struct ifnet *ifp)
1618{
1619	struct adapter *adapter = ifp->if_softc;
1620	struct ifmedia  *ifm = &adapter->media;
1621
1622	INIT_DEBUGOUT("igb_media_change: begin");
1623
1624	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1625		return (EINVAL);
1626
1627	IGB_CORE_LOCK(adapter);
1628	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1629	case IFM_AUTO:
1630		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1631		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1632		break;
1633	case IFM_1000_LX:
1634	case IFM_1000_SX:
1635	case IFM_1000_T:
1636		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1637		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1638		break;
1639	case IFM_100_TX:
1640		adapter->hw.mac.autoneg = FALSE;
1641		adapter->hw.phy.autoneg_advertised = 0;
1642		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1643			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1644		else
1645			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1646		break;
1647	case IFM_10_T:
1648		adapter->hw.mac.autoneg = FALSE;
1649		adapter->hw.phy.autoneg_advertised = 0;
1650		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1651			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1652		else
1653			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1654		break;
1655	default:
1656		device_printf(adapter->dev, "Unsupported media type\n");
1657	}
1658
1659	igb_init_locked(adapter);
1660	IGB_CORE_UNLOCK(adapter);
1661
1662	return (0);
1663}
1664
1665
1666/*********************************************************************
1667 *
1668 *  This routine maps the mbufs to Advanced TX descriptors,
1669 *  as used by the 82575 adapter.
1670 *
1671 **********************************************************************/
1672
1673static int
1674igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1675{
1676	struct adapter		*adapter = txr->adapter;
1677	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1678	bus_dmamap_t		map;
1679	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1680	union e1000_adv_tx_desc	*txd = NULL;
1681	struct mbuf		*m_head;
1682	u32			olinfo_status = 0, cmd_type_len = 0;
1683	int			nsegs, i, j, error, first, last = 0;
1684	u32			hdrlen = 0;
1685
1686	m_head = *m_headp;
1687
1688
1689	/* Set basic descriptor constants */
1690	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1691	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1692	if (m_head->m_flags & M_VLANTAG)
1693		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1694
1695	/*
1696         * Map the packet for DMA.
1697	 *
1698	 * Capture the first descriptor index;
1699	 * this descriptor will have the index
1700	 * of the EOP, which is the only one that
1701	 * now gets a DONE bit writeback.
1702	 */
1703	first = txr->next_avail_desc;
1704	tx_buffer = &txr->tx_buffers[first];
1705	tx_buffer_mapped = tx_buffer;
1706	map = tx_buffer->map;
1707
1708	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1709	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1710
1711	if (error == EFBIG) {
1712		struct mbuf *m;
1713
1714		m = m_defrag(*m_headp, M_DONTWAIT);
1715		if (m == NULL) {
1716			adapter->mbuf_defrag_failed++;
1717			m_freem(*m_headp);
1718			*m_headp = NULL;
1719			return (ENOBUFS);
1720		}
1721		*m_headp = m;
1722
1723		/* Try it again */
1724		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1725		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1726
1727		if (error == ENOMEM) {
1728			adapter->no_tx_dma_setup++;
1729			return (error);
1730		} else if (error != 0) {
1731			adapter->no_tx_dma_setup++;
1732			m_freem(*m_headp);
1733			*m_headp = NULL;
1734			return (error);
1735		}
1736	} else if (error == ENOMEM) {
1737		adapter->no_tx_dma_setup++;
1738		return (error);
1739	} else if (error != 0) {
1740		adapter->no_tx_dma_setup++;
1741		m_freem(*m_headp);
1742		*m_headp = NULL;
1743		return (error);
1744	}
1745
1746	/* Check again to be sure we have enough descriptors */
1747        if (nsegs > (txr->tx_avail - 2)) {
1748                txr->no_desc_avail++;
1749		bus_dmamap_unload(txr->txtag, map);
1750		return (ENOBUFS);
1751        }
1752	m_head = *m_headp;
1753
1754        /*
1755         * Set up the context descriptor:
1756         * used when any hardware offload is done.
1757	 * This includes CSUM, VLAN, and TSO. It
1758	 * will use the first descriptor.
1759         */
1760        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1761		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1762			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1763			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1764			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1765		} else
1766			return (ENXIO);
1767	} else if (igb_tx_ctx_setup(txr, m_head))
1768		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1769
1770	/* Calculate payload length */
1771	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1772	    << E1000_ADVTXD_PAYLEN_SHIFT);
1773
1774	/* 82575 needs the queue index added */
1775	if (adapter->hw.mac.type == e1000_82575)
1776		olinfo_status |= txr->me << 4;
1777
1778	/* Set up our transmit descriptors */
1779	i = txr->next_avail_desc;
1780	for (j = 0; j < nsegs; j++) {
1781		bus_size_t seg_len;
1782		bus_addr_t seg_addr;
1783
1784		tx_buffer = &txr->tx_buffers[i];
1785		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1786		seg_addr = segs[j].ds_addr;
1787		seg_len  = segs[j].ds_len;
1788
1789		txd->read.buffer_addr = htole64(seg_addr);
1790		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1791		txd->read.olinfo_status = htole32(olinfo_status);
1792		last = i;
1793		if (++i == adapter->num_tx_desc)
1794			i = 0;
1795		tx_buffer->m_head = NULL;
1796		tx_buffer->next_eop = -1;
1797	}
1798
1799	txr->next_avail_desc = i;
1800	txr->tx_avail -= nsegs;
1801
1802        tx_buffer->m_head = m_head;
1803	tx_buffer_mapped->map = tx_buffer->map;
1804	tx_buffer->map = map;
1805        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1806
1807        /*
1808         * Last Descriptor of Packet
1809	 * needs End Of Packet (EOP)
1810	 * and Report Status (RS)
1811         */
1812        txd->read.cmd_type_len |=
1813	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1814	/*
1815	 * Keep track in the first buffer which
1816	 * descriptor will be written back
1817	 */
1818	tx_buffer = &txr->tx_buffers[first];
1819	tx_buffer->next_eop = last;
1820	txr->watchdog_time = ticks;
1821
1822	/*
1823	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1824	 * that this frame is available to transmit.
1825	 */
1826	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1827	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1828	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1829	++txr->tx_packets;
1830
1831	return (0);
1832
1833}
1834
1835static void
1836igb_set_promisc(struct adapter *adapter)
1837{
1838	struct ifnet	*ifp = adapter->ifp;
1839	struct e1000_hw *hw = &adapter->hw;
1840	u32		reg;
1841
1842	if (adapter->vf_ifp) {
1843		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1844		return;
1845	}
1846
1847	reg = E1000_READ_REG(hw, E1000_RCTL);
1848	if (ifp->if_flags & IFF_PROMISC) {
1849		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1850		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1851	} else if (ifp->if_flags & IFF_ALLMULTI) {
1852		reg |= E1000_RCTL_MPE;
1853		reg &= ~E1000_RCTL_UPE;
1854		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1855	}
1856}
1857
1858static void
1859igb_disable_promisc(struct adapter *adapter)
1860{
1861	struct e1000_hw *hw = &adapter->hw;
1862	u32		reg;
1863
1864	if (adapter->vf_ifp) {
1865		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1866		return;
1867	}
1868	reg = E1000_READ_REG(hw, E1000_RCTL);
1869	reg &=  (~E1000_RCTL_UPE);
1870	reg &=  (~E1000_RCTL_MPE);
1871	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1872}
1873
1874
1875/*********************************************************************
1876 *  Multicast Update
1877 *
1878 *  This routine is called whenever the multicast address list is updated.
1879 *
1880 **********************************************************************/
1881
1882static void
1883igb_set_multi(struct adapter *adapter)
1884{
1885	struct ifnet	*ifp = adapter->ifp;
1886	struct ifmultiaddr *ifma;
1887	u32 reg_rctl = 0;
1888	u8  *mta;
1889
1890	int mcnt = 0;
1891
1892	IOCTL_DEBUGOUT("igb_set_multi: begin");
1893
1894	mta = adapter->mta;
1895	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1896	    MAX_NUM_MULTICAST_ADDRESSES);
1897
1898#if __FreeBSD_version < 800000
1899	IF_ADDR_LOCK(ifp);
1900#else
1901	if_maddr_rlock(ifp);
1902#endif
1903	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1904		if (ifma->ifma_addr->sa_family != AF_LINK)
1905			continue;
1906
1907		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1908			break;
1909
1910		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1911		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1912		mcnt++;
1913	}
1914#if __FreeBSD_version < 800000
1915	IF_ADDR_UNLOCK(ifp);
1916#else
1917	if_maddr_runlock(ifp);
1918#endif
1919
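	/*
	 * If the list overflowed the hardware filter table, fall back to
	 * multicast promiscuous (MPE) rather than program a partial list.
	 */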
1920	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1921		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1922		reg_rctl |= E1000_RCTL_MPE;
1923		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1924	} else
1925		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1926}
1927
1928
1929/*********************************************************************
1930 *  Timer routine:
1931 *  	This routine checks for link status,
1932 *	updates statistics, and does the watchdog.
1933 *
1934 **********************************************************************/
1935
1936static void
1937igb_local_timer(void *arg)
1938{
1939	struct adapter		*adapter = arg;
1940	device_t		dev = adapter->dev;
1941	struct tx_ring		*txr = adapter->tx_rings;
1942
1943
1944	IGB_CORE_LOCK_ASSERT(adapter);
1945
1946	igb_update_link_status(adapter);
1947	igb_update_stats_counters(adapter);
1948
1949	/*
1950	** If flow control has paused us since the last check
1951	** it invalidates the watchdog timing, so don't run it.
1952	*/
1953	if (adapter->pause_frames) {
1954		adapter->pause_frames = 0;
1955		goto out;
1956	}
1957
1958        /*
1959        ** Watchdog: check for time since any descriptor was cleaned
1960        */
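	/*
	** A queue is marked IGB_QUEUE_HUNG by igb_txeof() when no
	** descriptors have been cleaned for more than IGB_WATCHDOG ticks.
	*/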
1961	for (int i = 0; i < adapter->num_queues; i++, txr++)
1962		if (txr->queue_status == IGB_QUEUE_HUNG)
1963			goto timeout;
1964out:
1965	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1966#ifndef DEVICE_POLLING
1967	/* Schedule all queue interrupts - deadlock protection */
1968	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
1969#endif
1970	return;
1971
1972timeout:
1973	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1974	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1975            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1976            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1977	device_printf(dev, "TX(%d) desc avail = %d, "
1978            "Next TX to Clean = %d\n",
1979            txr->me, txr->tx_avail, txr->next_to_clean);
1980	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1981	adapter->watchdog_events++;
1982	igb_init_locked(adapter);
1983}
1984
1985static void
1986igb_update_link_status(struct adapter *adapter)
1987{
1988	struct e1000_hw *hw = &adapter->hw;
1989	struct ifnet *ifp = adapter->ifp;
1990	device_t dev = adapter->dev;
1991	struct tx_ring *txr = adapter->tx_rings;
1992	u32 link_check, thstat, ctrl;
1993
1994	link_check = thstat = ctrl = 0;
1995
1996	/* Get the cached link value or read for real */
1997        switch (hw->phy.media_type) {
1998        case e1000_media_type_copper:
1999                if (hw->mac.get_link_status) {
2000			/* Do the work to read phy */
2001                        e1000_check_for_link(hw);
2002                        link_check = !hw->mac.get_link_status;
2003                } else
2004                        link_check = TRUE;
2005                break;
2006        case e1000_media_type_fiber:
2007                e1000_check_for_link(hw);
2008                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2009                                 E1000_STATUS_LU);
2010                break;
2011        case e1000_media_type_internal_serdes:
2012                e1000_check_for_link(hw);
2013                link_check = adapter->hw.mac.serdes_has_link;
2014                break;
2015	/* VF device is type_unknown */
2016        case e1000_media_type_unknown:
2017                e1000_check_for_link(hw);
2018		link_check = !hw->mac.get_link_status;
2019		/* Fall thru */
2020        default:
2021                break;
2022        }
2023
2024	/* Check for thermal downshift or shutdown */
2025	if (hw->mac.type == e1000_i350) {
2026		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2027		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2028	}
2029
2030	/* Now we check if a transition has happened */
2031	if (link_check && (adapter->link_active == 0)) {
2032		e1000_get_speed_and_duplex(&adapter->hw,
2033		    &adapter->link_speed, &adapter->link_duplex);
2034		if (bootverbose)
2035			device_printf(dev, "Link is up %d Mbps %s\n",
2036			    adapter->link_speed,
2037			    ((adapter->link_duplex == FULL_DUPLEX) ?
2038			    "Full Duplex" : "Half Duplex"));
2039		adapter->link_active = 1;
2040		ifp->if_baudrate = adapter->link_speed * 1000000;
2041		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2042		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2043			device_printf(dev, "Link: thermal downshift\n");
2044		/* This can sleep */
2045		if_link_state_change(ifp, LINK_STATE_UP);
2046	} else if (!link_check && (adapter->link_active == 1)) {
2047		ifp->if_baudrate = adapter->link_speed = 0;
2048		adapter->link_duplex = 0;
2049		if (bootverbose)
2050			device_printf(dev, "Link is Down\n");
2051		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2052		    (thstat & E1000_THSTAT_PWR_DOWN))
2053			device_printf(dev, "Link: thermal shutdown\n");
2054		adapter->link_active = 0;
2055		/* This can sleep */
2056		if_link_state_change(ifp, LINK_STATE_DOWN);
2057		/* Turn off watchdogs */
2058		for (int i = 0; i < adapter->num_queues; i++, txr++)
2059			txr->queue_status = IGB_QUEUE_IDLE;
2060	}
2061}
2062
2063/*********************************************************************
2064 *
2065 *  This routine disables all traffic on the adapter by issuing a
2066 *  global reset on the MAC and deallocates TX/RX buffers.
2067 *
2068 **********************************************************************/
2069
2070static void
2071igb_stop(void *arg)
2072{
2073	struct adapter	*adapter = arg;
2074	struct ifnet	*ifp = adapter->ifp;
2075	struct tx_ring *txr = adapter->tx_rings;
2076
2077	IGB_CORE_LOCK_ASSERT(adapter);
2078
2079	INIT_DEBUGOUT("igb_stop: begin");
2080
2081	igb_disable_intr(adapter);
2082
2083	callout_stop(&adapter->timer);
2084
2085	/* Tell the stack that the interface is no longer active */
2086	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2087
2088	/* Unarm watchdog timer. */
2089	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2090		IGB_TX_LOCK(txr);
2091		txr->queue_status = IGB_QUEUE_IDLE;
2092		IGB_TX_UNLOCK(txr);
2093	}
2094
2095	e1000_reset_hw(&adapter->hw);
2096	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2097
2098	e1000_led_off(&adapter->hw);
2099	e1000_cleanup_led(&adapter->hw);
2100}
2101
2102
2103/*********************************************************************
2104 *
2105 *  Determine hardware revision.
2106 *
2107 **********************************************************************/
2108static void
2109igb_identify_hardware(struct adapter *adapter)
2110{
2111	device_t dev = adapter->dev;
2112
2113	/* Make sure our PCI config space has the necessary stuff set */
2114	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2115	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2116	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2117		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2118		    "bits were not set!\n");
2119		adapter->hw.bus.pci_cmd_word |=
2120		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2121		pci_write_config(dev, PCIR_COMMAND,
2122		    adapter->hw.bus.pci_cmd_word, 2);
2123	}
2124
2125	/* Save off the information about this board */
2126	adapter->hw.vendor_id = pci_get_vendor(dev);
2127	adapter->hw.device_id = pci_get_device(dev);
2128	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2129	adapter->hw.subsystem_vendor_id =
2130	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2131	adapter->hw.subsystem_device_id =
2132	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2133
2134	/* Set MAC type early for PCI setup */
2135	e1000_set_mac_type(&adapter->hw);
2136
2137	/* Are we a VF device? */
2138	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2139	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2140		adapter->vf_ifp = 1;
2141	else
2142		adapter->vf_ifp = 0;
2143}
2144
2145static int
2146igb_allocate_pci_resources(struct adapter *adapter)
2147{
2148	device_t	dev = adapter->dev;
2149	int		rid;
2150
2151	rid = PCIR_BAR(0);
2152	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2153	    &rid, RF_ACTIVE);
2154	if (adapter->pci_mem == NULL) {
2155		device_printf(dev, "Unable to allocate bus resource: memory\n");
2156		return (ENXIO);
2157	}
2158	adapter->osdep.mem_bus_space_tag =
2159	    rman_get_bustag(adapter->pci_mem);
2160	adapter->osdep.mem_bus_space_handle =
2161	    rman_get_bushandle(adapter->pci_mem);
2162	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
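	/*
	 * Note: hw_addr points at the stored bus space handle (not a
	 * direct CPU mapping); the osdep register access macros are
	 * expected to recover the tag/handle from adapter->osdep.
	 */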
2163
2164	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2165
2166	/* This will setup either MSI/X or MSI */
2167	adapter->msix = igb_setup_msix(adapter);
2168	adapter->hw.back = &adapter->osdep;
2169
2170	return (0);
2171}
2172
2173/*********************************************************************
2174 *
2175 *  Setup the Legacy or MSI Interrupt handler
2176 *
2177 **********************************************************************/
2178static int
2179igb_allocate_legacy(struct adapter *adapter)
2180{
2181	device_t		dev = adapter->dev;
2182	struct igb_queue	*que = adapter->queues;
2183	int			error, rid = 0;
2184
2185	/* Turn off all interrupts */
2186	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2187
2188	/* MSI RID is 1 */
2189	if (adapter->msix == 1)
2190		rid = 1;
2191
2192	/* We allocate a single interrupt resource */
2193	adapter->res = bus_alloc_resource_any(dev,
2194	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2195	if (adapter->res == NULL) {
2196		device_printf(dev, "Unable to allocate bus resource: "
2197		    "interrupt\n");
2198		return (ENXIO);
2199	}
2200
2201	/*
2202	 * Try allocating a fast interrupt and the associated deferred
2203	 * processing contexts.
2204	 */
2205	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2206	/* Make tasklet for deferred link handling */
2207	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2208	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2209	    taskqueue_thread_enqueue, &que->tq);
2210	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2211	    device_get_nameunit(adapter->dev));
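	/*
	 * igb_irq_fast is installed as a filter (the handler argument is
	 * NULL), so it runs in interrupt context and defers the heavy
	 * lifting to the que_task/link_task tasks created above.
	 */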
2212	if ((error = bus_setup_intr(dev, adapter->res,
2213	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2214	    adapter, &adapter->tag)) != 0) {
2215		device_printf(dev, "Failed to register fast interrupt "
2216			    "handler: %d\n", error);
2217		taskqueue_free(que->tq);
2218		que->tq = NULL;
2219		return (error);
2220	}
2221
2222	return (0);
2223}
2224
2225
2226/*********************************************************************
2227 *
2228 *  Setup the MSIX Queue Interrupt handlers:
2229 *
2230 **********************************************************************/
2231static int
2232igb_allocate_msix(struct adapter *adapter)
2233{
2234	device_t		dev = adapter->dev;
2235	struct igb_queue	*que = adapter->queues;
2236	int			error, rid, vector = 0;
2237
2238
2239	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2240		rid = vector +1;
2241		que->res = bus_alloc_resource_any(dev,
2242		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2243		if (que->res == NULL) {
2244			device_printf(dev,
2245			    "Unable to allocate bus resource: "
2246			    "MSIX Queue Interrupt\n");
2247			return (ENXIO);
2248		}
2249		error = bus_setup_intr(dev, que->res,
2250	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2251		    igb_msix_que, que, &que->tag);
2252		if (error) {
2253			que->res = NULL;
2254			device_printf(dev, "Failed to register Queue handler");
2255			return (error);
2256		}
2257#if __FreeBSD_version >= 800504
2258		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2259#endif
2260		que->msix = vector;
2261		if (adapter->hw.mac.type == e1000_82575)
2262			que->eims = E1000_EICR_TX_QUEUE0 << i;
2263		else
2264			que->eims = 1 << vector;
2265		/*
2266		** Bind the msix vector, and thus the
2267		** rings to the corresponding cpu.
2268		*/
2269		if (adapter->num_queues > 1)
2270			bus_bind_intr(dev, que->res, i);
2271		/* Make tasklet for deferred handling */
2272		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2273		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2274		    taskqueue_thread_enqueue, &que->tq);
2275		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2276		    device_get_nameunit(adapter->dev));
2277	}
2278
2279	/* And Link */
2280	rid = vector + 1;
2281	adapter->res = bus_alloc_resource_any(dev,
2282	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2283	if (adapter->res == NULL) {
2284		device_printf(dev,
2285		    "Unable to allocate bus resource: "
2286		    "MSIX Link Interrupt\n");
2287		return (ENXIO);
2288	}
2289	if ((error = bus_setup_intr(dev, adapter->res,
2290	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2291	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2292		device_printf(dev, "Failed to register Link handler");
2293		return (error);
2294	}
2295#if __FreeBSD_version >= 800504
2296	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2297#endif
2298	adapter->linkvec = vector;
2299
2300	return (0);
2301}
2302
2303
2304static void
2305igb_configure_queues(struct adapter *adapter)
2306{
2307	struct	e1000_hw	*hw = &adapter->hw;
2308	struct	igb_queue	*que;
2309	u32			tmp, ivar = 0, newitr = 0;
2310
2311	/* First turn on RSS capability */
2312	if (adapter->hw.mac.type != e1000_82575)
2313		E1000_WRITE_REG(hw, E1000_GPIE,
2314		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2315		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2316
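	/*
	** IVAR layout as programmed below: on 82580/i350-class MACs each
	** IVAR register holds two queues, with the RX/TX vectors for the
	** even queue in bits 0-7/8-15 and for the odd queue in bits
	** 16-23/24-31.  On the 82576 the index is (queue & 7); queues 0-7
	** use the low half of the entry and queues 8-15 the high half.
	*/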
2317	/* Turn on MSIX */
2318	switch (adapter->hw.mac.type) {
2319	case e1000_82580:
2320	case e1000_i350:
2321	case e1000_vfadapt:
2322	case e1000_vfadapt_i350:
2323		/* RX entries */
2324		for (int i = 0; i < adapter->num_queues; i++) {
2325			u32 index = i >> 1;
2326			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2327			que = &adapter->queues[i];
2328			if (i & 1) {
2329				ivar &= 0xFF00FFFF;
2330				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2331			} else {
2332				ivar &= 0xFFFFFF00;
2333				ivar |= que->msix | E1000_IVAR_VALID;
2334			}
2335			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2336		}
2337		/* TX entries */
2338		for (int i = 0; i < adapter->num_queues; i++) {
2339			u32 index = i >> 1;
2340			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2341			que = &adapter->queues[i];
2342			if (i & 1) {
2343				ivar &= 0x00FFFFFF;
2344				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2345			} else {
2346				ivar &= 0xFFFF00FF;
2347				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2348			}
2349			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2350			adapter->que_mask |= que->eims;
2351		}
2352
2353		/* And for the link interrupt */
2354		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2355		adapter->link_mask = 1 << adapter->linkvec;
2356		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2357		break;
2358	case e1000_82576:
2359		/* RX entries */
2360		for (int i = 0; i < adapter->num_queues; i++) {
2361			u32 index = i & 0x7; /* Each IVAR has two entries */
2362			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2363			que = &adapter->queues[i];
2364			if (i < 8) {
2365				ivar &= 0xFFFFFF00;
2366				ivar |= que->msix | E1000_IVAR_VALID;
2367			} else {
2368				ivar &= 0xFF00FFFF;
2369				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2370			}
2371			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2372			adapter->que_mask |= que->eims;
2373		}
2374		/* TX entries */
2375		for (int i = 0; i < adapter->num_queues; i++) {
2376			u32 index = i & 0x7; /* Each IVAR has two entries */
2377			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2378			que = &adapter->queues[i];
2379			if (i < 8) {
2380				ivar &= 0xFFFF00FF;
2381				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2382			} else {
2383				ivar &= 0x00FFFFFF;
2384				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2385			}
2386			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2387			adapter->que_mask |= que->eims;
2388		}
2389
2390		/* And for the link interrupt */
2391		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2392		adapter->link_mask = 1 << adapter->linkvec;
2393		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2394		break;
2395
2396	case e1000_82575:
2397                /* enable MSI-X support*/
2398		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2399                tmp |= E1000_CTRL_EXT_PBA_CLR;
2400                /* Auto-Mask interrupts upon ICR read. */
2401                tmp |= E1000_CTRL_EXT_EIAME;
2402                tmp |= E1000_CTRL_EXT_IRCA;
2403                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2404
2405		/* Queues */
2406		for (int i = 0; i < adapter->num_queues; i++) {
2407			que = &adapter->queues[i];
2408			tmp = E1000_EICR_RX_QUEUE0 << i;
2409			tmp |= E1000_EICR_TX_QUEUE0 << i;
2410			que->eims = tmp;
2411			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2412			    i, que->eims);
2413			adapter->que_mask |= que->eims;
2414		}
2415
2416		/* Link */
2417		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2418		    E1000_EIMS_OTHER);
2419		adapter->link_mask |= E1000_EIMS_OTHER;
2420	default:
2421		break;
2422	}
2423
2424	/* Set the starting interrupt rate */
2425	if (igb_max_interrupt_rate > 0)
2426		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2427
2428        if (hw->mac.type == e1000_82575)
2429                newitr |= newitr << 16;
2430        else
2431                newitr |= E1000_EITR_CNT_IGNR;
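	/*
	 * Example: with igb_max_interrupt_rate = 8000 this yields an EITR
	 * interval value of 4000000 / 8000 = 500.  The 82575 duplicates
	 * the interval into the high half of the register; later MACs set
	 * the CNT_IGNR bit instead.
	 */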
2432
2433	for (int i = 0; i < adapter->num_queues; i++) {
2434		que = &adapter->queues[i];
2435		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2436	}
2437
2438	return;
2439}
2440
2441
2442static void
2443igb_free_pci_resources(struct adapter *adapter)
2444{
2445	struct		igb_queue *que = adapter->queues;
2446	device_t	dev = adapter->dev;
2447	int		rid;
2448
2449	/*
2450	** There is a slight possibility of a failure mode
2451	** in attach that will result in entering this function
2452	** before interrupt resources have been initialized, and
2453	** in that case we do not want to execute the loops below.
2454	** We can detect this reliably by the state of the adapter's
2455	** res pointer.
2456	*/
2457	if (adapter->res == NULL)
2458		goto mem;
2459
2460	/*
2461	 * First release all the interrupt resources:
2462	 */
2463	for (int i = 0; i < adapter->num_queues; i++, que++) {
2464		rid = que->msix + 1;
2465		if (que->tag != NULL) {
2466			bus_teardown_intr(dev, que->res, que->tag);
2467			que->tag = NULL;
2468		}
2469		if (que->res != NULL)
2470			bus_release_resource(dev,
2471			    SYS_RES_IRQ, rid, que->res);
2472	}
2473
2474	/* Clean the Legacy or Link interrupt last */
2475	if (adapter->linkvec) /* we are doing MSIX */
2476		rid = adapter->linkvec + 1;
2477	else
2478		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2479
2480	if (adapter->tag != NULL) {
2481		bus_teardown_intr(dev, adapter->res, adapter->tag);
2482		adapter->tag = NULL;
2483	}
2484	if (adapter->res != NULL)
2485		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2486
2487mem:
2488	if (adapter->msix)
2489		pci_release_msi(dev);
2490
2491	if (adapter->msix_mem != NULL)
2492		bus_release_resource(dev, SYS_RES_MEMORY,
2493		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2494
2495	if (adapter->pci_mem != NULL)
2496		bus_release_resource(dev, SYS_RES_MEMORY,
2497		    PCIR_BAR(0), adapter->pci_mem);
2498
2499}
2500
2501/*
2502 * Setup Either MSI/X or MSI
2503 */
2504static int
2505igb_setup_msix(struct adapter *adapter)
2506{
2507	device_t dev = adapter->dev;
2508	int rid, want, queues, msgs;
2509
2510	/* tuneable override */
2511	if (igb_enable_msix == 0)
2512		goto msi;
2513
2514	/* First try MSI/X */
2515	rid = PCIR_BAR(IGB_MSIX_BAR);
2516	adapter->msix_mem = bus_alloc_resource_any(dev,
2517	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2518       	if (!adapter->msix_mem) {
2519		/* May not be enabled */
2520		device_printf(adapter->dev,
2521		    "Unable to map MSIX table\n");
2522		goto msi;
2523	}
2524
2525	msgs = pci_msix_count(dev);
2526	if (msgs == 0) { /* system has msix disabled */
2527		bus_release_resource(dev, SYS_RES_MEMORY,
2528		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2529		adapter->msix_mem = NULL;
2530		goto msi;
2531	}
2532
2533	/* Figure out a reasonable auto config value */
2534	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2535
2536	/* Manual override */
2537	if (igb_num_queues != 0)
2538		queues = igb_num_queues;
2539	if (queues > 8)  /* max queues */
2540		queues = 8;
2541
2542	/* Can have max of 4 queues on 82575 */
2543	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2544		queues = 4;
2545
2546	/* Limit the VF devices to one queue */
2547	if (adapter->vf_ifp)
2548		queues = 1;
2549
2550	/*
2551	** One vector (RX/TX pair) per queue
2552	** plus an additional for Link interrupt
2553	*/
2554	want = queues + 1;
2555	if (msgs >= want)
2556		msgs = want;
2557	else {
2558               	device_printf(adapter->dev,
2559		    "MSIX Configuration Problem, "
2560		    "%d vectors configured, but %d queues wanted!\n",
2561		    msgs, want);
2562		return (ENXIO);
2563	}
2564	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2565               	device_printf(adapter->dev,
2566		    "Using MSIX interrupts with %d vectors\n", msgs);
2567		adapter->num_queues = queues;
2568		return (msgs);
2569	}
2570msi:
2571       	msgs = pci_msi_count(dev);
2572       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2573               	device_printf(adapter->dev,"Using MSI interrupt\n");
2574	return (msgs);
2575}
2576
2577/*********************************************************************
2578 *
2579 *  Set up a fresh starting state
2580 *
2581 **********************************************************************/
2582static void
2583igb_reset(struct adapter *adapter)
2584{
2585	device_t	dev = adapter->dev;
2586	struct e1000_hw *hw = &adapter->hw;
2587	struct e1000_fc_info *fc = &hw->fc;
2588	struct ifnet	*ifp = adapter->ifp;
2589	u32		pba = 0;
2590	u16		hwm;
2591
2592	INIT_DEBUGOUT("igb_reset: begin");
2593
2594	/* Let the firmware know the OS is in control */
2595	igb_get_hw_control(adapter);
2596
2597	/*
2598	 * Packet Buffer Allocation (PBA)
2599	 * Writing PBA sets the receive portion of the buffer;
2600	 * the remainder is used for the transmit buffer.
2601	 */
2602	switch (hw->mac.type) {
2603	case e1000_82575:
2604		pba = E1000_PBA_32K;
2605		break;
2606	case e1000_82576:
2607	case e1000_vfadapt:
2608		pba = E1000_READ_REG(hw, E1000_RXPBS);
2609		pba &= E1000_RXPBS_SIZE_MASK_82576;
2610		break;
2611	case e1000_82580:
2612	case e1000_i350:
2613	case e1000_vfadapt_i350:
2614		pba = E1000_READ_REG(hw, E1000_RXPBS);
2615		pba = e1000_rxpbs_adjust_82580(pba);
2616		break;
2617	default:
2618		pba = E1000_PBA_35K;
2619		break;
2620	}
2621
2622	/* Special needs in case of Jumbo frames */
2623	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2624		u32 tx_space, min_tx, min_rx;
2625		pba = E1000_READ_REG(hw, E1000_PBA);
2626		tx_space = pba >> 16;
2627		pba &= 0xffff;
2628		min_tx = (adapter->max_frame_size +
2629		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2630		min_tx = roundup2(min_tx, 1024);
2631		min_tx >>= 10;
2632                min_rx = adapter->max_frame_size;
2633                min_rx = roundup2(min_rx, 1024);
2634                min_rx >>= 10;
2635		if (tx_space < min_tx &&
2636		    ((min_tx - tx_space) < pba)) {
2637			pba = pba - (min_tx - tx_space);
2638			/*
2639                         * if short on rx space, rx wins
2640                         * and must trump tx adjustment
2641			 */
2642                        if (pba < min_rx)
2643                                pba = min_rx;
2644		}
2645		E1000_WRITE_REG(hw, E1000_PBA, pba);
2646	}
2647
2648	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2649
2650	/*
2651	 * These parameters control the automatic generation (Tx) and
2652	 * response (Rx) to Ethernet PAUSE frames.
2653	 * - High water mark should allow for at least two frames to be
2654	 *   received after sending an XOFF.
2655	 * - Low water mark works best when it is very near the high water mark.
2656	 *   This allows the receiver to restart by sending XON when it has
2657	 *   drained a bit.
2658	 */
2659	hwm = min(((pba << 10) * 9 / 10),
2660	    ((pba << 10) - 2 * adapter->max_frame_size));
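	/*
	 * hwm is in bytes (pba is in KB, hence the << 10): the smaller of
	 * 90% of the Rx packet buffer and the buffer size less room for
	 * two maximum-sized frames.
	 */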
2661
2662	if (hw->mac.type < e1000_82576) {
2663		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2664		fc->low_water = fc->high_water - 8;
2665	} else {
2666		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2667		fc->low_water = fc->high_water - 16;
2668	}
2669
2670	fc->pause_time = IGB_FC_PAUSE_TIME;
2671	fc->send_xon = TRUE;
2672
2673	/* Set Flow control, use the tunable location if sane */
2674	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2675		fc->requested_mode = adapter->fc_setting;
2676	else
2677		fc->requested_mode = e1000_fc_none;
2678
2679	fc->current_mode = fc->requested_mode;
2680
2681	/* Issue a global reset */
2682	e1000_reset_hw(hw);
2683	E1000_WRITE_REG(hw, E1000_WUC, 0);
2684
2685	if (e1000_init_hw(hw) < 0)
2686		device_printf(dev, "Hardware Initialization Failed\n");
2687
2688	/* Setup DMA Coalescing */
2689	if ((hw->mac.type == e1000_i350) &&
2690	    (adapter->dma_coalesce == TRUE)) {
2691		u32 reg;
2692
2693		hwm = (pba - 4) << 10;
2694		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2695		    & E1000_DMACR_DMACTHR_MASK);
2696
2697		/* transition to L0s or L1 if available */
2698		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2699
2700		/* timer = +-1000 usec in 32usec intervals */
2701		reg |= (1000 >> 5);
2702		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2703
2704		/* No lower threshold */
2705		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2706
2707		/* set hwm to PBA -  2 * max frame size */
2708		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2709
2710		/* Set the interval before transition */
2711		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2712		reg |= 0x800000FF; /* 255 usec */
2713		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2714
2715		/* free space in tx packet buffer to wake from DMA coal */
2716		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2717		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2718
2719		/* make low power state decision controlled by DMA coal */
2720		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2721		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2722		    reg | E1000_PCIEMISC_LX_DECISION);
2723		device_printf(dev, "DMA Coalescing enabled\n");
2724	}
2725
2726	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2727	e1000_get_phy_info(hw);
2728	e1000_check_for_link(hw);
2729	return;
2730}
2731
2732/*********************************************************************
2733 *
2734 *  Setup networking device structure and register an interface.
2735 *
2736 **********************************************************************/
2737static int
2738igb_setup_interface(device_t dev, struct adapter *adapter)
2739{
2740	struct ifnet   *ifp;
2741
2742	INIT_DEBUGOUT("igb_setup_interface: begin");
2743
2744	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2745	if (ifp == NULL) {
2746		device_printf(dev, "can not allocate ifnet structure\n");
2747		return (-1);
2748	}
2749	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2750	ifp->if_mtu = ETHERMTU;
2751	ifp->if_init =  igb_init;
2752	ifp->if_softc = adapter;
2753	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2754	ifp->if_ioctl = igb_ioctl;
2755	ifp->if_start = igb_start;
2756#if __FreeBSD_version >= 800000
2757	ifp->if_transmit = igb_mq_start;
2758	ifp->if_qflush = igb_qflush;
2759#endif
2760	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2761	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2762	IFQ_SET_READY(&ifp->if_snd);
2763
2764	ether_ifattach(ifp, adapter->hw.mac.addr);
2765
2766	ifp->if_capabilities = ifp->if_capenable = 0;
2767
2768	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2769	ifp->if_capabilities |= IFCAP_TSO4;
2770	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2771	ifp->if_capenable = ifp->if_capabilities;
2772
2773	/* Advertise LRO capability, but leave it disabled by default */
2774	ifp->if_capabilities |= IFCAP_LRO;
2775
2776#ifdef DEVICE_POLLING
2777	ifp->if_capabilities |= IFCAP_POLLING;
2778#endif
2779
2780	/*
2781	 * Tell the upper layer(s) we
2782	 * support full VLAN capability.
2783	 */
2784	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2785	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2786	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2787
2788	/*
2789	** Don't turn this on by default: if vlans are
2790	** created on another pseudo device (e.g. lagg)
2791	** then vlan events are not passed through, breaking
2792	** operation, but with HW FILTER off it works. If
2793	** using vlans directly on the igb driver you can
2794	** enable this and get full hardware tag filtering.
2795	*/
2796	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2797
2798	/*
2799	 * Specify the media types supported by this adapter and register
2800	 * callbacks to update media and link information
2801	 */
2802	ifmedia_init(&adapter->media, IFM_IMASK,
2803	    igb_media_change, igb_media_status);
2804	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2805	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2806		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2807			    0, NULL);
2808		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2809	} else {
2810		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2811		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2812			    0, NULL);
2813		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2814			    0, NULL);
2815		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2816			    0, NULL);
2817		if (adapter->hw.phy.type != e1000_phy_ife) {
2818			ifmedia_add(&adapter->media,
2819				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2820			ifmedia_add(&adapter->media,
2821				IFM_ETHER | IFM_1000_T, 0, NULL);
2822		}
2823	}
2824	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2825	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2826	return (0);
2827}
2828
2829
2830/*
2831 * Manage DMA'able memory.
2832 */
2833static void
2834igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2835{
2836	if (error)
2837		return;
2838	*(bus_addr_t *) arg = segs[0].ds_addr;
2839}
2840
2841static int
2842igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2843        struct igb_dma_alloc *dma, int mapflags)
2844{
2845	int error;
2846
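	/*
	 * The tag below describes a single physically contiguous segment
	 * of exactly 'size' bytes, aligned to IGB_DBA_ALIGN and with no
	 * address restrictions; it is used here for the descriptor rings.
	 */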
2847	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2848				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2849				BUS_SPACE_MAXADDR,	/* lowaddr */
2850				BUS_SPACE_MAXADDR,	/* highaddr */
2851				NULL, NULL,		/* filter, filterarg */
2852				size,			/* maxsize */
2853				1,			/* nsegments */
2854				size,			/* maxsegsize */
2855				0,			/* flags */
2856				NULL,			/* lockfunc */
2857				NULL,			/* lockarg */
2858				&dma->dma_tag);
2859	if (error) {
2860		device_printf(adapter->dev,
2861		    "%s: bus_dma_tag_create failed: %d\n",
2862		    __func__, error);
2863		goto fail_0;
2864	}
2865
2866	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2867	    BUS_DMA_NOWAIT, &dma->dma_map);
2868	if (error) {
2869		device_printf(adapter->dev,
2870		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2871		    __func__, (uintmax_t)size, error);
2872		goto fail_2;
2873	}
2874
2875	dma->dma_paddr = 0;
2876	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2877	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2878	if (error || dma->dma_paddr == 0) {
2879		device_printf(adapter->dev,
2880		    "%s: bus_dmamap_load failed: %d\n",
2881		    __func__, error);
2882		goto fail_3;
2883	}
2884
2885	return (0);
2886
2887fail_3:
2888	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2889fail_2:
2890	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2891	bus_dma_tag_destroy(dma->dma_tag);
2892fail_0:
2893	dma->dma_map = NULL;
2894	dma->dma_tag = NULL;
2895
2896	return (error);
2897}
2898
2899static void
2900igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2901{
2902	if (dma->dma_tag == NULL)
2903		return;
2904	if (dma->dma_map != NULL) {
2905		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2906		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2907		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2908		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2909		dma->dma_map = NULL;
2910	}
2911	bus_dma_tag_destroy(dma->dma_tag);
2912	dma->dma_tag = NULL;
2913}
2914
2915
2916/*********************************************************************
2917 *
2918 *  Allocate memory for the transmit and receive rings, and then
2919 *  the descriptors associated with each, called only once at attach.
2920 *
2921 **********************************************************************/
2922static int
2923igb_allocate_queues(struct adapter *adapter)
2924{
2925	device_t dev = adapter->dev;
2926	struct igb_queue	*que = NULL;
2927	struct tx_ring		*txr = NULL;
2928	struct rx_ring		*rxr = NULL;
2929	int rsize, tsize, error = E1000_SUCCESS;
2930	int txconf = 0, rxconf = 0;
2931
2932	/* First allocate the top level queue structs */
2933	if (!(adapter->queues =
2934	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2935	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2936		device_printf(dev, "Unable to allocate queue memory\n");
2937		error = ENOMEM;
2938		goto fail;
2939	}
2940
2941	/* Next allocate the TX ring struct memory */
2942	if (!(adapter->tx_rings =
2943	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2944	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2945		device_printf(dev, "Unable to allocate TX ring memory\n");
2946		error = ENOMEM;
2947		goto tx_fail;
2948	}
2949
2950	/* Now allocate the RX */
2951	if (!(adapter->rx_rings =
2952	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2953	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2954		device_printf(dev, "Unable to allocate RX ring memory\n");
2955		error = ENOMEM;
2956		goto rx_fail;
2957	}
2958
2959	tsize = roundup2(adapter->num_tx_desc *
2960	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
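	/*
	 * Ring sizes are rounded up to IGB_DBA_ALIGN so that each
	 * descriptor area is a multiple of the required base alignment.
	 */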
2961	/*
2962	 * Now set up the TX queues, txconf is needed to handle the
2963	 * possibility that things fail midcourse and we need to
2964	 * undo memory gracefully
2965	 */
2966	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2967		/* Set up some basics */
2968		txr = &adapter->tx_rings[i];
2969		txr->adapter = adapter;
2970		txr->me = i;
2971
2972		/* Initialize the TX lock */
2973		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2974		    device_get_nameunit(dev), txr->me);
2975		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2976
2977		if (igb_dma_malloc(adapter, tsize,
2978			&txr->txdma, BUS_DMA_NOWAIT)) {
2979			device_printf(dev,
2980			    "Unable to allocate TX Descriptor memory\n");
2981			error = ENOMEM;
2982			goto err_tx_desc;
2983		}
2984		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2985		bzero((void *)txr->tx_base, tsize);
2986
2987        	/* Now allocate transmit buffers for the ring */
2988        	if (igb_allocate_transmit_buffers(txr)) {
2989			device_printf(dev,
2990			    "Critical Failure setting up transmit buffers\n");
2991			error = ENOMEM;
2992			goto err_tx_desc;
2993        	}
2994#if __FreeBSD_version >= 800000
2995		/* Allocate a buf ring */
2996		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2997		    M_WAITOK, &txr->tx_mtx);
2998#endif
2999	}
3000
3001	/*
3002	 * Next the RX queues...
3003	 */
3004	rsize = roundup2(adapter->num_rx_desc *
3005	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3006	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3007		rxr = &adapter->rx_rings[i];
3008		rxr->adapter = adapter;
3009		rxr->me = i;
3010
3011		/* Initialize the RX lock */
3012		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3013		    device_get_nameunit(dev), rxr->me);
3014		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3015
3016		if (igb_dma_malloc(adapter, rsize,
3017			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3018			device_printf(dev,
3019			    "Unable to allocate RxDescriptor memory\n");
3020			error = ENOMEM;
3021			goto err_rx_desc;
3022		}
3023		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3024		bzero((void *)rxr->rx_base, rsize);
3025
3026        	/* Allocate receive buffers for the ring*/
3027		if (igb_allocate_receive_buffers(rxr)) {
3028			device_printf(dev,
3029			    "Critical Failure setting up receive buffers\n");
3030			error = ENOMEM;
3031			goto err_rx_desc;
3032		}
3033	}
3034
3035	/*
3036	** Finally set up the queue holding structs
3037	*/
3038	for (int i = 0; i < adapter->num_queues; i++) {
3039		que = &adapter->queues[i];
3040		que->adapter = adapter;
3041		que->txr = &adapter->tx_rings[i];
3042		que->rxr = &adapter->rx_rings[i];
3043	}
3044
3045	return (0);
3046
3047err_rx_desc:
3048	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3049		igb_dma_free(adapter, &rxr->rxdma);
3050err_tx_desc:
3051	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3052		igb_dma_free(adapter, &txr->txdma);
3053	free(adapter->rx_rings, M_DEVBUF);
3054rx_fail:
3055#if __FreeBSD_version >= 800000
3056	buf_ring_free(txr->br, M_DEVBUF);
3057#endif
3058	free(adapter->tx_rings, M_DEVBUF);
3059tx_fail:
3060	free(adapter->queues, M_DEVBUF);
3061fail:
3062	return (error);
3063}
3064
3065/*********************************************************************
3066 *
3067 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3068 *  the information needed to transmit a packet on the wire. This is
3069 *  called only once at attach; setup is done on every reset.
3070 *
3071 **********************************************************************/
3072static int
3073igb_allocate_transmit_buffers(struct tx_ring *txr)
3074{
3075	struct adapter *adapter = txr->adapter;
3076	device_t dev = adapter->dev;
3077	struct igb_tx_buffer *txbuf;
3078	int error, i;
3079
3080	/*
3081	 * Setup DMA descriptor areas.
3082	 */
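	/*
	 * One mapping may span up to IGB_MAX_SCATTER segments of at most
	 * PAGE_SIZE each, with IGB_TSO_SIZE as the total; larger chains
	 * cause EFBIG in igb_xmit(), which then defragments the mbuf.
	 */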
3083	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3084			       1, 0,			/* alignment, bounds */
3085			       BUS_SPACE_MAXADDR,	/* lowaddr */
3086			       BUS_SPACE_MAXADDR,	/* highaddr */
3087			       NULL, NULL,		/* filter, filterarg */
3088			       IGB_TSO_SIZE,		/* maxsize */
3089			       IGB_MAX_SCATTER,		/* nsegments */
3090			       PAGE_SIZE,		/* maxsegsize */
3091			       0,			/* flags */
3092			       NULL,			/* lockfunc */
3093			       NULL,			/* lockfuncarg */
3094			       &txr->txtag))) {
3095		device_printf(dev,"Unable to allocate TX DMA tag\n");
3096		goto fail;
3097	}
3098
3099	if (!(txr->tx_buffers =
3100	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3101	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3102		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3103		error = ENOMEM;
3104		goto fail;
3105	}
3106
3107        /* Create the descriptor buffer dma maps */
3108	txbuf = txr->tx_buffers;
3109	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3110		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3111		if (error != 0) {
3112			device_printf(dev, "Unable to create TX DMA map\n");
3113			goto fail;
3114		}
3115	}
3116
3117	return 0;
3118fail:
3119	/* Free everything; this handles the case where we failed partway through */
3120	igb_free_transmit_structures(adapter);
3121	return (error);
3122}
3123
3124/*********************************************************************
3125 *
3126 *  Initialize a transmit ring.
3127 *
3128 **********************************************************************/
3129static void
3130igb_setup_transmit_ring(struct tx_ring *txr)
3131{
3132	struct adapter *adapter = txr->adapter;
3133	struct igb_tx_buffer *txbuf;
3134	int i;
3135
3136	/* Clear the old descriptor contents */
3137	IGB_TX_LOCK(txr);
3138	bzero((void *)txr->tx_base,
3139	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3140	/* Reset indices */
3141	txr->next_avail_desc = 0;
3142	txr->next_to_clean = 0;
3143
3144	/* Free any existing tx buffers. */
3145        txbuf = txr->tx_buffers;
3146	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3147		if (txbuf->m_head != NULL) {
3148			bus_dmamap_sync(txr->txtag, txbuf->map,
3149			    BUS_DMASYNC_POSTWRITE);
3150			bus_dmamap_unload(txr->txtag, txbuf->map);
3151			m_freem(txbuf->m_head);
3152			txbuf->m_head = NULL;
3153		}
3154		/* clear the watch index */
3155		txbuf->next_eop = -1;
3156        }
3157
3158	/* Set number of descriptors available */
3159	txr->tx_avail = adapter->num_tx_desc;
3160
3161	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3162	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3163	IGB_TX_UNLOCK(txr);
3164}
3165
3166/*********************************************************************
3167 *
3168 *  Initialize all transmit rings.
3169 *
3170 **********************************************************************/
3171static void
3172igb_setup_transmit_structures(struct adapter *adapter)
3173{
3174	struct tx_ring *txr = adapter->tx_rings;
3175
3176	for (int i = 0; i < adapter->num_queues; i++, txr++)
3177		igb_setup_transmit_ring(txr);
3178
3179	return;
3180}
3181
3182/*********************************************************************
3183 *
3184 *  Enable transmit unit.
3185 *
3186 **********************************************************************/
3187static void
3188igb_initialize_transmit_units(struct adapter *adapter)
3189{
3190	struct tx_ring	*txr = adapter->tx_rings;
3191	struct e1000_hw *hw = &adapter->hw;
3192	u32		tctl, txdctl;
3193
3194	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3195	tctl = txdctl = 0;
3196
3197	/* Setup the Tx Descriptor Rings */
3198	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3199		u64 bus_addr = txr->txdma.dma_paddr;
3200
3201		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3202		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3203		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3204		    (uint32_t)(bus_addr >> 32));
3205		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3206		    (uint32_t)bus_addr);
3207
3208		/* Setup the HW Tx Head and Tail descriptor pointers */
3209		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3210		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3211
3212		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3213		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3214		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3215
3216		txr->queue_status = IGB_QUEUE_IDLE;
3217
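		/*
		 * TXDCTL packs the prefetch, host and write-back
		 * thresholds into bytes 0, 1 and 2 (hence the shifts of
		 * 8 and 16); QUEUE_ENABLE turns the ring on.
		 */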
3218		txdctl |= IGB_TX_PTHRESH;
3219		txdctl |= IGB_TX_HTHRESH << 8;
3220		txdctl |= IGB_TX_WTHRESH << 16;
3221		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3222		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3223	}
3224
3225	if (adapter->vf_ifp)
3226		return;
3227
3228	e1000_config_collision_dist(hw);
3229
3230	/* Program the Transmit Control Register */
3231	tctl = E1000_READ_REG(hw, E1000_TCTL);
3232	tctl &= ~E1000_TCTL_CT;
3233	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3234		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3235
3236	/* This write will effectively turn on the transmit unit. */
3237	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3238}
3239
3240/*********************************************************************
3241 *
3242 *  Free all transmit rings.
3243 *
3244 **********************************************************************/
3245static void
3246igb_free_transmit_structures(struct adapter *adapter)
3247{
3248	struct tx_ring *txr = adapter->tx_rings;
3249
3250	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3251		IGB_TX_LOCK(txr);
3252		igb_free_transmit_buffers(txr);
3253		igb_dma_free(adapter, &txr->txdma);
3254		IGB_TX_UNLOCK(txr);
3255		IGB_TX_LOCK_DESTROY(txr);
3256	}
3257	free(adapter->tx_rings, M_DEVBUF);
3258}
3259
3260/*********************************************************************
3261 *
3262 *  Free transmit ring related data structures.
3263 *
3264 **********************************************************************/
3265static void
3266igb_free_transmit_buffers(struct tx_ring *txr)
3267{
3268	struct adapter *adapter = txr->adapter;
3269	struct igb_tx_buffer *tx_buffer;
3270	int             i;
3271
3272	INIT_DEBUGOUT("free_transmit_ring: begin");
3273
3274	if (txr->tx_buffers == NULL)
3275		return;
3276
3277	tx_buffer = txr->tx_buffers;
3278	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3279		if (tx_buffer->m_head != NULL) {
3280			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3281			    BUS_DMASYNC_POSTWRITE);
3282			bus_dmamap_unload(txr->txtag,
3283			    tx_buffer->map);
3284			m_freem(tx_buffer->m_head);
3285			tx_buffer->m_head = NULL;
3286			if (tx_buffer->map != NULL) {
3287				bus_dmamap_destroy(txr->txtag,
3288				    tx_buffer->map);
3289				tx_buffer->map = NULL;
3290			}
3291		} else if (tx_buffer->map != NULL) {
3292			bus_dmamap_unload(txr->txtag,
3293			    tx_buffer->map);
3294			bus_dmamap_destroy(txr->txtag,
3295			    tx_buffer->map);
3296			tx_buffer->map = NULL;
3297		}
3298	}
3299#if __FreeBSD_version >= 800000
3300	if (txr->br != NULL)
3301		buf_ring_free(txr->br, M_DEVBUF);
3302#endif
3303	if (txr->tx_buffers != NULL) {
3304		free(txr->tx_buffers, M_DEVBUF);
3305		txr->tx_buffers = NULL;
3306	}
3307	if (txr->txtag != NULL) {
3308		bus_dma_tag_destroy(txr->txtag);
3309		txr->txtag = NULL;
3310	}
3311	return;
3312}
3313
3314/**********************************************************************
3315 *
3316 *  Setup work for hardware segmentation offload (TSO)
3317 *
3318 **********************************************************************/
3319static boolean_t
3320igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3321{
3322	struct adapter *adapter = txr->adapter;
3323	struct e1000_adv_tx_context_desc *TXD;
3324	struct igb_tx_buffer        *tx_buffer;
3325	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3326	u32 mss_l4len_idx = 0;
3327	u16 vtag = 0;
3328	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3329	struct ether_vlan_header *eh;
3330	struct ip *ip;
3331	struct tcphdr *th;
3332
3333
3334	/*
3335	 * Determine where frame payload starts.
3336	 * Jump over vlan headers if already present
3337	 */
3338	eh = mtod(mp, struct ether_vlan_header *);
3339	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3340		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3341	else
3342		ehdrlen = ETHER_HDR_LEN;
3343
3344	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3345	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3346		return FALSE;
3347
3348	/* Only supports IPV4 for now */
3349	ctxd = txr->next_avail_desc;
3350	tx_buffer = &txr->tx_buffers[ctxd];
3351	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3352
3353	ip = (struct ip *)(mp->m_data + ehdrlen);
3354	if (ip->ip_p != IPPROTO_TCP)
3355                return FALSE;   /* 0 */
3356	ip->ip_sum = 0;
3357	ip_hlen = ip->ip_hl << 2;
3358	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3359	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3360	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
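	/*
	 * For TSO the hardware recomputes the TCP checksum for each
	 * segment, so seed th_sum with the pseudo-header sum (without
	 * the length field); ip_sum was zeroed above for the same reason.
	 */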
3361	tcp_hlen = th->th_off << 2;
3362	/*
3363	 * Calculate header length, this is used
3364	 * in the transmit desc in igb_xmit
3365	 */
3366	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3367
3368	/* VLAN MACLEN IPLEN */
3369	if (mp->m_flags & M_VLANTAG) {
3370		vtag = htole16(mp->m_pkthdr.ether_vtag);
3371		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3372	}
3373
3374	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3375	vlan_macip_lens |= ip_hlen;
3376	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3377
3378	/* ADV DTYPE TUCMD */
3379	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3380	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3381	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3382	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3383
3384	/* MSS L4LEN IDX */
3385	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3386	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3387	/* 82575 needs the queue index added */
3388	if (adapter->hw.mac.type == e1000_82575)
3389		mss_l4len_idx |= txr->me << 4;
3390	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3391
3392	TXD->seqnum_seed = htole32(0);
3393	tx_buffer->m_head = NULL;
3394	tx_buffer->next_eop = -1;
3395
3396	if (++ctxd == adapter->num_tx_desc)
3397		ctxd = 0;
3398
3399	txr->tx_avail--;
3400	txr->next_avail_desc = ctxd;
3401	return TRUE;
3402}
3403
3404
3405/*********************************************************************
3406 *
3407 *  Context Descriptor setup for VLAN or CSUM
3408 *
3409 **********************************************************************/
3410
3411static bool
3412igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3413{
3414	struct adapter *adapter = txr->adapter;
3415	struct e1000_adv_tx_context_desc *TXD;
3416	struct igb_tx_buffer        *tx_buffer;
3417	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3418	struct ether_vlan_header *eh;
3419	struct ip *ip = NULL;
3420	struct ip6_hdr *ip6;
3421	int  ehdrlen, ctxd, ip_hlen = 0;
3422	u16	etype, vtag = 0;
3423	u8	ipproto = 0;
3424	bool	offload = TRUE;
3425
3426	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3427		offload = FALSE;
3428
3429	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3430	ctxd = txr->next_avail_desc;
3431	tx_buffer = &txr->tx_buffers[ctxd];
3432	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3433
3434	/*
3435	** In advanced descriptors the vlan tag must
3436	** be placed into the context descriptor, thus
3437	** we need to be here just for that setup.
3438	*/
3439	if (mp->m_flags & M_VLANTAG) {
3440		vtag = htole16(mp->m_pkthdr.ether_vtag);
3441		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3442	} else if (offload == FALSE)
3443		return FALSE;
3444
3445	/*
3446	 * Determine where frame payload starts.
3447	 * Jump over vlan headers if already present,
3448	 * helpful for QinQ too.
3449	 */
3450	eh = mtod(mp, struct ether_vlan_header *);
3451	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3452		etype = ntohs(eh->evl_proto);
3453		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3454	} else {
3455		etype = ntohs(eh->evl_encap_proto);
3456		ehdrlen = ETHER_HDR_LEN;
3457	}
3458
3459	/* Set the ether header length */
3460	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3461
3462	switch (etype) {
3463		case ETHERTYPE_IP:
3464			ip = (struct ip *)(mp->m_data + ehdrlen);
3465			ip_hlen = ip->ip_hl << 2;
3466			if (mp->m_len < ehdrlen + ip_hlen) {
3467				offload = FALSE;
3468				break;
3469			}
3470			ipproto = ip->ip_p;
3471			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3472			break;
3473		case ETHERTYPE_IPV6:
3474			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3475			ip_hlen = sizeof(struct ip6_hdr);
3476			ipproto = ip6->ip6_nxt;
3477			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3478			break;
3479		default:
3480			offload = FALSE;
3481			break;
3482	}
3483
3484	vlan_macip_lens |= ip_hlen;
3485	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3486
3487	switch (ipproto) {
3488		case IPPROTO_TCP:
3489			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3490				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3491			break;
3492		case IPPROTO_UDP:
3493			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3494				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3495			break;
3496#if __FreeBSD_version >= 800000
3497		case IPPROTO_SCTP:
3498			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3499				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3500			break;
3501#endif
3502		default:
3503			offload = FALSE;
3504			break;
3505	}
3506
3507	/* 82575 needs the queue index added */
3508	if (adapter->hw.mac.type == e1000_82575)
3509		mss_l4len_idx = txr->me << 4;
3510
3511	/* Now copy bits into descriptor */
3512	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3513	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3514	TXD->seqnum_seed = htole32(0);
3515	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3516
3517	tx_buffer->m_head = NULL;
3518	tx_buffer->next_eop = -1;
3519
3520	/* We've consumed the first desc, adjust counters */
3521	if (++ctxd == adapter->num_tx_desc)
3522		ctxd = 0;
3523	txr->next_avail_desc = ctxd;
3524	--txr->tx_avail;
3525
3526        return (offload);
3527}
3528
3529
3530/**********************************************************************
3531 *
3532 *  Examine each tx_buffer in the used queue. If the hardware is done
3533 *  processing the packet then free associated resources. The
3534 *  tx_buffer is put back on the free queue.
3535 *
3536 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3537 **********************************************************************/
3538static bool
3539igb_txeof(struct tx_ring *txr)
3540{
3541	struct adapter	*adapter = txr->adapter;
3542        int first, last, done, processed;
3543        struct igb_tx_buffer *tx_buffer;
3544        struct e1000_tx_desc   *tx_desc, *eop_desc;
3545	struct ifnet   *ifp = adapter->ifp;
3546
3547	IGB_TX_LOCK_ASSERT(txr);
3548
3549        if (txr->tx_avail == adapter->num_tx_desc) {
3550		txr->queue_status = IGB_QUEUE_IDLE;
3551                return FALSE;
3552	}
3553
3554	processed = 0;
3555        first = txr->next_to_clean;
3556        tx_desc = &txr->tx_base[first];
3557        tx_buffer = &txr->tx_buffers[first];
3558	last = tx_buffer->next_eop;
3559        eop_desc = &txr->tx_base[last];
3560
3561	/*
3562	 * Get the index of the first descriptor
3563	 * AFTER the EOP of the first packet, so the
3564	 * inner while loop can use a simple
3565	 * comparison.
3566	 */
3567	if (++last == adapter->num_tx_desc)
3568 		last = 0;
3569	done = last;
3570
3571        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3572            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3573
3574        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3575		/* We clean the range of the packet */
3576		while (first != done) {
3577                	tx_desc->upper.data = 0;
3578                	tx_desc->lower.data = 0;
3579                	tx_desc->buffer_addr = 0;
3580                	++txr->tx_avail;
3581			++processed;
3582
3583			if (tx_buffer->m_head) {
3584				txr->bytes +=
3585				    tx_buffer->m_head->m_pkthdr.len;
3586				bus_dmamap_sync(txr->txtag,
3587				    tx_buffer->map,
3588				    BUS_DMASYNC_POSTWRITE);
3589				bus_dmamap_unload(txr->txtag,
3590				    tx_buffer->map);
3591
3592                        	m_freem(tx_buffer->m_head);
3593                        	tx_buffer->m_head = NULL;
3594                	}
3595			tx_buffer->next_eop = -1;
3596			txr->watchdog_time = ticks;
3597
3598	                if (++first == adapter->num_tx_desc)
3599				first = 0;
3600
3601	                tx_buffer = &txr->tx_buffers[first];
3602			tx_desc = &txr->tx_base[first];
3603		}
3604		++txr->packets;
3605		++ifp->if_opackets;
3606		/* See if we can continue to the next packet */
3607		last = tx_buffer->next_eop;
3608		if (last != -1) {
3609        		eop_desc = &txr->tx_base[last];
3610			/* Get new done point */
3611			if (++last == adapter->num_tx_desc) last = 0;
3612			done = last;
3613		} else
3614			break;
3615        }
3616        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3617            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3618
3619        txr->next_to_clean = first;
3620
3621	/*
3622	** Watchdog calculation: we know there's work
3623	** outstanding, or the early return above would
3624	** have been taken, so nothing processed for
3625	** too long indicates a hang.
3626	*/
3627	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3628		txr->queue_status = IGB_QUEUE_HUNG;
3629
3630        /*
3631         * If we have a minimum free, clear IFF_DRV_OACTIVE
3632         * to tell the stack that it is OK to send packets.
3633         */
3634        if (txr->tx_avail > IGB_TX_OP_THRESHOLD)
3635                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3636
3637	/* All clean, turn off the watchdog */
3638	if (txr->tx_avail == adapter->num_tx_desc) {
3639		txr->queue_status = IGB_QUEUE_IDLE;
3640		return (FALSE);
3641	}
3642
3643	return (TRUE);
3644}
3645
3646
3647/*********************************************************************
3648 *
3649 *  Refresh mbuf buffers for RX descriptor rings
3650 *   - now keeps its own state, so discards due to resource
3651 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3652 *     it just returns, keeping its placeholder, and can simply
3653 *     be called again to retry.
3654 *
3655 **********************************************************************/
3656static void
3657igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3658{
3659	struct adapter		*adapter = rxr->adapter;
3660	bus_dma_segment_t	hseg[1];
3661	bus_dma_segment_t	pseg[1];
3662	struct igb_rx_buf	*rxbuf;
3663	struct mbuf		*mh, *mp;
3664	int			i, j, nsegs, error;
3665	bool			refreshed = FALSE;
3666
3667	i = j = rxr->next_to_refresh;
3668	/*
3669	** Get one descriptor beyond
3670	** our work mark to control
3671	** the loop.
3672	*/
3673	if (++j == adapter->num_rx_desc)
3674		j = 0;
3675
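	/*
	** 'i' is the slot currently being refreshed; 'j' runs one
	** slot ahead and terminates the loop when it reaches 'limit'.
	*/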
3676	while (j != limit) {
3677		rxbuf = &rxr->rx_buffers[i];
3678		/* No hdr mbuf used with header split off */
3679		if (rxr->hdr_split == FALSE)
3680			goto no_split;
3681		if (rxbuf->m_head == NULL) {
3682			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3683			if (mh == NULL)
3684				goto update;
3685		} else
3686			mh = rxbuf->m_head;
3687
3688		mh->m_pkthdr.len = mh->m_len = MHLEN;
3690		mh->m_flags |= M_PKTHDR;
3691		/* Get the memory mapping */
3692		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3693		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3694		if (error != 0) {
3695			printf("Refresh mbufs: hdr dmamap load"
3696			    " failure - %d\n", error);
3697			m_free(mh);
3698			rxbuf->m_head = NULL;
3699			goto update;
3700		}
3701		rxbuf->m_head = mh;
3702		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3703		    BUS_DMASYNC_PREREAD);
3704		rxr->rx_base[i].read.hdr_addr =
3705		    htole64(hseg[0].ds_addr);
3706no_split:
3707		if (rxbuf->m_pack == NULL) {
3708			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3709			    M_PKTHDR, adapter->rx_mbuf_sz);
3710			if (mp == NULL)
3711				goto update;
3712		} else
3713			mp = rxbuf->m_pack;
3714
3715		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3716		/* Get the memory mapping */
3717		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3718		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3719		if (error != 0) {
3720			printf("Refresh mbufs: payload dmamap load"
3721			    " failure - %d\n", error);
3722			m_free(mp);
3723			rxbuf->m_pack = NULL;
3724			goto update;
3725		}
3726		rxbuf->m_pack = mp;
3727		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3728		    BUS_DMASYNC_PREREAD);
3729		rxr->rx_base[i].read.pkt_addr =
3730		    htole64(pseg[0].ds_addr);
3731		refreshed = TRUE; /* at least one buffer was refilled */
3732
3733		i = j; /* our next is precalculated */
3734		rxr->next_to_refresh = i;
3735		if (++j == adapter->num_rx_desc)
3736			j = 0;
3737	}
3738update:
3739	if (refreshed) /* update tail */
3740		E1000_WRITE_REG(&adapter->hw,
3741		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3742	return;
3743}
3744
3745
3746/*********************************************************************
3747 *
3748 *  Allocate memory for rx_buffer structures. Since we use one
3749 *  rx_buffer per received packet, the maximum number of rx_buffer's
3750 *  that we'll need is equal to the number of receive descriptors
3751 *  that we've allocated.
3752 *
3753 **********************************************************************/
3754static int
3755igb_allocate_receive_buffers(struct rx_ring *rxr)
3756{
3757	struct	adapter 	*adapter = rxr->adapter;
3758	device_t 		dev = adapter->dev;
3759	struct igb_rx_buf	*rxbuf;
3760	int             	i, bsize, error;
3761
3762	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3763	if (!(rxr->rx_buffers =
3764	    (struct igb_rx_buf *) malloc(bsize,
3765	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3766		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3767		error = ENOMEM;
3768		goto fail;
3769	}
3770
3771	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3772				   1, 0,		/* alignment, bounds */
3773				   BUS_SPACE_MAXADDR,	/* lowaddr */
3774				   BUS_SPACE_MAXADDR,	/* highaddr */
3775				   NULL, NULL,		/* filter, filterarg */
3776				   MSIZE,		/* maxsize */
3777				   1,			/* nsegments */
3778				   MSIZE,		/* maxsegsize */
3779				   0,			/* flags */
3780				   NULL,		/* lockfunc */
3781				   NULL,		/* lockfuncarg */
3782				   &rxr->htag))) {
3783		device_printf(dev, "Unable to create RX DMA tag\n");
3784		goto fail;
3785	}
3786
3787	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3788				   1, 0,		/* alignment, bounds */
3789				   BUS_SPACE_MAXADDR,	/* lowaddr */
3790				   BUS_SPACE_MAXADDR,	/* highaddr */
3791				   NULL, NULL,		/* filter, filterarg */
3792				   MJUM9BYTES,		/* maxsize */
3793				   1,			/* nsegments */
3794				   MJUM9BYTES,		/* maxsegsize */
3795				   0,			/* flags */
3796				   NULL,		/* lockfunc */
3797				   NULL,		/* lockfuncarg */
3798				   &rxr->ptag))) {
3799		device_printf(dev, "Unable to create RX payload DMA tag\n");
3800		goto fail;
3801	}
3802
3803	for (i = 0; i < adapter->num_rx_desc; i++) {
3804		rxbuf = &rxr->rx_buffers[i];
3805		error = bus_dmamap_create(rxr->htag,
3806		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3807		if (error) {
3808			device_printf(dev,
3809			    "Unable to create RX head DMA maps\n");
3810			goto fail;
3811		}
3812		error = bus_dmamap_create(rxr->ptag,
3813		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3814		if (error) {
3815			device_printf(dev,
3816			    "Unable to create RX packet DMA maps\n");
3817			goto fail;
3818		}
3819	}
3820
3821	return (0);
3822
3823fail:
3824	/* Frees all, but can handle partial completion */
3825	igb_free_receive_structures(adapter);
3826	return (error);
3827}
3828
3829
3830static void
3831igb_free_receive_ring(struct rx_ring *rxr)
3832{
3833	struct	adapter		*adapter;
3834	struct igb_rx_buf	*rxbuf;
3835	int i;
3836
3837	adapter = rxr->adapter;
3838	i = rxr->next_to_check;
3839	while (i != rxr->next_to_refresh) {
3840		rxbuf = &rxr->rx_buffers[i];
3841		if (rxbuf->m_head != NULL) {
3842			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3843			    BUS_DMASYNC_POSTREAD);
3844			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3845			rxbuf->m_head->m_flags |= M_PKTHDR;
3846			m_freem(rxbuf->m_head);
3847		}
3848		if (rxbuf->m_pack != NULL) {
3849			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3850			    BUS_DMASYNC_POSTREAD);
3851			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3852			rxbuf->m_pack->m_flags |= M_PKTHDR;
3853			m_freem(rxbuf->m_pack);
3854		}
3855		rxbuf->m_head = NULL;
3856		rxbuf->m_pack = NULL;
3857
3858		if (++i == adapter->num_rx_desc)
3859			i = 0;
3860	}
3861	rxr->next_to_check = 0;
3862	rxr->next_to_refresh = 0;
3863}
3864
3865
3866/*********************************************************************
3867 *
3868 *  Initialize a receive ring and its buffers.
3869 *
3870 **********************************************************************/
3871static int
3872igb_setup_receive_ring(struct rx_ring *rxr)
3873{
3874	struct	adapter		*adapter;
3875	struct  ifnet		*ifp;
3876	device_t		dev;
3877	struct igb_rx_buf	*rxbuf;
3878	bus_dma_segment_t	pseg[1], hseg[1];
3879	struct lro_ctrl		*lro = &rxr->lro;
3880	int			i, j, nsegs, error = 0;
3881
3882	adapter = rxr->adapter;
3883	dev = adapter->dev;
3884	ifp = adapter->ifp;
3885
3886	IGB_RX_LOCK(rxr);
3887	/* Invalidate all descriptors */
3888	for (i = 0; i < adapter->num_rx_desc; i++) {
3889		union e1000_adv_rx_desc* cur;
3890		cur = &rxr->rx_base[i];
3891		cur->wb.upper.status_error = 0;
3892        }
3893
3894	/* Configure for header split? */
3895	if (igb_header_split)
3896		rxr->hdr_split = TRUE;
3897
3898        /* Get our indices */
3899	i = j = rxr->next_to_refresh;
3900	if (++j == adapter->num_rx_desc)
3901		j = 0;
3902        /* Now replenish the ring mbufs */
3903	while (j != rxr->next_to_check) {
3904		struct mbuf	*mh, *mp;
3905
3906		rxbuf = &rxr->rx_buffers[i];
3907		if (rxr->hdr_split == FALSE)
3908			goto skip_head;
3909
3910		/* First the header */
3911		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3912		if (rxbuf->m_head == NULL) {
3913			error = ENOBUFS;
3914                        goto fail;
3915		}
3916		m_adj(rxbuf->m_head, ETHER_ALIGN);
3917		mh = rxbuf->m_head;
3918		mh->m_len = mh->m_pkthdr.len = MHLEN;
3919		mh->m_flags |= M_PKTHDR;
3920		/* Get the memory mapping */
3921		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3922		    rxbuf->hmap, rxbuf->m_head, hseg,
3923		    &nsegs, BUS_DMA_NOWAIT);
3924		if (error != 0) /* Nothing elegant to do here */
3925                        goto fail;
3926		bus_dmamap_sync(rxr->htag,
3927		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3928		/* Update descriptor */
3929		rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3930
3931skip_head:
3932		/* Now the payload cluster */
3933		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3934		    M_PKTHDR, adapter->rx_mbuf_sz);
3935		if (rxbuf->m_pack == NULL) {
3936			error = ENOBUFS;
3937                        goto fail;
3938		}
3939		mp = rxbuf->m_pack;
3940		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3941		/* Get the memory mapping */
3942		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3943		    rxbuf->pmap, mp, pseg,
3944		    &nsegs, BUS_DMA_NOWAIT);
3945		if (error != 0)
3946                        goto fail;
3947		bus_dmamap_sync(rxr->ptag,
3948		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3949		/* Update descriptor */
3950		rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3951
3952		/* Setup for next loop */
3953		i = j;
3954		if (++j == adapter->num_rx_desc)
3955			j = 0;
3956        }
3957
3958	/* Setup our descriptor indices */
3959	rxr->next_to_refresh = i;
3960	rxr->lro_enabled = FALSE;
3961	rxr->rx_split_packets = 0;
3962	rxr->rx_bytes = 0;
3963
3964	rxr->fmp = NULL;
3965	rxr->lmp = NULL;
3966	rxr->discard = FALSE;
3967
3968	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3969	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3970
3971	/*
3972	** Now set up the LRO interface; we also
3973	** only do header split when LRO is
3974	** enabled, since the two are so often
3975	** undesirable in similar setups.
3976	*/
3977	if (ifp->if_capenable & IFCAP_LRO) {
3978		error = tcp_lro_init(lro);
3979		if (error) {
3980			device_printf(dev, "LRO Initialization failed!\n");
3981			goto fail;
3982		}
3983		INIT_DEBUGOUT("RX LRO Initialized\n");
3984		rxr->lro_enabled = TRUE;
3985		lro->ifp = adapter->ifp;
3986	}
3987
3988	IGB_RX_UNLOCK(rxr);
3989	return (0);
3990
3991fail:
3992	rxr->next_to_refresh = i;
3993	igb_free_receive_ring(rxr);
3994	IGB_RX_UNLOCK(rxr);
3995	return (error);
3996}
3997
3998/*********************************************************************
3999 *
4000 *  Initialize all receive rings.
4001 *
4002 **********************************************************************/
4003static int
4004igb_setup_receive_structures(struct adapter *adapter)
4005{
4006	struct rx_ring *rxr = adapter->rx_rings;
4007	int i;
4008
4009	for (i = 0; i < adapter->num_queues; i++, rxr++)
4010		if (igb_setup_receive_ring(rxr))
4011			goto fail;
4012
4013	return (0);
4014fail:
4015	/*
4016	 * Free RX buffers allocated so far; we only handle
4017	 * the rings that completed, since the failing case
4018	 * will have cleaned up for itself. 'i' is the endpoint.
4019	 */
4020	for (int j = 0; j < i; ++j) {
4021		rxr = &adapter->rx_rings[j];
4022		IGB_RX_LOCK(rxr);
4023		igb_free_receive_ring(rxr);
4024		IGB_RX_UNLOCK(rxr);
4025	}
4026
4027	return (ENOBUFS);
4028}
4029
4030/*********************************************************************
4031 *
4032 *  Enable receive unit.
4033 *
4034 **********************************************************************/
4035static void
4036igb_initialize_receive_units(struct adapter *adapter)
4037{
4038	struct rx_ring	*rxr = adapter->rx_rings;
4039	struct ifnet	*ifp = adapter->ifp;
4040	struct e1000_hw *hw = &adapter->hw;
4041	u32		rctl, rxcsum, psize, srrctl = 0;
4042
4043	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4044
4045	/*
4046	 * Make sure receives are disabled while setting
4047	 * up the descriptor ring
4048	 */
4049	rctl = E1000_READ_REG(hw, E1000_RCTL);
4050	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4051
4052	/*
4053	** Set up for header split
4054	*/
4055	if (igb_header_split) {
4056		/* Use a standard mbuf for the header */
4057		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4058		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4059	} else
4060		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4061
4062	/*
4063	** Set up for jumbo frames
4064	*/
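	/*
	** Note: the SRRCTL packet buffer size field is expressed in
	** 1 KB units, hence the BSIZEPKT shift applied to the byte
	** counts below.
	*/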
4065	if (ifp->if_mtu > ETHERMTU) {
4066		rctl |= E1000_RCTL_LPE;
4067		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4068			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4069			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4070		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4071			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4072			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4073		}
4074		/* Set maximum packet len */
4075		psize = adapter->max_frame_size;
4076		/* are we on a vlan? */
4077		if (adapter->ifp->if_vlantrunk != NULL)
4078			psize += VLAN_TAG_SIZE;
4079		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4080	} else {
4081		rctl &= ~E1000_RCTL_LPE;
4082		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4083		rctl |= E1000_RCTL_SZ_2048;
4084	}
4085
4086	/* Setup the Base and Length of the Rx Descriptor Rings */
4087	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4088		u64 bus_addr = rxr->rxdma.dma_paddr;
4089		u32 rxdctl;
4090
4091		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4092		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4093		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4094		    (uint32_t)(bus_addr >> 32));
4095		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4096		    (uint32_t)bus_addr);
4097		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4098		/* Enable this Queue */
4099		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4100		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
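		/* Clear the low threshold fields, then program the
		   prefetch, host, and writeback thresholds */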
4101		rxdctl &= 0xFFF00000;
4102		rxdctl |= IGB_RX_PTHRESH;
4103		rxdctl |= IGB_RX_HTHRESH << 8;
4104		rxdctl |= IGB_RX_WTHRESH << 16;
4105		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4106	}
4107
4108	/*
4109	** Setup for RX MultiQueue
4110	*/
4111	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4112	if (adapter->num_queues > 1) {
4113		u32 random[10], mrqc, shift = 0;
4114		union igb_reta {
4115			u32 dword;
4116			u8  bytes[4];
4117		} reta;
4118
4119		arc4rand(&random, sizeof(random), 0);
4120		if (adapter->hw.mac.type == e1000_82575)
4121			shift = 6;
4122		/* Fill the 128-entry RETA, assigning entries round-robin across the queues */
4123		for (int i = 0; i < 128; i++) {
4124			reta.bytes[i & 3] =
4125			    (i % adapter->num_queues) << shift;
4126			if ((i & 3) == 3)
4127				E1000_WRITE_REG(hw,
4128				    E1000_RETA(i >> 2), reta.dword);
4129		}
4130		/* Now fill in hash table */
4131		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4132		for (int i = 0; i < 10; i++)
4133			E1000_WRITE_REG_ARRAY(hw,
4134			    E1000_RSSRK(0), i, random[i]);
4135
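		/* Hash on IPv4/IPv6 addresses and TCP/UDP port numbers */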
4136		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4137		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4138		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4139		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4140		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4141		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4142		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4143		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4144
4145		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4146
4147		/*
4148		** NOTE: Receive Full-Packet Checksum Offload
4149		** is mutually exclusive with Multiqueue. However
4150		** this is not the same as TCP/IP checksums which
4151		** still work.
4152		*/
4153		rxcsum |= E1000_RXCSUM_PCSD;
4154#if __FreeBSD_version >= 800000
4155		/* For SCTP Offload */
4156		if ((hw->mac.type == e1000_82576)
4157		    && (ifp->if_capenable & IFCAP_RXCSUM))
4158			rxcsum |= E1000_RXCSUM_CRCOFL;
4159#endif
4160	} else {
4161		/* Non RSS setup */
4162		if (ifp->if_capenable & IFCAP_RXCSUM) {
4163			rxcsum |= E1000_RXCSUM_IPPCSE;
4164#if __FreeBSD_version >= 800000
4165			if (adapter->hw.mac.type == e1000_82576)
4166				rxcsum |= E1000_RXCSUM_CRCOFL;
4167#endif
4168		} else
4169			rxcsum &= ~E1000_RXCSUM_TUOFL;
4170	}
4171	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4172
4173	/* Setup the Receive Control Register */
4174	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4175	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4176		   E1000_RCTL_RDMTS_HALF |
4177		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4178	/* Strip CRC bytes. */
4179	rctl |= E1000_RCTL_SECRC;
4180	/* Make sure VLAN Filters are off */
4181	rctl &= ~E1000_RCTL_VFE;
4182	/* Don't store bad packets */
4183	rctl &= ~E1000_RCTL_SBP;
4184
4185	/* Enable Receives */
4186	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4187
4188	/*
4189	 * Setup the HW Rx Head and Tail Descriptor Pointers
4190	 *   - needs to be after enable
4191	 */
4192	for (int i = 0; i < adapter->num_queues; i++) {
4193		rxr = &adapter->rx_rings[i];
4194		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4195		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4196	}
4197	return;
4198}
4199
4200/*********************************************************************
4201 *
4202 *  Free receive rings.
4203 *
4204 **********************************************************************/
4205static void
4206igb_free_receive_structures(struct adapter *adapter)
4207{
4208	struct rx_ring *rxr = adapter->rx_rings;
4209
4210	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4211		struct lro_ctrl	*lro = &rxr->lro;
4212		igb_free_receive_buffers(rxr);
4213		tcp_lro_free(lro);
4214		igb_dma_free(adapter, &rxr->rxdma);
4215	}
4216
4217	free(adapter->rx_rings, M_DEVBUF);
4218}
4219
4220/*********************************************************************
4221 *
4222 *  Free receive ring data structures.
4223 *
4224 **********************************************************************/
4225static void
4226igb_free_receive_buffers(struct rx_ring *rxr)
4227{
4228	struct adapter		*adapter = rxr->adapter;
4229	struct igb_rx_buf	*rxbuf;
4230	int i;
4231
4232	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4233
4234	/* Cleanup any existing buffers */
4235	if (rxr->rx_buffers != NULL) {
4236		for (i = 0; i < adapter->num_rx_desc; i++) {
4237			rxbuf = &rxr->rx_buffers[i];
4238			if (rxbuf->m_head != NULL) {
4239				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4240				    BUS_DMASYNC_POSTREAD);
4241				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4242				rxbuf->m_head->m_flags |= M_PKTHDR;
4243				m_freem(rxbuf->m_head);
4244			}
4245			if (rxbuf->m_pack != NULL) {
4246				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4247				    BUS_DMASYNC_POSTREAD);
4248				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4249				rxbuf->m_pack->m_flags |= M_PKTHDR;
4250				m_freem(rxbuf->m_pack);
4251			}
4252			rxbuf->m_head = NULL;
4253			rxbuf->m_pack = NULL;
4254			if (rxbuf->hmap != NULL) {
4255				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4256				rxbuf->hmap = NULL;
4257			}
4258			if (rxbuf->pmap != NULL) {
4259				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4260				rxbuf->pmap = NULL;
4261			}
4262		}
4263		if (rxr->rx_buffers != NULL) {
4264			free(rxr->rx_buffers, M_DEVBUF);
4265			rxr->rx_buffers = NULL;
4266		}
4267	}
4268
4269	if (rxr->htag != NULL) {
4270		bus_dma_tag_destroy(rxr->htag);
4271		rxr->htag = NULL;
4272	}
4273	if (rxr->ptag != NULL) {
4274		bus_dma_tag_destroy(rxr->ptag);
4275		rxr->ptag = NULL;
4276	}
4277}
4278
4279static __inline void
4280igb_rx_discard(struct rx_ring *rxr, int i)
4281{
4282	struct igb_rx_buf	*rbuf;
4283
4284	rbuf = &rxr->rx_buffers[i];
4285
4286	/* Partially received? Free the chain */
4287	if (rxr->fmp != NULL) {
4288		rxr->fmp->m_flags |= M_PKTHDR;
4289		m_freem(rxr->fmp);
4290		rxr->fmp = NULL;
4291		rxr->lmp = NULL;
4292	}
4293
4294	/*
4295	** With advanced descriptors the writeback
4296	** clobbers the buffer addrs, so it's easier
4297	** to just free the existing mbufs and take
4298	** the normal refresh path to get new buffers
4299	** and mapping.
4300	*/
4301	if (rbuf->m_head) {
4302		m_free(rbuf->m_head);
4303		rbuf->m_head = NULL;
4304	}
4305
4306	if (rbuf->m_pack) {
4307		m_free(rbuf->m_pack);
4308		rbuf->m_pack = NULL;
4309	}
4310
4311	return;
4312}
4313
4314static __inline void
4315igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4316{
4317
4318	/*
4319	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4320	 * has been verified by hardware; the packet also must not carry a
4321	 * VLAN tag in its ethernet header.
4322	 */
4323	if (rxr->lro_enabled &&
4324	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4325	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4326	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4327	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4328	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4329	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4330		/*
4331		 * Send to the stack if:
4332		 *  - LRO not enabled, or
4333		 *  - no LRO resources, or
4334		 *  - lro enqueue fails
4335		 */
4336		if (rxr->lro.lro_cnt != 0)
4337			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4338				return;
4339	}
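	/* Drop the RX lock while the packet is passed up the stack */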
4340	IGB_RX_UNLOCK(rxr);
4341	(*ifp->if_input)(ifp, m);
4342	IGB_RX_LOCK(rxr);
4343}
4344
4345/*********************************************************************
4346 *
4347 *  This routine executes in interrupt context. It replenishes
4348 *  the mbufs in the descriptor and sends data which has been
4349 *  dma'ed into host memory to upper layer.
4350 *
4351 *  We loop at most count times if count is > 0, or until done if
4352 *  count < 0.
4353 *
4354 *  Return TRUE if more to clean, FALSE otherwise
4355 *********************************************************************/
4356static bool
4357igb_rxeof(struct igb_queue *que, int count, int *done)
4358{
4359	struct adapter		*adapter = que->adapter;
4360	struct rx_ring		*rxr = que->rxr;
4361	struct ifnet		*ifp = adapter->ifp;
4362	struct lro_ctrl		*lro = &rxr->lro;
4363	struct lro_entry	*queued;
4364	int			i, processed = 0, rxdone = 0;
4365	u32			ptype, staterr = 0;
4366	union e1000_adv_rx_desc	*cur;
4367
4368	IGB_RX_LOCK(rxr);
4369	/* Sync the ring. */
4370	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4371	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4372
4373	/* Main clean loop */
4374	for (i = rxr->next_to_check; count != 0;) {
4375		struct mbuf		*sendmp, *mh, *mp;
4376		struct igb_rx_buf	*rxbuf;
4377		u16			hlen, plen, hdr, vtag;
4378		bool			eop = FALSE;
4379
4380		cur = &rxr->rx_base[i];
4381		staterr = le32toh(cur->wb.upper.status_error);
4382		if ((staterr & E1000_RXD_STAT_DD) == 0)
4383			break;
4384		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4385			break;
4386		count--;
4387		sendmp = mh = mp = NULL;
4388		cur->wb.upper.status_error = 0;
4389		rxbuf = &rxr->rx_buffers[i];
4390		plen = le16toh(cur->wb.upper.length);
4391		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4392		if ((adapter->hw.mac.type == e1000_i350) &&
4393		    (staterr & E1000_RXDEXT_STATERR_LB))
4394			vtag = be16toh(cur->wb.upper.vlan);
4395		else
4396			vtag = le16toh(cur->wb.upper.vlan);
4397		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4398		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4399
4400		/* Make sure all segments of a bad packet are discarded */
4401		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4402		    (rxr->discard)) {
4403			ifp->if_ierrors++;
4404			++rxr->rx_discarded;
4405			if (!eop) /* Catch subsequent segs */
4406				rxr->discard = TRUE;
4407			else
4408				rxr->discard = FALSE;
4409			igb_rx_discard(rxr, i);
4410			goto next_desc;
4411		}
4412
4413		/*
4414		** The way the hardware is configured to
4415		** split, it will ONLY use the header buffer
4416		** when header split is enabled, otherwise we
4417		** get normal behavior, ie, both header and
4418		** payload are DMA'd into the payload buffer.
4419		**
4420		** The fmp test is to catch the case where a
4421		** packet spans multiple descriptors, in that
4422		** case only the first header is valid.
4423		*/
4424		if (rxr->hdr_split && rxr->fmp == NULL) {
4425			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4426			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4427			if (hlen > IGB_HDR_BUF)
4428				hlen = IGB_HDR_BUF;
4429			mh = rxr->rx_buffers[i].m_head;
4430			mh->m_len = hlen;
4431			/* clear buf pointer for refresh */
4432			rxbuf->m_head = NULL;
4433			/*
4434			** Get the payload length; this
4435			** could be zero if it's a small
4436			** packet.
4437			*/
4438			if (plen > 0) {
4439				mp = rxr->rx_buffers[i].m_pack;
4440				mp->m_len = plen;
4441				mh->m_next = mp;
4442				/* clear buf pointer */
4443				rxbuf->m_pack = NULL;
4444				rxr->rx_split_packets++;
4445			}
4446		} else {
4447			/*
4448			** Either no header split, or a
4449			** secondary piece of a fragmented
4450			** split packet.
4451			*/
4452			mh = rxr->rx_buffers[i].m_pack;
4453			mh->m_len = plen;
4454			/* clear buf info for refresh */
4455			rxbuf->m_pack = NULL;
4456		}
4457
4458		++processed; /* So we know when to refresh */
4459
4460		/* Initial frame - setup */
4461		if (rxr->fmp == NULL) {
4462			mh->m_pkthdr.len = mh->m_len;
4463			/* Save the head of the chain */
4464			rxr->fmp = mh;
4465			rxr->lmp = mh;
4466			if (mp != NULL) {
4467				/* Add payload if split */
4468				mh->m_pkthdr.len += mp->m_len;
4469				rxr->lmp = mh->m_next;
4470			}
4471		} else {
4472			/* Chain mbuf's together */
4473			rxr->lmp->m_next = mh;
4474			rxr->lmp = rxr->lmp->m_next;
4475			rxr->fmp->m_pkthdr.len += mh->m_len;
4476		}
4477
4478		if (eop) {
4479			rxr->fmp->m_pkthdr.rcvif = ifp;
4480			ifp->if_ipackets++;
4481			rxr->rx_packets++;
4482			/* capture data for AIM */
4483			rxr->packets++;
4484			rxr->bytes += rxr->fmp->m_pkthdr.len;
4485			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4486
4487			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4488				igb_rx_checksum(staterr, rxr->fmp, ptype);
4489
4490			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4491			    (staterr & E1000_RXD_STAT_VP) != 0) {
4492				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4493				rxr->fmp->m_flags |= M_VLANTAG;
4494			}
4495#if __FreeBSD_version >= 800000
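			/* Tag the mbuf with this queue's MSI-X vector as its flowid */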
4496			rxr->fmp->m_pkthdr.flowid = que->msix;
4497			rxr->fmp->m_flags |= M_FLOWID;
4498#endif
4499			sendmp = rxr->fmp;
4500			/* Make sure to set M_PKTHDR. */
4501			sendmp->m_flags |= M_PKTHDR;
4502			rxr->fmp = NULL;
4503			rxr->lmp = NULL;
4504		}
4505
4506next_desc:
4507		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4508		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4509
4510		/* Advance our pointers to the next descriptor. */
4511		if (++i == adapter->num_rx_desc)
4512			i = 0;
4513		/*
4514		** Send to the stack or LRO
4515		*/
4516		if (sendmp != NULL) {
4517			rxr->next_to_check = i;
4518			igb_rx_input(rxr, ifp, sendmp, ptype);
4519			i = rxr->next_to_check;
4520			rxdone++;
4521		}
4522
4523		/* Every 8 descriptors we go to refresh mbufs */
4524		if (processed == 8) {
4525                        igb_refresh_mbufs(rxr, i);
4526                        processed = 0;
4527		}
4528	}
4529
4530	/* Catch any remainders */
4531	if (processed != 0 || i == rxr->next_to_refresh)
4532		igb_refresh_mbufs(rxr, i);
4533
4534	rxr->next_to_check = i;
4535
4536	/*
4537	 * Flush any outstanding LRO work
4538	 */
4539	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4540		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4541		tcp_lro_flush(lro, queued);
4542	}
4543
4544	if (done != NULL)
4545		*done = rxdone;
4546
4547	IGB_RX_UNLOCK(rxr);
4548	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4549}
4550
4551/*********************************************************************
4552 *
4553 *  Verify that the hardware indicated that the checksum is valid.
4554 *  Inform the stack about the status of checksum so that stack
4555 *  doesn't spend time verifying the checksum.
4556 *
4557 *********************************************************************/
4558static void
4559igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4560{
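	/*
	** The extended status/error word is split below: the status
	** bits live in the low 16 bits, the error bits in the top byte.
	*/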
4561	u16 status = (u16)staterr;
4562	u8  errors = (u8) (staterr >> 24);
4563	int sctp;
4564
4565	/* Ignore Checksum bit is set */
4566	if (status & E1000_RXD_STAT_IXSM) {
4567		mp->m_pkthdr.csum_flags = 0;
4568		return;
4569	}
4570
4571	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4572	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4573		sctp = 1;
4574	else
4575		sctp = 0;
4576	if (status & E1000_RXD_STAT_IPCS) {
4577		/* Did it pass? */
4578		if (!(errors & E1000_RXD_ERR_IPE)) {
4579			/* IP Checksum Good */
4580			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4581			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4582		} else
4583			mp->m_pkthdr.csum_flags = 0;
4584	}
4585
4586	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4587		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4588#if __FreeBSD_version >= 800000
4589		if (sctp) /* reassign */
4590			type = CSUM_SCTP_VALID;
4591#endif
4592		/* Did it pass? */
4593		if (!(errors & E1000_RXD_ERR_TCPE)) {
4594			mp->m_pkthdr.csum_flags |= type;
4595			if (sctp == 0)
4596				mp->m_pkthdr.csum_data = htons(0xffff);
4597		}
4598	}
4599	return;
4600}
4601
4602/*
4603 * This routine is run via a vlan
4604 * config EVENT
4605 */
4606static void
4607igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4608{
4609	struct adapter	*adapter = ifp->if_softc;
4610	u32		index, bit;
4611
4612	if (ifp->if_softc !=  arg)   /* Not our event */
4613		return;
4614
4615	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4616                return;
4617
4618	IGB_CORE_LOCK(adapter);
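	/*
	** Each shadow VFTA entry is a 32-bit word: the upper bits of
	** the tag select the word, the low 5 bits select the bit.
	*/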
4619	index = (vtag >> 5) & 0x7F;
4620	bit = vtag & 0x1F;
4621	adapter->shadow_vfta[index] |= (1 << bit);
4622	++adapter->num_vlans;
4623	/* Change hw filter setting */
4624	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4625		igb_setup_vlan_hw_support(adapter);
4626	IGB_CORE_UNLOCK(adapter);
4627}
4628
4629/*
4630 * This routine is run via a vlan
4631 * unconfig EVENT
4632 */
4633static void
4634igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4635{
4636	struct adapter	*adapter = ifp->if_softc;
4637	u32		index, bit;
4638
4639	if (ifp->if_softc !=  arg)
4640		return;
4641
4642	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4643                return;
4644
4645	IGB_CORE_LOCK(adapter);
4646	index = (vtag >> 5) & 0x7F;
4647	bit = vtag & 0x1F;
4648	adapter->shadow_vfta[index] &= ~(1 << bit);
4649	--adapter->num_vlans;
4650	/* Change hw filter setting */
4651	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4652		igb_setup_vlan_hw_support(adapter);
4653	IGB_CORE_UNLOCK(adapter);
4654}
4655
4656static void
4657igb_setup_vlan_hw_support(struct adapter *adapter)
4658{
4659	struct e1000_hw *hw = &adapter->hw;
4660	struct ifnet	*ifp = adapter->ifp;
4661	u32             reg;
4662
4663	if (adapter->vf_ifp) {
4664		e1000_rlpml_set_vf(hw,
4665		    adapter->max_frame_size + VLAN_TAG_SIZE);
4666		return;
4667	}
4668
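	/* Turn on VLAN tag processing (VME) in the device control register */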
4669	reg = E1000_READ_REG(hw, E1000_CTRL);
4670	reg |= E1000_CTRL_VME;
4671	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4672
4673	/* Enable the Filter Table */
4674	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4675		reg = E1000_READ_REG(hw, E1000_RCTL);
4676		reg &= ~E1000_RCTL_CFIEN;
4677		reg |= E1000_RCTL_VFE;
4678		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4679	}
4680
4681	/* Update the frame size */
4682	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4683	    adapter->max_frame_size + VLAN_TAG_SIZE);
4684
4685	/* Don't bother with table if no vlans */
4686	if ((adapter->num_vlans == 0) ||
4687	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4688                return;
4689	/*
4690	** A soft reset zeroes out the VFTA, so
4691	** we need to repopulate it now.
4692	*/
4693	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4694                if (adapter->shadow_vfta[i] != 0) {
4695			if (adapter->vf_ifp)
4696				e1000_vfta_set_vf(hw,
4697				    adapter->shadow_vfta[i], TRUE);
4698			else
4699				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4700                           	 i, adapter->shadow_vfta[i]);
4701		}
4702}
4703
4704static void
4705igb_enable_intr(struct adapter *adapter)
4706{
4707	/* With RSS set up what to auto clear */
4708	if (adapter->msix_mem) {
4709		u32 mask = (adapter->que_mask | adapter->link_mask);
4710		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4711		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4712		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4713		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4714		    E1000_IMS_LSC);
4715	} else {
4716		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4717		    IMS_ENABLE_MASK);
4718	}
4719	E1000_WRITE_FLUSH(&adapter->hw);
4720
4721	return;
4722}
4723
4724static void
4725igb_disable_intr(struct adapter *adapter)
4726{
4727	if (adapter->msix_mem) {
4728		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4729		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4730	}
4731	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4732	E1000_WRITE_FLUSH(&adapter->hw);
4733	return;
4734}
4735
4736/*
4737 * Bit of a misnomer: what this really means is
4738 * to enable OS management of the system, i.e.
4739 * to disable the special hardware management features.
4740 */
4741static void
4742igb_init_manageability(struct adapter *adapter)
4743{
4744	if (adapter->has_manage) {
4745		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4746		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4747
4748		/* disable hardware interception of ARP */
4749		manc &= ~(E1000_MANC_ARP_EN);
4750
4751                /* enable receiving management packets to the host */
4752		manc |= E1000_MANC_EN_MNG2HOST;
4753		manc2h |= 1 << 5;  /* Mng Port 623 */
4754		manc2h |= 1 << 6;  /* Mng Port 664 */
4755		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4756		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4757	}
4758}
4759
4760/*
4761 * Give control back to hardware management
4762 * controller if there is one.
4763 */
4764static void
4765igb_release_manageability(struct adapter *adapter)
4766{
4767	if (adapter->has_manage) {
4768		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4769
4770		/* re-enable hardware interception of ARP */
4771		manc |= E1000_MANC_ARP_EN;
4772		manc &= ~E1000_MANC_EN_MNG2HOST;
4773
4774		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4775	}
4776}
4777
4778/*
4779 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4780 * For ASF and Pass Through versions of f/w this means that
4781 * the driver is loaded.
4782 *
4783 */
4784static void
4785igb_get_hw_control(struct adapter *adapter)
4786{
4787	u32 ctrl_ext;
4788
4789	if (adapter->vf_ifp)
4790		return;
4791
4792	/* Let firmware know the driver has taken over */
4793	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4794	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4795	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4796}
4797
4798/*
4799 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4800 * For ASF and Pass Through versions of f/w this means that the
4801 * driver is no longer loaded.
4802 *
4803 */
4804static void
4805igb_release_hw_control(struct adapter *adapter)
4806{
4807	u32 ctrl_ext;
4808
4809	if (adapter->vf_ifp)
4810		return;
4811
4812	/* Let firmware take over control of h/w */
4813	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4814	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4815	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4816}
4817
4818static int
4819igb_is_valid_ether_addr(uint8_t *addr)
4820{
4821	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4822
4823	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4824		return (FALSE);
4825	}
4826
4827	return (TRUE);
4828}
4829
4830
4831/*
4832 * Enable PCI Wake On Lan capability
4833 */
4834static void
4835igb_enable_wakeup(device_t dev)
4836{
4837	u16     cap, status;
4838	u8      id;
4839
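	/*
	** Note: only the first entry in the capability list is examined;
	** if it is not the power-management capability we simply return
	** rather than walking the list.
	*/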
4840	/* First find the capabilities pointer*/
4841	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4842	/* Read the PM Capabilities */
4843	id = pci_read_config(dev, cap, 1);
4844	if (id != PCIY_PMG)     /* Something wrong */
4845		return;
4846	/* OK, we have the power capabilities, so
4847	   now get the status register */
4848	cap += PCIR_POWER_STATUS;
4849	status = pci_read_config(dev, cap, 2);
4850	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4851	pci_write_config(dev, cap, status, 2);
4852	return;
4853}
4854
4855static void
4856igb_led_func(void *arg, int onoff)
4857{
4858	struct adapter	*adapter = arg;
4859
4860	IGB_CORE_LOCK(adapter);
4861	if (onoff) {
4862		e1000_setup_led(&adapter->hw);
4863		e1000_led_on(&adapter->hw);
4864	} else {
4865		e1000_led_off(&adapter->hw);
4866		e1000_cleanup_led(&adapter->hw);
4867	}
4868	IGB_CORE_UNLOCK(adapter);
4869}
4870
4871/**********************************************************************
4872 *
4873 *  Update the board statistics counters.
4874 *
4875 **********************************************************************/
4876static void
4877igb_update_stats_counters(struct adapter *adapter)
4878{
4879	struct ifnet		*ifp;
4880        struct e1000_hw		*hw = &adapter->hw;
4881	struct e1000_hw_stats	*stats;
4882
4883	/*
4884	** The virtual function adapter has only a
4885	** small controlled set of stats, so update
4886	** only those and return.
4887	*/
4888	if (adapter->vf_ifp) {
4889		igb_update_vf_stats_counters(adapter);
4890		return;
4891	}
4892
4893	stats = (struct e1000_hw_stats	*)adapter->stats;
4894
4895	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4896	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4897		stats->symerrs +=
4898		    E1000_READ_REG(hw,E1000_SYMERRS);
4899		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4900	}
4901
4902	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4903	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4904	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4905	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4906
4907	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4908	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4909	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4910	stats->dc += E1000_READ_REG(hw, E1000_DC);
4911	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4912	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4913	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4914	/*
4915	** For watchdog management we need to know if we have been
4916	** paused during the last interval, so capture that here.
4917	*/
4918        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4919        stats->xoffrxc += adapter->pause_frames;
4920	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4921	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4922	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4923	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4924	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4925	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4926	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4927	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4928	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4929	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4930	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4931	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4932
4933	/* For the 64-bit byte counters the low dword must be read first. */
4934	/* Both registers clear on the read of the high dword */
4935
4936	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4937	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4938	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4939	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4940
4941	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4942	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4943	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4944	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4945	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4946
4947	stats->tor += E1000_READ_REG(hw, E1000_TORH);
4948	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
4949
4950	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4951	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4952	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4953	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4954	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4955	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4956	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4957	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4958	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4959	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4960
4961	/* Interrupt Counts */
4962
4963	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4964	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4965	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4966	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4967	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4968	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
4969	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
4970	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
4971	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
4972
4973	/* Host to Card Statistics */
4974
4975	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
4976	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
4977	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
4978	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
4979	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
4980	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
4981	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
4982	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
4983	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
4984	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
4985	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
4986	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
4987	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
4988	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
4989
4990	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
4991	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
4992	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
4993	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
4994	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
4995	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
4996
4997	ifp = adapter->ifp;
4998	ifp->if_collisions = stats->colc;
4999
5000	/* Rx Errors */
5001	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5002	    stats->crcerrs + stats->algnerrc +
5003	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5004
5005	/* Tx Errors */
5006	ifp->if_oerrors = stats->ecol +
5007	    stats->latecol + adapter->watchdog_events;
5008
5009	/* Driver specific counters */
5010	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5011	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5012	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5013	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5014	adapter->packet_buf_alloc_tx =
5015	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5016	adapter->packet_buf_alloc_rx =
5017	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5018}
5019
5020
5021/**********************************************************************
5022 *
5023 *  Initialize the VF board statistics counters.
5024 *
5025 **********************************************************************/
5026static void
5027igb_vf_init_stats(struct adapter *adapter)
5028{
5029        struct e1000_hw *hw = &adapter->hw;
5030	struct e1000_vf_stats	*stats;
5031
5032	stats = (struct e1000_vf_stats	*)adapter->stats;
5033	if (stats == NULL)
5034		return;
5035        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5036        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5037        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5038        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5039        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5040}
5041
5042/**********************************************************************
5043 *
5044 *  Update the VF board statistics counters.
5045 *
5046 **********************************************************************/
5047static void
5048igb_update_vf_stats_counters(struct adapter *adapter)
5049{
5050	struct e1000_hw *hw = &adapter->hw;
5051	struct e1000_vf_stats	*stats;
5052
5053	if (adapter->link_speed == 0)
5054		return;
5055
5056	stats = (struct e1000_vf_stats	*)adapter->stats;
5057
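	/*
	** The last_* snapshots (set up in igb_vf_init_stats) suggest
	** that UPDATE_VF_REG() accumulates the change since the
	** previous read of each VF counter.
	*/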
5058	UPDATE_VF_REG(E1000_VFGPRC,
5059	    stats->last_gprc, stats->gprc);
5060	UPDATE_VF_REG(E1000_VFGORC,
5061	    stats->last_gorc, stats->gorc);
5062	UPDATE_VF_REG(E1000_VFGPTC,
5063	    stats->last_gptc, stats->gptc);
5064	UPDATE_VF_REG(E1000_VFGOTC,
5065	    stats->last_gotc, stats->gotc);
5066	UPDATE_VF_REG(E1000_VFMPRC,
5067	    stats->last_mprc, stats->mprc);
5068}
5069
5070/* Export a single 32-bit register via a read-only sysctl. */
5071static int
5072igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5073{
5074	struct adapter *adapter;
5075	u_int val;
5076
5077	adapter = oidp->oid_arg1;
5078	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5079	return (sysctl_handle_int(oidp, &val, 0, req));
5080}
5081
5082/*
5083**  Tuneable interrupt rate handler
5084*/
5085static int
5086igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5087{
5088	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5089	int			error;
5090	u32			reg, usec, rate;
5091
5092	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
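	/*
	** The interrupt interval lives in bits [14:2] of EITR; it is
	** treated here as microseconds and reported as an approximate
	** interrupts-per-second rate.
	*/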
5093	usec = ((reg & 0x7FFC) >> 2);
5094	if (usec > 0)
5095		rate = 1000000 / usec;
5096	else
5097		rate = 0;
5098	error = sysctl_handle_int(oidp, &rate, 0, req);
5099	if (error || !req->newptr)
5100		return error;
5101		return (error);
5102	return (0);
5103
5104/*
5105 * Add sysctl variables, one per statistic, to the system.
5106 */
5107static void
5108igb_add_hw_stats(struct adapter *adapter)
5109{
5110	device_t dev = adapter->dev;
5111
5112	struct tx_ring *txr = adapter->tx_rings;
5113	struct rx_ring *rxr = adapter->rx_rings;
5114
5115	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5116	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5117	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5118	struct e1000_hw_stats *stats = adapter->stats;
5119
5120	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5121	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5122
5123#define QUEUE_NAME_LEN 32
5124	char namebuf[QUEUE_NAME_LEN];
5125
5126	/* Driver Statistics */
5127	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5128			CTLFLAG_RD, &adapter->link_irq, 0,
5129			"Link MSIX IRQ Handled");
5130	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5131			CTLFLAG_RD, &adapter->dropped_pkts,
5132			"Driver dropped packets");
5133	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5134			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5135			"Driver tx dma failure in xmit");
5136	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5137			CTLFLAG_RD, &adapter->rx_overruns,
5138			"RX overruns");
5139	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5140			CTLFLAG_RD, &adapter->watchdog_events,
5141			"Watchdog timeouts");
5142
5143	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5144			CTLFLAG_RD, &adapter->device_control,
5145			"Device Control Register");
5146	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5147			CTLFLAG_RD, &adapter->rx_control,
5148			"Receiver Control Register");
5149	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5150			CTLFLAG_RD, &adapter->int_mask,
5151			"Interrupt Mask");
5152	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5153			CTLFLAG_RD, &adapter->eint_mask,
5154			"Extended Interrupt Mask");
5155	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5156			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5157			"Transmit Buffer Packet Allocation");
5158	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5159			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5160			"Receive Buffer Packet Allocation");
5161	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5162			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5163			"Flow Control High Watermark");
5164	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5165			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5166			"Flow Control Low Watermark");
5167
5168	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5169		struct lro_ctrl *lro = &rxr->lro;
5170
5171		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5172		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5173					    CTLFLAG_RD, NULL, "Queue Name");
5174		queue_list = SYSCTL_CHILDREN(queue_node);
5175
5176		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5177				CTLFLAG_RD, &adapter->queues[i],
5178				sizeof(&adapter->queues[i]),
5179				igb_sysctl_interrupt_rate_handler,
5180				"IU", "Interrupt Rate");
5181
5182		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5183				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5184				igb_sysctl_reg_handler, "IU",
5185 				"Transmit Descriptor Head");
5186		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5187				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5188				igb_sysctl_reg_handler, "IU",
5189 				"Transmit Descriptor Tail");
5190		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5191				CTLFLAG_RD, &txr->no_desc_avail,
5192				"Queue No Descriptor Available");
5193		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5194				CTLFLAG_RD, &txr->tx_packets,
5195				"Queue Packets Transmitted");
5196
5197		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5198				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5199				igb_sysctl_reg_handler, "IU",
5200				"Receive Descriptor Head");
5201		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5202				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5203				igb_sysctl_reg_handler, "IU",
5204				"Receive Descriptor Tail");
5205		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5206				CTLFLAG_RD, &rxr->rx_packets,
5207				"Queue Packets Received");
5208		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5209				CTLFLAG_RD, &rxr->rx_bytes,
5210				"Queue Bytes Received");
5211		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5212				CTLFLAG_RD, &lro->lro_queued, 0,
5213				"LRO Queued");
5214		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5215				CTLFLAG_RD, &lro->lro_flushed, 0,
5216				"LRO Flushed");
5217	}
5218
5219	/* MAC stats get their own sub node */
5220
5221	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5222				    CTLFLAG_RD, NULL, "MAC Statistics");
5223	stat_list = SYSCTL_CHILDREN(stat_node);
5224
5225	/*
5226	** VF adapter has a very limited set of stats
5227	** since it's not managing the metal, so to speak.
5228	*/
5229	if (adapter->vf_ifp) {
5230	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5231			CTLFLAG_RD, &stats->gprc,
5232			"Good Packets Received");
5233	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5234			CTLFLAG_RD, &stats->gptc,
5235			"Good Packets Transmitted");
5236 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5237 			CTLFLAG_RD, &stats->gorc,
5238 			"Good Octets Received");
5239 	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5240 			CTLFLAG_RD, &stats->gotc,
5241 			"Good Octets Transmitted");
5242	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5243			CTLFLAG_RD, &stats->mprc,
5244			"Multicast Packets Received");
5245		return;
5246	}
5247
5248	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5249			CTLFLAG_RD, &stats->ecol,
5250			"Excessive collisions");
5251	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5252			CTLFLAG_RD, &stats->scc,
5253			"Single collisions");
5254	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5255			CTLFLAG_RD, &stats->mcc,
5256			"Multiple collisions");
5257	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5258			CTLFLAG_RD, &stats->latecol,
5259			"Late collisions");
5260	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5261			CTLFLAG_RD, &stats->colc,
5262			"Collision Count");
5263	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5264			CTLFLAG_RD, &stats->symerrs,
5265			"Symbol Errors");
5266	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5267			CTLFLAG_RD, &stats->sec,
5268			"Sequence Errors");
5269	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5270			CTLFLAG_RD, &stats->dc,
5271			"Defer Count");
5272	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5273			CTLFLAG_RD, &stats->mpc,
5274			"Missed Packets");
5275	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5276			CTLFLAG_RD, &stats->rnbc,
5277			"Receive No Buffers");
5278	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5279			CTLFLAG_RD, &stats->ruc,
5280			"Receive Undersize");
5281	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5282			CTLFLAG_RD, &stats->rfc,
5283			"Fragmented Packets Received ");
5284	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5285			CTLFLAG_RD, &stats->roc,
5286			"Oversized Packets Received");
5287	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5288			CTLFLAG_RD, &stats->rjc,
5289			"Received Jabber");
5290	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5291			CTLFLAG_RD, &stats->rxerrc,
5292			"Receive Errors");
5293	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5294			CTLFLAG_RD, &stats->crcerrs,
5295			"CRC errors");
5296	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5297			CTLFLAG_RD, &stats->algnerrc,
5298			"Alignment Errors");
5299	/* On 82575 these are collision counts */
5300	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5301			CTLFLAG_RD, &stats->cexterr,
5302			"Collision/Carrier extension errors");
5303	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5304			CTLFLAG_RD, &stats->xonrxc,
5305			"XON Received");
5306	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5307			CTLFLAG_RD, &stats->xontxc,
5308			"XON Transmitted");
5309	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5310			CTLFLAG_RD, &stats->xoffrxc,
5311			"XOFF Received");
5312	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5313			CTLFLAG_RD, &stats->xofftxc,
5314			"XOFF Transmitted");
5315	/* Packet Reception Stats */
5316	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5317			CTLFLAG_RD, &stats->tpr,
5318			"Total Packets Received");
5319	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5320			CTLFLAG_RD, &stats->gprc,
5321			"Good Packets Received");
5322	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5323			CTLFLAG_RD, &stats->bprc,
5324			"Broadcast Packets Received");
5325	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5326			CTLFLAG_RD, &stats->mprc,
5327			"Multicast Packets Received");
5328	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5329			CTLFLAG_RD, &stats->prc64,
5330			"64 byte frames received");
5331	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5332			CTLFLAG_RD, &stats->prc127,
5333			"65-127 byte frames received");
5334	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5335			CTLFLAG_RD, &stats->prc255,
5336			"128-255 byte frames received");
5337	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5338			CTLFLAG_RD, &stats->prc511,
5339			"256-511 byte frames received");
5340	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5341			CTLFLAG_RD, &stats->prc1023,
5342			"512-1023 byte frames received");
5343	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5344			CTLFLAG_RD, &stats->prc1522,
5345			"1024-1522 byte frames received");
5346	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5347			CTLFLAG_RD, &stats->gorc,
5348			"Good Octets Received");
5349
5350	/* Packet Transmission Stats */
5351	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5352			CTLFLAG_RD, &stats->gotc,
5353			"Good Octets Transmitted");
5354	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5355			CTLFLAG_RD, &stats->tpt,
5356			"Total Packets Transmitted");
5357	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5358			CTLFLAG_RD, &stats->gptc,
5359			"Good Packets Transmitted");
5360	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5361			CTLFLAG_RD, &stats->bptc,
5362			"Broadcast Packets Transmitted");
5363	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5364			CTLFLAG_RD, &stats->mptc,
5365			"Multicast Packets Transmitted");
5366	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5367			CTLFLAG_RD, &stats->ptc64,
5368			"64 byte frames transmitted");
5369	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5370			CTLFLAG_RD, &stats->ptc127,
5371			"65-127 byte frames transmitted");
5372	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5373			CTLFLAG_RD, &stats->ptc255,
5374			"128-255 byte frames transmitted");
5375	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5376			CTLFLAG_RD, &stats->ptc511,
5377			"256-511 byte frames transmitted");
5378	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5379			CTLFLAG_RD, &stats->ptc1023,
5380			"512-1023 byte frames transmitted");
5381	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5382			CTLFLAG_RD, &stats->ptc1522,
5383			"1024-1522 byte frames transmitted");
5384	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5385			CTLFLAG_RD, &stats->tsctc,
5386			"TSO Contexts Transmitted");
5387	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5388			CTLFLAG_RD, &stats->tsctfc,
5389			"TSO Contexts Failed");
5390
5391
5392	/* Interrupt Stats */
5393
5394	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5395				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5396	int_list = SYSCTL_CHILDREN(int_node);
5397
5398	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5399			CTLFLAG_RD, &stats->iac,
5400			"Interrupt Assertion Count");
5401
5402	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5403			CTLFLAG_RD, &stats->icrxptc,
5404			"Interrupt Cause Rx Pkt Timer Expire Count");
5405
5406	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5407			CTLFLAG_RD, &stats->icrxatc,
5408			"Interrupt Cause Rx Abs Timer Expire Count");
5409
5410	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5411			CTLFLAG_RD, &stats->ictxptc,
5412			"Interrupt Cause Tx Pkt Timer Expire Count");
5413
5414	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5415			CTLFLAG_RD, &stats->ictxatc,
5416			"Interrupt Cause Tx Abs Timer Expire Count");
5417
5418	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5419			CTLFLAG_RD, &stats->ictxqec,
5420			"Interrupt Cause Tx Queue Empty Count");
5421
5422	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5423			CTLFLAG_RD, &stats->ictxqmtc,
5424			"Interrupt Cause Tx Queue Min Thresh Count");
5425
5426	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5427			CTLFLAG_RD, &stats->icrxdmtc,
5428			"Interrupt Cause Rx Desc Min Thresh Count");
5429
5430	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5431			CTLFLAG_RD, &stats->icrxoc,
5432			"Interrupt Cause Receiver Overrun Count");
5433
5434	/* Host to Card Stats */
5435
5436	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5437				    CTLFLAG_RD, NULL,
5438				    "Host to Card Statistics");
5439
5440	host_list = SYSCTL_CHILDREN(host_node);
5441
5442	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5443			CTLFLAG_RD, &stats->cbtmpc,
5444			"Circuit Breaker Tx Packet Count");
5445
5446	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5447			CTLFLAG_RD, &stats->htdpmc,
5448			"Host Transmit Discarded Packets");
5449
5450	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5451			CTLFLAG_RD, &stats->rpthc,
5452			"Rx Packets To Host");
5453
5454	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5455			CTLFLAG_RD, &stats->cbrmpc,
5456			"Circuit Breaker Rx Packet Count");
5457
5458	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5459			CTLFLAG_RD, &stats->cbrdpc,
5460			"Circuit Breaker Rx Dropped Count");
5461
5462	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5463			CTLFLAG_RD, &stats->hgptc,
5464			"Host Good Packets Tx Count");
5465
5466	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5467			CTLFLAG_RD, &stats->htcbdpc,
5468			"Host Tx Circuit Breaker Dropped Count");
5469
5470	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5471			CTLFLAG_RD, &stats->hgorc,
5472			"Host Good Octets Received Count");
5473
5474	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5475			CTLFLAG_RD, &stats->hgotc,
5476			"Host Good Octets Transmit Count");
5477
5478	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5479			CTLFLAG_RD, &stats->lenerrs,
5480			"Length Errors");
5481
5482	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5483			CTLFLAG_RD, &stats->scvpc,
5484			"SerDes/SGMII Code Violation Pkt Count");
5485
5486	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5487			CTLFLAG_RD, &stats->hrmpc,
5488			"Header Redirection Missed Packet Count");
5489}
5490
5491
5492/**********************************************************************
5493 *
5494 *  This routine provides a way to dump out the adapter EEPROM,
5495 *  often a useful debug/service tool. It dumps only the first
5496 *  32 words; everything of interest lies within that range.
5497 *
5498 **********************************************************************/
5499static int
5500igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5501{
5502	struct adapter *adapter;
5503	int error;
5504	int result;
5505
5506	result = -1;
5507	error = sysctl_handle_int(oidp, &result, 0, req);
5508
5509	if (error || !req->newptr)
5510		return (error);
5511
5512	/*
5513	 * Writing a value of 1 to this node causes a hex
5514	 * dump of the first 32 16-bit words of the EEPROM
5515	 * to be printed to the console.
5516	 */
5517	if (result == 1) {
5518		adapter = (struct adapter *)arg1;
5519		igb_print_nvm_info(adapter);
5520	}
5521
5522	return (error);
5523}
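/*
 * Illustrative usage, a sketch only: assuming the handler above is hooked
 * up as a read/write integer node named "nvm" under the adapter's sysctl
 * tree, writing 1 to it triggers the dump (any other value is accepted
 * and quietly ignored), e.g. from a shell:
 *
 *	sysctl dev.igb.0.nvm=1
 */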
5524
5525static void
5526igb_print_nvm_info(struct adapter *adapter)
5527{
5528	u16	eeprom_data;
5529	int	i, j, row = 0;
5530
5531	/* It's a bit crude, but it gets the job done */
5532	printf("\nInterface EEPROM Dump:\n");
5533	printf("Offset\n0x0000  ");
5534	for (i = 0, j = 0; i < 32; i++, j++) {
5535		if (j == 8) { /* Start a new row and print its offset */
5536			j = 0; ++row;
5537			printf("\n0x00%x0  ", row);
5538		}
5539		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5540		printf("%04x ", eeprom_data);
5541	}
5542	printf("\n");
5543}
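/*
 * For reference, the routine above emits four rows of eight 16-bit words;
 * the values below are placeholders, not real EEPROM contents:
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0020  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0030  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */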
5544
5545static void
5546igb_set_sysctl_value(struct adapter *adapter, const char *name,
5547	const char *description, int *limit, int value)
5548{
5549	*limit = value;
5550	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5551	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5552	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5553}
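/*
 * Example call, a sketch only; the sysctl name, field and default below
 * are illustrative assumptions rather than what the driver necessarily
 * uses:
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max packets to process per receive interrupt",
 *	    &adapter->rx_process_limit, 100);
 *
 * This stores 100 in adapter->rx_process_limit and exposes it as a
 * read/write integer under the adapter's sysctl tree.
 */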
5554
5555