/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 223198 2011-06-17 20:06:52Z jhb $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.2.3";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/
114
115static igb_vendor_info_t igb_vendor_info_array[] =
116{
117	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
118	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
119						PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
139						PCI_ANY_ID, PCI_ANY_ID, 0},
140	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
141						PCI_ANY_ID, PCI_ANY_ID, 0},
142	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
146						PCI_ANY_ID, PCI_ANY_ID, 0},
147	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
152	/* required last entry */
153	{ 0, 0, 0, 0, 0}
154};
155
156/*********************************************************************
157 *  Table of branding strings for all supported NICs.
158 *********************************************************************/
159
160static char *igb_strings[] = {
161	"Intel(R) PRO/1000 Network Connection"
162};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
static void	igb_deferred_mq_start(void *, int);
#else
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static void	igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic seen by that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** small packets often fit entirely in the header
** mbuf, using no cluster at all. It is a very
** workload-dependent feature.
*/
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to FULL */
static int igb_fc_setting = e1000_fc_full;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/* Energy Efficient Ethernet - default to off */
static int igb_eee_disabled = TRUE;
TUNABLE_INT("hw.igb.eee_disabled", &igb_eee_disabled);

/*
** DMA Coalescing, only for the i350 - default to off;
** this feature is for power savings.
*/
static int igb_dma_coalesce = FALSE;
TUNABLE_INT("hw.igb.dma_coalesce", &igb_dma_coalesce);
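
/*
 * Illustrative examples (not part of the original source): TUNABLE_INT
 * registers each of the names above as a loader tunable, so they are
 * normally set in /boot/loader.conf before the module loads, e.g.:
 *
 *	hw.igb.rxd="2048"
 *	hw.igb.txd="2048"
 *	hw.igb.num_queues="2"
 *	hw.igb.fc_setting="3"
 *
 * The values shown are examples only, not recommendations.
 */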

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  an adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
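
	/*
	 * Illustrative usage (not part of the original source): these
	 * OIDs hang off the per-device sysctl tree, so they can be read
	 * or set at runtime, e.g.:
	 *
	 *	sysctl dev.igb.0.enable_aim=0
	 *	sysctl dev.igb.0.flow_control=3
	 *
	 * The unit number ("0") and the values are examples only.
	 */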

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
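	/*
	 * Worked example (illustrative, assuming IGB_DBA_ALIGN is 128
	 * bytes and a 16-byte descriptor): each ring must then hold a
	 * multiple of 128 / 16 = 8 descriptors, so a request of, say,
	 * 1020 descriptors would be rejected and the default used.
	 */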
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats =
		    (struct e1000_vf_stats *)malloc(
		    sizeof(struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats =
		    (struct e1000_hw_stats *)malloc(
		    sizeof(struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		igb_set_sysctl_value(adapter, "dma_coalesce",
		    "configure dma coalesce",
		    &adapter->dma_coalesce, igb_dma_coalesce);
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "disable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    igb_eee_disabled);
		e1000_set_eee_i350(&adapter->hw);
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-LAN
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	igb_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;
#if __FreeBSD_version >= 800000
	struct tx_ring *txr = adapter->tx_rings;
#endif

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
#if __FreeBSD_version < 800000
		igb_start(ifp);
#else
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			if (!drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr, NULL);
			IGB_TX_UNLOCK(txr);
		}
#endif
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000
/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status = IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX routine, called from the
 * stack; it always uses tx[0] and spins for the lock.
 * It should not be used with multiqueue TX.
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#else /* __FreeBSD_version >= 800000 */
/*
** Multiqueue Transmit driver
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int			i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;
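	/*
	 * Illustrative note (not in the original): with, e.g., four
	 * queues, a packet whose flowid is 7 maps to ring 7 % 4 = 3,
	 * keeping all packets of one flow on one TX ring.
	 */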

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &txr->txq_task);
	}

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
			igb_txeof(txr);
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
	IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version < 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}
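		/*
		 * Illustrative note (not in the original): with the
		 * 14-byte Ethernet header and 4-byte CRC, the largest
		 * MTU accepted here is 9234 - 18 = 9216 bytes.
		 */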

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo frames / packet split.
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
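	/*
	 * Illustrative note (not in the original): a 9000-byte MTU gives
	 * max_frame_size = 9000 + 14 + 4 = 9018, which lands in the
	 * MJUM9BYTES (9k cluster) case above.
	 */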

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	e1000_set_eee_i350(&adapter->hw);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, -1, NULL);

		IGB_TX_LOCK(txr);
		if (igb_txeof(txr))
			more = TRUE;
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: if using this code you MUST be sure that
 *  multiqueue is not in use, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
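/*
 * For example (illustrative, not from the original), that can be done
 * with the loader tunable shown earlier:  hw.igb.num_queues="1"
 */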
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link(adapter, 0);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	igb_rxeof(que, count, &rx_done);

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Queue Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out the last calculated setting
	**  - Calculate based on the average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if below gigabit speed */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
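	/*
	 * Worked example (illustrative, not in the original): an average
	 * frame of 1500 bytes gives newitr = 1500 + 24 = 1524; that is
	 * above the 300-1200 mid range, so it becomes 1524 / 2 = 762,
	 * and masking with 0x7FFC yields 760.
	 */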

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx ||
	    (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}
1696
1697
1698/*********************************************************************
1699 *
1700 *  This routine maps the mbufs to Advanced TX descriptors.
1701 *  used by the 82575 adapter.
1702 *
1703 **********************************************************************/
1704
1705static int
1706igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1707{
1708	struct adapter		*adapter = txr->adapter;
1709	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1710	bus_dmamap_t		map;
1711	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1712	union e1000_adv_tx_desc	*txd = NULL;
1713	struct mbuf		*m_head;
1714	u32			olinfo_status = 0, cmd_type_len = 0;
1715	int			nsegs, i, j, error, first, last = 0;
1716	u32			hdrlen = 0;
1717
1718	m_head = *m_headp;
1719
1720
1721	/* Set basic descriptor constants */
1722	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1723	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1724	if (m_head->m_flags & M_VLANTAG)
1725		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1726
1727	/*
1728         * Map the packet for DMA.
1729	 *
1730	 * Capture the first descriptor index,
1731	 * this descriptor will have the index
1732	 * of the EOP which is the only one that
1733	 * now gets a DONE bit writeback.
1734	 */
1735	first = txr->next_avail_desc;
1736	tx_buffer = &txr->tx_buffers[first];
1737	tx_buffer_mapped = tx_buffer;
1738	map = tx_buffer->map;
1739
1740	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1741	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1742
1743	if (error == EFBIG) {
1744		struct mbuf *m;
1745
1746		m = m_defrag(*m_headp, M_DONTWAIT);
1747		if (m == NULL) {
1748			adapter->mbuf_defrag_failed++;
1749			m_freem(*m_headp);
1750			*m_headp = NULL;
1751			return (ENOBUFS);
1752		}
1753		*m_headp = m;
1754
1755		/* Try it again */
1756		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1757		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1758
1759		if (error == ENOMEM) {
1760			adapter->no_tx_dma_setup++;
1761			return (error);
1762		} else if (error != 0) {
1763			adapter->no_tx_dma_setup++;
1764			m_freem(*m_headp);
1765			*m_headp = NULL;
1766			return (error);
1767		}
1768	} else if (error == ENOMEM) {
1769		adapter->no_tx_dma_setup++;
1770		return (error);
1771	} else if (error != 0) {
1772		adapter->no_tx_dma_setup++;
1773		m_freem(*m_headp);
1774		*m_headp = NULL;
1775		return (error);
1776	}
1777
1778	/* Check again to be sure we have enough descriptors */
1779        if (nsegs > (txr->tx_avail - 2)) {
1780                txr->no_desc_avail++;
1781		bus_dmamap_unload(txr->txtag, map);
1782		return (ENOBUFS);
1783        }
1784	m_head = *m_headp;
1785
1786        /*
1787         * Set up the context descriptor:
1788         * used when any hardware offload is done.
1789	 * This includes CSUM, VLAN, and TSO. It
1790	 * will use the first descriptor.
1791         */
1792        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1793		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1794			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1795			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1796			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1797		} else
1798			return (ENXIO);
1799	} else if (igb_tx_ctx_setup(txr, m_head))
1800		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1801
1802	/* Calculate payload length */
1803	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1804	    << E1000_ADVTXD_PAYLEN_SHIFT);
1805
1806	/* 82575 needs the queue index added */
1807	if (adapter->hw.mac.type == e1000_82575)
1808		olinfo_status |= txr->me << 4;
1809
1810	/* Set up our transmit descriptors */
1811	i = txr->next_avail_desc;
1812	for (j = 0; j < nsegs; j++) {
1813		bus_size_t seg_len;
1814		bus_addr_t seg_addr;
1815
1816		tx_buffer = &txr->tx_buffers[i];
1817		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1818		seg_addr = segs[j].ds_addr;
1819		seg_len  = segs[j].ds_len;
1820
1821		txd->read.buffer_addr = htole64(seg_addr);
1822		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1823		txd->read.olinfo_status = htole32(olinfo_status);
1824		last = i;
1825		if (++i == adapter->num_tx_desc)
1826			i = 0;
1827		tx_buffer->m_head = NULL;
1828		tx_buffer->next_eop = -1;
1829	}
1830
1831	txr->next_avail_desc = i;
1832	txr->tx_avail -= nsegs;
1833
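	/*
	 * The mbuf is stashed on the last buffer of the packet, but
	 * the dmamap that was loaded belongs to the first; swap the
	 * maps so that igb_txeof() unloads the loaded map when the
	 * EOP descriptor completes.
	 */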
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1838
	/*
	 * The last descriptor of the packet needs End Of Packet
	 * (EOP) and Report Status (RS) set.
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1846	/*
1847	 * Keep track in the first buffer which
1848	 * descriptor will be written back
1849	 */
1850	tx_buffer = &txr->tx_buffers[first];
1851	tx_buffer->next_eop = last;
1852	txr->watchdog_time = ticks;
1853
1854	/*
1855	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1856	 * that this frame is available to transmit.
1857	 */
1858	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1859	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1860	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1861	++txr->tx_packets;
1862
	return (0);
}
1866
1867static void
1868igb_set_promisc(struct adapter *adapter)
1869{
1870	struct ifnet	*ifp = adapter->ifp;
1871	struct e1000_hw *hw = &adapter->hw;
1872	u32		reg;
1873
1874	if (adapter->vf_ifp) {
1875		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1876		return;
1877	}
1878
1879	reg = E1000_READ_REG(hw, E1000_RCTL);
1880	if (ifp->if_flags & IFF_PROMISC) {
1881		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1882		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1883	} else if (ifp->if_flags & IFF_ALLMULTI) {
1884		reg |= E1000_RCTL_MPE;
1885		reg &= ~E1000_RCTL_UPE;
1886		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1887	}
1888}
1889
1890static void
1891igb_disable_promisc(struct adapter *adapter)
1892{
1893	struct e1000_hw *hw = &adapter->hw;
1894	u32		reg;
1895
1896	if (adapter->vf_ifp) {
1897		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1898		return;
1899	}
1900	reg = E1000_READ_REG(hw, E1000_RCTL);
1901	reg &=  (~E1000_RCTL_UPE);
1902	reg &=  (~E1000_RCTL_MPE);
1903	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1904}
1905
1906
1907/*********************************************************************
1908 *  Multicast Update
1909 *
 *  This routine is called whenever the multicast address list is updated.
1911 *
1912 **********************************************************************/
1913
1914static void
1915igb_set_multi(struct adapter *adapter)
1916{
1917	struct ifnet	*ifp = adapter->ifp;
1918	struct ifmultiaddr *ifma;
1919	u32 reg_rctl = 0;
1920	u8  *mta;
1921
1922	int mcnt = 0;
1923
1924	IOCTL_DEBUGOUT("igb_set_multi: begin");
1925
1926	mta = adapter->mta;
1927	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1928	    MAX_NUM_MULTICAST_ADDRESSES);
1929
1930#if __FreeBSD_version < 800000
1931	IF_ADDR_LOCK(ifp);
1932#else
1933	if_maddr_rlock(ifp);
1934#endif
1935	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1936		if (ifma->ifma_addr->sa_family != AF_LINK)
1937			continue;
1938
1939		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1940			break;
1941
1942		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1943		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1944		mcnt++;
1945	}
1946#if __FreeBSD_version < 800000
1947	IF_ADDR_UNLOCK(ifp);
1948#else
1949	if_maddr_runlock(ifp);
1950#endif
1951
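	/*
	 * If the walk above hit the table limit, the list no longer
	 * fits in hardware; fall back to receiving all multicast
	 * frames (MPE) instead of programming a partial filter.
	 */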
1952	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1953		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1954		reg_rctl |= E1000_RCTL_MPE;
1955		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1956	} else
1957		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1958}
1959
1960
1961/*********************************************************************
1962 *  Timer routine:
1963 *  	This routine checks for link status,
1964 *	updates statistics, and does the watchdog.
1965 *
1966 **********************************************************************/
1967
1968static void
1969igb_local_timer(void *arg)
1970{
1971	struct adapter		*adapter = arg;
1972	device_t		dev = adapter->dev;
1973	struct tx_ring		*txr = adapter->tx_rings;
1974
1975
1976	IGB_CORE_LOCK_ASSERT(adapter);
1977
1978	igb_update_link_status(adapter);
1979	igb_update_stats_counters(adapter);
1980
	/*
	** If flow control has paused us since the last check,
	** it invalidates the watchdog timing, so don't run it.
	*/
1985	if (adapter->pause_frames) {
1986		adapter->pause_frames = 0;
1987		goto out;
1988	}
1989
	/*
	** Watchdog: check for time since any descriptor was cleaned
	*/
1993	for (int i = 0; i < adapter->num_queues; i++, txr++)
1994		if (txr->queue_status == IGB_QUEUE_HUNG)
1995			goto timeout;
1996out:
1997	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1998#ifndef DEVICE_POLLING
1999	/* Schedule all queue interrupts - deadlock protection */
2000	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2001#endif
2002	return;
2003
2004timeout:
	device_printf(dev, "Watchdog timeout -- resetting\n");
	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
	device_printf(dev, "TX(%d) desc avail = %d, "
	    "Next TX to Clean = %d\n",
	    txr->me, txr->tx_avail, txr->next_to_clean);
2012	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2013	adapter->watchdog_events++;
2014	igb_init_locked(adapter);
2015}
2016
2017static void
2018igb_update_link_status(struct adapter *adapter)
2019{
2020	struct e1000_hw *hw = &adapter->hw;
2021	struct ifnet *ifp = adapter->ifp;
2022	device_t dev = adapter->dev;
2023	struct tx_ring *txr = adapter->tx_rings;
2024	u32 link_check, thstat, ctrl;
2025
2026	link_check = thstat = ctrl = 0;
2027
	/* Get the cached link value or read it for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read the phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	/* VF devices report media type unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* FALLTHROUGH */
	default:
		break;
	}
2055
2056	/* Check for thermal downshift or shutdown */
2057	if (hw->mac.type == e1000_i350) {
2058		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2059		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2060	}
2061
2062	/* Now we check if a transition has happened */
2063	if (link_check && (adapter->link_active == 0)) {
2064		e1000_get_speed_and_duplex(&adapter->hw,
2065		    &adapter->link_speed, &adapter->link_duplex);
2066		if (bootverbose)
2067			device_printf(dev, "Link is up %d Mbps %s\n",
2068			    adapter->link_speed,
2069			    ((adapter->link_duplex == FULL_DUPLEX) ?
2070			    "Full Duplex" : "Half Duplex"));
2071		adapter->link_active = 1;
2072		ifp->if_baudrate = adapter->link_speed * 1000000;
2073		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2074		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2075			device_printf(dev, "Link: thermal downshift\n");
2076		/* This can sleep */
2077		if_link_state_change(ifp, LINK_STATE_UP);
2078	} else if (!link_check && (adapter->link_active == 1)) {
2079		ifp->if_baudrate = adapter->link_speed = 0;
2080		adapter->link_duplex = 0;
2081		if (bootverbose)
2082			device_printf(dev, "Link is Down\n");
2083		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2084		    (thstat & E1000_THSTAT_PWR_DOWN))
2085			device_printf(dev, "Link: thermal shutdown\n");
2086		adapter->link_active = 0;
2087		/* This can sleep */
2088		if_link_state_change(ifp, LINK_STATE_DOWN);
2089		/* Turn off watchdogs */
2090		for (int i = 0; i < adapter->num_queues; i++, txr++)
2091			txr->queue_status = IGB_QUEUE_IDLE;
2092	}
2093}
2094
2095/*********************************************************************
2096 *
2097 *  This routine disables all traffic on the adapter by issuing a
2098 *  global reset on the MAC and deallocates TX/RX buffers.
2099 *
2100 **********************************************************************/
2101
2102static void
2103igb_stop(void *arg)
2104{
2105	struct adapter	*adapter = arg;
2106	struct ifnet	*ifp = adapter->ifp;
2107	struct tx_ring *txr = adapter->tx_rings;
2108
2109	IGB_CORE_LOCK_ASSERT(adapter);
2110
2111	INIT_DEBUGOUT("igb_stop: begin");
2112
2113	igb_disable_intr(adapter);
2114
2115	callout_stop(&adapter->timer);
2116
2117	/* Tell the stack that the interface is no longer active */
2118	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2119
2120	/* Unarm watchdog timer. */
2121	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2122		IGB_TX_LOCK(txr);
2123		txr->queue_status = IGB_QUEUE_IDLE;
2124		IGB_TX_UNLOCK(txr);
2125	}
2126
2127	e1000_reset_hw(&adapter->hw);
2128	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2129
2130	e1000_led_off(&adapter->hw);
2131	e1000_cleanup_led(&adapter->hw);
2132}
2133
2134
2135/*********************************************************************
2136 *
2137 *  Determine hardware revision.
2138 *
2139 **********************************************************************/
2140static void
2141igb_identify_hardware(struct adapter *adapter)
2142{
2143	device_t dev = adapter->dev;
2144
2145	/* Make sure our PCI config space has the necessary stuff set */
2146	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2147	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2148	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2149		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2150		    "bits were not set!\n");
2151		adapter->hw.bus.pci_cmd_word |=
2152		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2153		pci_write_config(dev, PCIR_COMMAND,
2154		    adapter->hw.bus.pci_cmd_word, 2);
2155	}
2156
2157	/* Save off the information about this board */
2158	adapter->hw.vendor_id = pci_get_vendor(dev);
2159	adapter->hw.device_id = pci_get_device(dev);
2160	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2161	adapter->hw.subsystem_vendor_id =
2162	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2163	adapter->hw.subsystem_device_id =
2164	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2165
2166	/* Set MAC type early for PCI setup */
2167	e1000_set_mac_type(&adapter->hw);
2168
2169	/* Are we a VF device? */
2170	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2171	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2172		adapter->vf_ifp = 1;
2173	else
2174		adapter->vf_ifp = 0;
2175}
2176
2177static int
2178igb_allocate_pci_resources(struct adapter *adapter)
2179{
2180	device_t	dev = adapter->dev;
2181	int		rid;
2182
2183	rid = PCIR_BAR(0);
2184	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2185	    &rid, RF_ACTIVE);
2186	if (adapter->pci_mem == NULL) {
2187		device_printf(dev, "Unable to allocate bus resource: memory\n");
2188		return (ENXIO);
2189	}
2190	adapter->osdep.mem_bus_space_tag =
2191	    rman_get_bustag(adapter->pci_mem);
2192	adapter->osdep.mem_bus_space_handle =
2193	    rman_get_bushandle(adapter->pci_mem);
2194	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2195
2196	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2197
2198	/* This will setup either MSI/X or MSI */
2199	adapter->msix = igb_setup_msix(adapter);
2200	adapter->hw.back = &adapter->osdep;
2201
2202	return (0);
2203}
2204
2205/*********************************************************************
2206 *
2207 *  Setup the Legacy or MSI Interrupt handler
2208 *
2209 **********************************************************************/
2210static int
2211igb_allocate_legacy(struct adapter *adapter)
2212{
2213	device_t		dev = adapter->dev;
2214	struct igb_queue	*que = adapter->queues;
2215	struct tx_ring		*txr = adapter->tx_rings;
2216	int			error, rid = 0;
2217
2218	/* Turn off all interrupts */
2219	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2220
2221	/* MSI RID is 1 */
2222	if (adapter->msix == 1)
2223		rid = 1;
2224
2225	/* We allocate a single interrupt resource */
2226	adapter->res = bus_alloc_resource_any(dev,
2227	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2228	if (adapter->res == NULL) {
2229		device_printf(dev, "Unable to allocate bus resource: "
2230		    "interrupt\n");
2231		return (ENXIO);
2232	}
2233
2234#if __FreeBSD_version >= 800000
2235	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2236#endif
2237
2238	/*
2239	 * Try allocating a fast interrupt and the associated deferred
2240	 * processing contexts.
2241	 */
2242	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2243	/* Make tasklet for deferred link handling */
2244	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2245	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2246	    taskqueue_thread_enqueue, &que->tq);
2247	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2248	    device_get_nameunit(adapter->dev));
2249	if ((error = bus_setup_intr(dev, adapter->res,
2250	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2251	    adapter, &adapter->tag)) != 0) {
2252		device_printf(dev, "Failed to register fast interrupt "
2253			    "handler: %d\n", error);
2254		taskqueue_free(que->tq);
2255		que->tq = NULL;
2256		return (error);
2257	}
2258
2259	return (0);
2260}
2261
2262
2263/*********************************************************************
2264 *
2265 *  Setup the MSIX Queue Interrupt handlers:
2266 *
2267 **********************************************************************/
2268static int
2269igb_allocate_msix(struct adapter *adapter)
2270{
2271	device_t		dev = adapter->dev;
2272	struct igb_queue	*que = adapter->queues;
2273	int			error, rid, vector = 0;
2274
2275
2276	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;
2278		que->res = bus_alloc_resource_any(dev,
2279		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2280		if (que->res == NULL) {
2281			device_printf(dev,
2282			    "Unable to allocate bus resource: "
2283			    "MSIX Queue Interrupt\n");
2284			return (ENXIO);
2285		}
2286		error = bus_setup_intr(dev, que->res,
2287	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2288		    igb_msix_que, que, &que->tag);
2289		if (error) {
2290			que->res = NULL;
			device_printf(dev, "Failed to register Queue handler\n");
2292			return (error);
2293		}
2294#if __FreeBSD_version >= 800504
2295		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2296#endif
2297		que->msix = vector;
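		/*
		 * The 82575 EICR has discrete per-queue TX/RX bits,
		 * while later MACs dedicate one bit per MSI-X vector.
		 */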
2298		if (adapter->hw.mac.type == e1000_82575)
2299			que->eims = E1000_EICR_TX_QUEUE0 << i;
2300		else
2301			que->eims = 1 << vector;
2302		/*
2303		** Bind the msix vector, and thus the
2304		** rings to the corresponding cpu.
2305		*/
2306		if (adapter->num_queues > 1)
2307			bus_bind_intr(dev, que->res, i);
2308#if __FreeBSD_version >= 800000
2309		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2310		    que->txr);
2311#endif
2312		/* Make tasklet for deferred handling */
2313		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2314		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2315		    taskqueue_thread_enqueue, &que->tq);
2316		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2317		    device_get_nameunit(adapter->dev));
2318	}
2319
2320	/* And Link */
2321	rid = vector + 1;
2322	adapter->res = bus_alloc_resource_any(dev,
2323	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2324	if (adapter->res == NULL) {
2325		device_printf(dev,
2326		    "Unable to allocate bus resource: "
2327		    "MSIX Link Interrupt\n");
2328		return (ENXIO);
2329	}
2330	if ((error = bus_setup_intr(dev, adapter->res,
2331	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2332	    igb_msix_link, adapter, &adapter->tag)) != 0) {
		device_printf(dev, "Failed to register Link handler\n");
2334		return (error);
2335	}
2336#if __FreeBSD_version >= 800504
2337	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2338#endif
2339	adapter->linkvec = vector;
2340
2341	return (0);
2342}
2343
2344
2345static void
2346igb_configure_queues(struct adapter *adapter)
2347{
2348	struct	e1000_hw	*hw = &adapter->hw;
2349	struct	igb_queue	*que;
2350	u32			tmp, ivar = 0, newitr = 0;
2351
2352	/* First turn on RSS capability */
2353	if (adapter->hw.mac.type != e1000_82575)
2354		E1000_WRITE_REG(hw, E1000_GPIE,
2355		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2356		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2357
2358	/* Turn on MSIX */
2359	switch (adapter->hw.mac.type) {
2360	case e1000_82580:
2361	case e1000_i350:
2362	case e1000_vfadapt:
2363	case e1000_vfadapt_i350:
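		/*
		 * Each 32-bit IVAR register holds the vectors for two
		 * queues: RX entries occupy bytes 0 and 2, TX entries
		 * bytes 1 and 3, so queue i maps to register i >> 1
		 * with the low bit of i selecting the byte pair.
		 */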
2364		/* RX entries */
2365		for (int i = 0; i < adapter->num_queues; i++) {
2366			u32 index = i >> 1;
2367			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2368			que = &adapter->queues[i];
2369			if (i & 1) {
2370				ivar &= 0xFF00FFFF;
2371				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2372			} else {
2373				ivar &= 0xFFFFFF00;
2374				ivar |= que->msix | E1000_IVAR_VALID;
2375			}
2376			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2377		}
2378		/* TX entries */
2379		for (int i = 0; i < adapter->num_queues; i++) {
2380			u32 index = i >> 1;
2381			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2382			que = &adapter->queues[i];
2383			if (i & 1) {
2384				ivar &= 0x00FFFFFF;
2385				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2386			} else {
2387				ivar &= 0xFFFF00FF;
2388				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2389			}
2390			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2391			adapter->que_mask |= que->eims;
2392		}
2393
2394		/* And for the link interrupt */
2395		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2396		adapter->link_mask = 1 << adapter->linkvec;
2397		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2398		break;
2399	case e1000_82576:
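		/*
		 * On the 82576 the IVAR index wraps every eight
		 * queues: queues 0-7 use bytes 0 (RX) and 1 (TX) of
		 * IVAR i, queues 8-15 bytes 2 and 3 of IVAR i - 8.
		 */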
2400		/* RX entries */
2401		for (int i = 0; i < adapter->num_queues; i++) {
2402			u32 index = i & 0x7; /* Each IVAR has two entries */
2403			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2404			que = &adapter->queues[i];
2405			if (i < 8) {
2406				ivar &= 0xFFFFFF00;
2407				ivar |= que->msix | E1000_IVAR_VALID;
2408			} else {
2409				ivar &= 0xFF00FFFF;
2410				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2411			}
2412			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2413			adapter->que_mask |= que->eims;
2414		}
2415		/* TX entries */
2416		for (int i = 0; i < adapter->num_queues; i++) {
2417			u32 index = i & 0x7; /* Each IVAR has two entries */
2418			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2419			que = &adapter->queues[i];
2420			if (i < 8) {
2421				ivar &= 0xFFFF00FF;
2422				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2423			} else {
2424				ivar &= 0x00FFFFFF;
2425				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2426			}
2427			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2428			adapter->que_mask |= que->eims;
2429		}
2430
2431		/* And for the link interrupt */
2432		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2433		adapter->link_mask = 1 << adapter->linkvec;
2434		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2435		break;
2436
	case e1000_82575:
		/* Enable MSI-X support */
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2445
2446		/* Queues */
2447		for (int i = 0; i < adapter->num_queues; i++) {
2448			que = &adapter->queues[i];
2449			tmp = E1000_EICR_RX_QUEUE0 << i;
2450			tmp |= E1000_EICR_TX_QUEUE0 << i;
2451			que->eims = tmp;
2452			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2453			    i, que->eims);
2454			adapter->que_mask |= que->eims;
2455		}
2456
		/* Link */
		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
2461	default:
2462		break;
2463	}
2464
2465	/* Set the starting interrupt rate */
2466	if (igb_max_interrupt_rate > 0)
2467		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2468
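	/*
	 * The low two EITR bits are reserved, hence the 0x7FFC mask
	 * above. The 82575 replicates the interval into the upper
	 * half of the register, while later MACs set the
	 * counter-ignore (CNT_IGNR) bit so the new interval takes
	 * effect regardless of the current countdown.
	 */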
	if (hw->mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
2473
2474	for (int i = 0; i < adapter->num_queues; i++) {
2475		que = &adapter->queues[i];
2476		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2477	}
2478
2479	return;
2480}
2481
2482
2483static void
2484igb_free_pci_resources(struct adapter *adapter)
2485{
2486	struct		igb_queue *que = adapter->queues;
2487	device_t	dev = adapter->dev;
2488	int		rid;
2489
	/*
	** There is a slight possibility of a failure mode in attach
	** that will result in entering this function before the
	** interrupt resources have been initialized; in that case we
	** do not want to execute the loops below. We can detect this
	** reliably by the state of the adapter's res pointer.
	*/
2498	if (adapter->res == NULL)
2499		goto mem;
2500
2501	/*
2502	 * First release all the interrupt resources:
2503	 */
2504	for (int i = 0; i < adapter->num_queues; i++, que++) {
2505		rid = que->msix + 1;
2506		if (que->tag != NULL) {
2507			bus_teardown_intr(dev, que->res, que->tag);
2508			que->tag = NULL;
2509		}
2510		if (que->res != NULL)
2511			bus_release_resource(dev,
2512			    SYS_RES_IRQ, rid, que->res);
2513	}
2514
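	/*
	 * With MSI-X the link interrupt owns the final vector, so
	 * its IRQ rid is linkvec + 1; plain MSI was allocated at
	 * rid 1 and legacy INTx at rid 0, matching the setup in
	 * igb_allocate_legacy().
	 */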
	/* Clean the Legacy or Link interrupt last */
	if (adapter->linkvec) /* we are doing MSIX */
		rid = adapter->linkvec + 1;
	else
		rid = (adapter->msix != 0) ? 1 : 0;
2520
2521	que = adapter->queues;
2522	if (adapter->tag != NULL) {
2523		taskqueue_drain(que->tq, &adapter->link_task);
2524		bus_teardown_intr(dev, adapter->res, adapter->tag);
2525		adapter->tag = NULL;
2526	}
2527	if (adapter->res != NULL)
2528		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2529
2530	for (int i = 0; i < adapter->num_queues; i++, que++) {
2531		if (que->tq != NULL) {
2532#if __FreeBSD_version >= 800000
2533			taskqueue_drain(que->tq, &que->txr->txq_task);
2534#endif
2535			taskqueue_drain(que->tq, &que->que_task);
2536			taskqueue_free(que->tq);
2537		}
2538	}
2539mem:
2540	if (adapter->msix)
2541		pci_release_msi(dev);
2542
2543	if (adapter->msix_mem != NULL)
2544		bus_release_resource(dev, SYS_RES_MEMORY,
2545		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2546
2547	if (adapter->pci_mem != NULL)
2548		bus_release_resource(dev, SYS_RES_MEMORY,
2549		    PCIR_BAR(0), adapter->pci_mem);
2550
2551}
2552
2553/*
2554 * Setup Either MSI/X or MSI
2555 */
2556static int
2557igb_setup_msix(struct adapter *adapter)
2558{
2559	device_t dev = adapter->dev;
2560	int rid, want, queues, msgs;
2561
2562	/* tuneable override */
2563	if (igb_enable_msix == 0)
2564		goto msi;
2565
2566	/* First try MSI/X */
2567	rid = PCIR_BAR(IGB_MSIX_BAR);
2568	adapter->msix_mem = bus_alloc_resource_any(dev,
2569	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (!adapter->msix_mem) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table\n");
		goto msi;
	}
2576
2577	msgs = pci_msix_count(dev);
2578	if (msgs == 0) { /* system has msix disabled */
2579		bus_release_resource(dev, SYS_RES_MEMORY,
2580		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2581		adapter->msix_mem = NULL;
2582		goto msi;
2583	}
2584
2585	/* Figure out a reasonable auto config value */
2586	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2587
2588	/* Manual override */
2589	if (igb_num_queues != 0)
2590		queues = igb_num_queues;
2591	if (queues > 8)  /* max queues */
2592		queues = 8;
2593
2594	/* Can have max of 4 queues on 82575 */
2595	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2596		queues = 4;
2597
2598	/* Limit the VF devices to one queue */
2599	if (adapter->vf_ifp)
2600		queues = 1;
2601
	/*
	** One vector (RX/TX pair) per queue,
	** plus an additional one for the link interrupt.
	*/
	want = queues + 1;
	if (msgs >= want)
		msgs = want;
	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors configured, but %d queues wanted!\n",
		    msgs, want);
		return (ENXIO);
	}
	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_queues = queues;
		return (msgs);
	}
msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
		device_printf(adapter->dev, "Using MSI interrupt\n");
	return (msgs);
2627}
2628
2629/*********************************************************************
2630 *
 *  Set up a fresh starting state
2632 *
2633 **********************************************************************/
2634static void
2635igb_reset(struct adapter *adapter)
2636{
2637	device_t	dev = adapter->dev;
2638	struct e1000_hw *hw = &adapter->hw;
2639	struct e1000_fc_info *fc = &hw->fc;
2640	struct ifnet	*ifp = adapter->ifp;
2641	u32		pba = 0;
2642	u16		hwm;
2643
2644	INIT_DEBUGOUT("igb_reset: begin");
2645
2646	/* Let the firmware know the OS is in control */
2647	igb_get_hw_control(adapter);
2648
	/*
	 * Packet Buffer Allocation (PBA):
	 * writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
2654	switch (hw->mac.type) {
2655	case e1000_82575:
2656		pba = E1000_PBA_32K;
2657		break;
2658	case e1000_82576:
2659	case e1000_vfadapt:
2660		pba = E1000_READ_REG(hw, E1000_RXPBS);
2661		pba &= E1000_RXPBS_SIZE_MASK_82576;
2662		break;
	case e1000_82580:
	case e1000_i350:
	case e1000_vfadapt_i350:
		pba = E1000_READ_REG(hw, E1000_RXPBS);
		pba = e1000_rxpbs_adjust_82580(pba);
		break;
	default:
		break;
2672	}
2673
2674	/* Special needs in case of Jumbo frames */
2675	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2676		u32 tx_space, min_tx, min_rx;
2677		pba = E1000_READ_REG(hw, E1000_PBA);
2678		tx_space = pba >> 16;
2679		pba &= 0xffff;
2680		min_tx = (adapter->max_frame_size +
2681		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2682		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = adapter->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx &&
		    ((min_tx - tx_space) < pba)) {
			pba = pba - (min_tx - tx_space);
			/*
			 * If short on rx space, rx wins and must
			 * trump the tx adjustment.
			 */
			if (pba < min_rx)
				pba = min_rx;
2696		}
2697		E1000_WRITE_REG(hw, E1000_PBA, pba);
2698	}
2699
	INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
2701
2702	/*
2703	 * These parameters control the automatic generation (Tx) and
2704	 * response (Rx) to Ethernet PAUSE frames.
2705	 * - High water mark should allow for at least two frames to be
2706	 *   received after sending an XOFF.
2707	 * - Low water mark works best when it is very near the high water mark.
2708	 *   This allows the receiver to restart by sending XON when it has
2709	 *   drained a bit.
2710	 */
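	/*
	 * pba counts kilobytes, so shift by 10 to work in bytes:
	 * take the lesser of 90% of the packet buffer and the
	 * buffer size less room for two maximum-sized frames.
	 */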
2711	hwm = min(((pba << 10) * 9 / 10),
2712	    ((pba << 10) - 2 * adapter->max_frame_size));
2713
2714	if (hw->mac.type < e1000_82576) {
2715		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2716		fc->low_water = fc->high_water - 8;
2717	} else {
2718		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2719		fc->low_water = fc->high_water - 16;
2720	}
2721
2722	fc->pause_time = IGB_FC_PAUSE_TIME;
2723	fc->send_xon = TRUE;
2724
2725	/* Issue a global reset */
2726	e1000_reset_hw(hw);
2727	E1000_WRITE_REG(hw, E1000_WUC, 0);
2728
2729	if (e1000_init_hw(hw) < 0)
2730		device_printf(dev, "Hardware Initialization Failed\n");
2731
2732	/* Setup DMA Coalescing */
2733	if ((hw->mac.type == e1000_i350) &&
2734	    (adapter->dma_coalesce == TRUE)) {
2735		u32 reg;
2736
2737		hwm = (pba - 4) << 10;
2738		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2739		    & E1000_DMACR_DMACTHR_MASK);
2740
		/* Transition to L0s or L1 if available. */
2742		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2743
2744		/* timer = +-1000 usec in 32usec intervals */
2745		reg |= (1000 >> 5);
2746		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2747
2748		/* No lower threshold */
2749		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2750
		/* Set hwm to 4KB below the packet buffer size */
2752		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2753
2754		/* Set the interval before transition */
2755		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2756		reg |= 0x800000FF; /* 255 usec */
2757		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2758
2759		/* free space in tx packet buffer to wake from DMA coal */
2760		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2761		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2762
2763		/* make low power state decision controlled by DMA coal */
2764		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2765		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2766		    reg | E1000_PCIEMISC_LX_DECISION);
2767		device_printf(dev, "DMA Coalescing enabled\n");
2768	}
2769
2770	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2771	e1000_get_phy_info(hw);
2772	e1000_check_for_link(hw);
2773	return;
2774}
2775
2776/*********************************************************************
2777 *
2778 *  Setup networking device structure and register an interface.
2779 *
2780 **********************************************************************/
2781static int
2782igb_setup_interface(device_t dev, struct adapter *adapter)
2783{
2784	struct ifnet   *ifp;
2785
2786	INIT_DEBUGOUT("igb_setup_interface: begin");
2787
2788	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2789	if (ifp == NULL) {
2790		device_printf(dev, "can not allocate ifnet structure\n");
2791		return (-1);
2792	}
2793	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2794	ifp->if_mtu = ETHERMTU;
2795	ifp->if_init =  igb_init;
2796	ifp->if_softc = adapter;
2797	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2798	ifp->if_ioctl = igb_ioctl;
2799#if __FreeBSD_version >= 800000
2800	ifp->if_transmit = igb_mq_start;
2801	ifp->if_qflush = igb_qflush;
2802#else
2803	ifp->if_start = igb_start;
2804#endif
2805	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2806	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2807	IFQ_SET_READY(&ifp->if_snd);
2808
2809	ether_ifattach(ifp, adapter->hw.mac.addr);
2810
2811	ifp->if_capabilities = ifp->if_capenable = 0;
2812
2813	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2814	ifp->if_capabilities |= IFCAP_TSO4;
2815	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2816	ifp->if_capenable = ifp->if_capabilities;
2817
	/* Advertise LRO capability, but leave it disabled by default */
2819	ifp->if_capabilities |= IFCAP_LRO;
2820
2821#ifdef DEVICE_POLLING
2822	ifp->if_capabilities |= IFCAP_POLLING;
2823#endif
2824
2825	/*
2826	 * Tell the upper layer(s) we
2827	 * support full VLAN capability.
2828	 */
2829	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2830	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2831	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2832
	/*
	** Don't turn this on by default: if vlans are
	** created on another pseudo device (e.g. lagg)
	** then vlan events are not passed through, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the igb driver you can
	** enable this and get full hardware tag filtering.
	*/
2841	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2842
2843	/*
2844	 * Specify the media types supported by this adapter and register
2845	 * callbacks to update media and link information
2846	 */
2847	ifmedia_init(&adapter->media, IFM_IMASK,
2848	    igb_media_change, igb_media_status);
2849	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2850	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2851		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2852			    0, NULL);
2853		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2854	} else {
2855		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2856		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2857			    0, NULL);
2858		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2859			    0, NULL);
2860		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2861			    0, NULL);
2862		if (adapter->hw.phy.type != e1000_phy_ife) {
2863			ifmedia_add(&adapter->media,
2864				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2865			ifmedia_add(&adapter->media,
2866				IFM_ETHER | IFM_1000_T, 0, NULL);
2867		}
2868	}
2869	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2870	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2871	return (0);
2872}
2873
2874
2875/*
2876 * Manage DMA'able memory.
2877 */
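
/*
 * bus_dmamap_load() callback: the tag created in igb_dma_malloc()
 * below uses a single segment, so only segs[0] is meaningful; its
 * bus address is handed back through the opaque argument.
 */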
2878static void
2879igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2880{
2881	if (error)
2882		return;
2883	*(bus_addr_t *) arg = segs[0].ds_addr;
2884}
2885
2886static int
2887igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2888        struct igb_dma_alloc *dma, int mapflags)
2889{
2890	int error;
2891
2892	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2893				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2894				BUS_SPACE_MAXADDR,	/* lowaddr */
2895				BUS_SPACE_MAXADDR,	/* highaddr */
2896				NULL, NULL,		/* filter, filterarg */
2897				size,			/* maxsize */
2898				1,			/* nsegments */
2899				size,			/* maxsegsize */
2900				0,			/* flags */
2901				NULL,			/* lockfunc */
2902				NULL,			/* lockarg */
2903				&dma->dma_tag);
2904	if (error) {
2905		device_printf(adapter->dev,
2906		    "%s: bus_dma_tag_create failed: %d\n",
2907		    __func__, error);
2908		goto fail_0;
2909	}
2910
2911	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2912	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2913	if (error) {
2914		device_printf(adapter->dev,
2915		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2916		    __func__, (uintmax_t)size, error);
2917		goto fail_2;
2918	}
2919
2920	dma->dma_paddr = 0;
2921	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2922	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2923	if (error || dma->dma_paddr == 0) {
2924		device_printf(adapter->dev,
2925		    "%s: bus_dmamap_load failed: %d\n",
2926		    __func__, error);
2927		goto fail_3;
2928	}
2929
2930	return (0);
2931
2932fail_3:
2933	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2934fail_2:
2935	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2936	bus_dma_tag_destroy(dma->dma_tag);
2937fail_0:
2938	dma->dma_map = NULL;
2939	dma->dma_tag = NULL;
2940
2941	return (error);
2942}
2943
2944static void
2945igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2946{
2947	if (dma->dma_tag == NULL)
2948		return;
2949	if (dma->dma_map != NULL) {
2950		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2951		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2952		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2953		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2954		dma->dma_map = NULL;
2955	}
2956	bus_dma_tag_destroy(dma->dma_tag);
2957	dma->dma_tag = NULL;
2958}
2959
2960
2961/*********************************************************************
2962 *
2963 *  Allocate memory for the transmit and receive rings, and then
2964 *  the descriptors associated with each, called only once at attach.
2965 *
2966 **********************************************************************/
2967static int
2968igb_allocate_queues(struct adapter *adapter)
2969{
2970	device_t dev = adapter->dev;
2971	struct igb_queue	*que = NULL;
2972	struct tx_ring		*txr = NULL;
2973	struct rx_ring		*rxr = NULL;
2974	int rsize, tsize, error = E1000_SUCCESS;
2975	int txconf = 0, rxconf = 0;
2976
2977	/* First allocate the top level queue structs */
2978	if (!(adapter->queues =
2979	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2980	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2981		device_printf(dev, "Unable to allocate queue memory\n");
2982		error = ENOMEM;
2983		goto fail;
2984	}
2985
2986	/* Next allocate the TX ring struct memory */
2987	if (!(adapter->tx_rings =
2988	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2989	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2990		device_printf(dev, "Unable to allocate TX ring memory\n");
2991		error = ENOMEM;
2992		goto tx_fail;
2993	}
2994
2995	/* Now allocate the RX */
2996	if (!(adapter->rx_rings =
2997	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2998	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2999		device_printf(dev, "Unable to allocate RX ring memory\n");
3000		error = ENOMEM;
3001		goto rx_fail;
3002	}
3003
3004	tsize = roundup2(adapter->num_tx_desc *
3005	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3006	/*
3007	 * Now set up the TX queues, txconf is needed to handle the
3008	 * possibility that things fail midcourse and we need to
3009	 * undo memory gracefully
3010	 */
3011	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3012		/* Set up some basics */
3013		txr = &adapter->tx_rings[i];
3014		txr->adapter = adapter;
3015		txr->me = i;
3016
3017		/* Initialize the TX lock */
3018		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3019		    device_get_nameunit(dev), txr->me);
3020		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3021
3022		if (igb_dma_malloc(adapter, tsize,
3023			&txr->txdma, BUS_DMA_NOWAIT)) {
3024			device_printf(dev,
3025			    "Unable to allocate TX Descriptor memory\n");
3026			error = ENOMEM;
3027			goto err_tx_desc;
3028		}
3029		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3030		bzero((void *)txr->tx_base, tsize);
3031
		/* Now allocate transmit buffers for the ring */
		if (igb_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
3039#if __FreeBSD_version >= 800000
3040		/* Allocate a buf ring */
3041		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3042		    M_WAITOK, &txr->tx_mtx);
3043#endif
3044	}
3045
3046	/*
3047	 * Next the RX queues...
3048	 */
3049	rsize = roundup2(adapter->num_rx_desc *
3050	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3051	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3052		rxr = &adapter->rx_rings[i];
3053		rxr->adapter = adapter;
3054		rxr->me = i;
3055
3056		/* Initialize the RX lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3059		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3060
3061		if (igb_dma_malloc(adapter, rsize,
3062			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3063			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
3065			error = ENOMEM;
3066			goto err_rx_desc;
3067		}
3068		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3069		bzero((void *)rxr->rx_base, rsize);
3070
		/* Allocate receive buffers for the ring */
3072		if (igb_allocate_receive_buffers(rxr)) {
3073			device_printf(dev,
3074			    "Critical Failure setting up receive buffers\n");
3075			error = ENOMEM;
3076			goto err_rx_desc;
3077		}
3078	}
3079
3080	/*
3081	** Finally set up the queue holding structs
3082	*/
3083	for (int i = 0; i < adapter->num_queues; i++) {
3084		que = &adapter->queues[i];
3085		que->adapter = adapter;
3086		que->txr = &adapter->tx_rings[i];
3087		que->rxr = &adapter->rx_rings[i];
3088	}
3089
3090	return (0);
3091
3092err_rx_desc:
3093	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3094		igb_dma_free(adapter, &rxr->rxdma);
3095err_tx_desc:
3096	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3097		igb_dma_free(adapter, &txr->txdma);
3098	free(adapter->rx_rings, M_DEVBUF);
3099rx_fail:
3100#if __FreeBSD_version >= 800000
3101	buf_ring_free(txr->br, M_DEVBUF);
3102#endif
3103	free(adapter->tx_rings, M_DEVBUF);
3104tx_fail:
3105	free(adapter->queues, M_DEVBUF);
3106fail:
3107	return (error);
3108}
3109
3110/*********************************************************************
3111 *
3112 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3113 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; setup is done on every reset.
3115 *
3116 **********************************************************************/
3117static int
3118igb_allocate_transmit_buffers(struct tx_ring *txr)
3119{
3120	struct adapter *adapter = txr->adapter;
3121	device_t dev = adapter->dev;
3122	struct igb_tx_buffer *txbuf;
3123	int error, i;
3124
3125	/*
3126	 * Setup DMA descriptor areas.
3127	 */
3128	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3129			       1, 0,			/* alignment, bounds */
3130			       BUS_SPACE_MAXADDR,	/* lowaddr */
3131			       BUS_SPACE_MAXADDR,	/* highaddr */
3132			       NULL, NULL,		/* filter, filterarg */
3133			       IGB_TSO_SIZE,		/* maxsize */
3134			       IGB_MAX_SCATTER,		/* nsegments */
3135			       PAGE_SIZE,		/* maxsegsize */
3136			       0,			/* flags */
3137			       NULL,			/* lockfunc */
3138			       NULL,			/* lockfuncarg */
3139			       &txr->txtag))) {
3140		device_printf(dev,"Unable to allocate TX DMA tag\n");
3141		goto fail;
3142	}
3143
3144	if (!(txr->tx_buffers =
3145	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3146	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3147		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3148		error = ENOMEM;
3149		goto fail;
3150	}
3151
	/* Create the descriptor buffer dma maps */
3153	txbuf = txr->tx_buffers;
3154	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3155		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3156		if (error != 0) {
3157			device_printf(dev, "Unable to create TX DMA map\n");
3158			goto fail;
3159		}
3160	}
3161
	return (0);
fail:
	/* Free everything; this handles the case where we failed partway */
	igb_free_transmit_structures(adapter);
	return (error);
3167}
3168
3169/*********************************************************************
3170 *
3171 *  Initialize a transmit ring.
3172 *
3173 **********************************************************************/
3174static void
3175igb_setup_transmit_ring(struct tx_ring *txr)
3176{
3177	struct adapter *adapter = txr->adapter;
3178	struct igb_tx_buffer *txbuf;
3179	int i;
3180
3181	/* Clear the old descriptor contents */
3182	IGB_TX_LOCK(txr);
3183	bzero((void *)txr->tx_base,
3184	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3185	/* Reset indices */
3186	txr->next_avail_desc = 0;
3187	txr->next_to_clean = 0;
3188
	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
		/* clear the watch index */
		txbuf->next_eop = -1;
	}
3202
3203	/* Set number of descriptors available */
3204	txr->tx_avail = adapter->num_tx_desc;
3205
3206	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3207	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3208	IGB_TX_UNLOCK(txr);
3209}
3210
3211/*********************************************************************
3212 *
3213 *  Initialize all transmit rings.
3214 *
3215 **********************************************************************/
3216static void
3217igb_setup_transmit_structures(struct adapter *adapter)
3218{
3219	struct tx_ring *txr = adapter->tx_rings;
3220
3221	for (int i = 0; i < adapter->num_queues; i++, txr++)
3222		igb_setup_transmit_ring(txr);
3223
3224	return;
3225}
3226
3227/*********************************************************************
3228 *
3229 *  Enable transmit unit.
3230 *
3231 **********************************************************************/
3232static void
3233igb_initialize_transmit_units(struct adapter *adapter)
3234{
3235	struct tx_ring	*txr = adapter->tx_rings;
3236	struct e1000_hw *hw = &adapter->hw;
3237	u32		tctl, txdctl;
3238
3239	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3240	tctl = txdctl = 0;
3241
3242	/* Setup the Tx Descriptor Rings */
3243	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3244		u64 bus_addr = txr->txdma.dma_paddr;
3245
3246		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3247		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3248		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3249		    (uint32_t)(bus_addr >> 32));
3250		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3251		    (uint32_t)bus_addr);
3252
3253		/* Setup the HW Tx Head and Tail descriptor pointers */
3254		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3255		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3256
3257		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3258		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3259		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3260
3261		txr->queue_status = IGB_QUEUE_IDLE;
3262
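		/*
		 * TXDCTL packs the prefetch, host, and write-back
		 * thresholds into bytes 0, 1, and 2, hence the 8- and
		 * 16-bit shifts below, then enables the queue.
		 */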
3263		txdctl |= IGB_TX_PTHRESH;
3264		txdctl |= IGB_TX_HTHRESH << 8;
3265		txdctl |= IGB_TX_WTHRESH << 16;
3266		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3267		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3268	}
3269
3270	if (adapter->vf_ifp)
3271		return;
3272
3273	e1000_config_collision_dist(hw);
3274
3275	/* Program the Transmit Control Register */
3276	tctl = E1000_READ_REG(hw, E1000_TCTL);
3277	tctl &= ~E1000_TCTL_CT;
3278	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3279		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3280
3281	/* This write will effectively turn on the transmit unit. */
3282	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3283}
3284
3285/*********************************************************************
3286 *
3287 *  Free all transmit rings.
3288 *
3289 **********************************************************************/
3290static void
3291igb_free_transmit_structures(struct adapter *adapter)
3292{
3293	struct tx_ring *txr = adapter->tx_rings;
3294
3295	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3296		IGB_TX_LOCK(txr);
3297		igb_free_transmit_buffers(txr);
3298		igb_dma_free(adapter, &txr->txdma);
3299		IGB_TX_UNLOCK(txr);
3300		IGB_TX_LOCK_DESTROY(txr);
3301	}
3302	free(adapter->tx_rings, M_DEVBUF);
3303}
3304
3305/*********************************************************************
3306 *
3307 *  Free transmit ring related data structures.
3308 *
3309 **********************************************************************/
3310static void
3311igb_free_transmit_buffers(struct tx_ring *txr)
3312{
3313	struct adapter *adapter = txr->adapter;
3314	struct igb_tx_buffer *tx_buffer;
3315	int             i;
3316
3317	INIT_DEBUGOUT("free_transmit_ring: begin");
3318
3319	if (txr->tx_buffers == NULL)
3320		return;
3321
3322	tx_buffer = txr->tx_buffers;
3323	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3324		if (tx_buffer->m_head != NULL) {
3325			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3326			    BUS_DMASYNC_POSTWRITE);
3327			bus_dmamap_unload(txr->txtag,
3328			    tx_buffer->map);
3329			m_freem(tx_buffer->m_head);
3330			tx_buffer->m_head = NULL;
3331			if (tx_buffer->map != NULL) {
3332				bus_dmamap_destroy(txr->txtag,
3333				    tx_buffer->map);
3334				tx_buffer->map = NULL;
3335			}
3336		} else if (tx_buffer->map != NULL) {
3337			bus_dmamap_unload(txr->txtag,
3338			    tx_buffer->map);
3339			bus_dmamap_destroy(txr->txtag,
3340			    tx_buffer->map);
3341			tx_buffer->map = NULL;
3342		}
3343	}
3344#if __FreeBSD_version >= 800000
3345	if (txr->br != NULL)
3346		buf_ring_free(txr->br, M_DEVBUF);
3347#endif
3348	if (txr->tx_buffers != NULL) {
3349		free(txr->tx_buffers, M_DEVBUF);
3350		txr->tx_buffers = NULL;
3351	}
3352	if (txr->txtag != NULL) {
3353		bus_dma_tag_destroy(txr->txtag);
3354		txr->txtag = NULL;
3355	}
3356	return;
3357}
3358
3359/**********************************************************************
3360 *
3361 *  Setup work for hardware segmentation offload (TSO)
3362 *
3363 **********************************************************************/
3364static boolean_t
3365igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3366{
3367	struct adapter *adapter = txr->adapter;
3368	struct e1000_adv_tx_context_desc *TXD;
3369	struct igb_tx_buffer        *tx_buffer;
3370	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3371	u32 mss_l4len_idx = 0;
3372	u16 vtag = 0;
3373	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3374	struct ether_vlan_header *eh;
3375	struct ip *ip;
3376	struct tcphdr *th;
3377
3378
3379	/*
3380	 * Determine where frame payload starts.
3381	 * Jump over vlan headers if already present
3382	 */
3383	eh = mtod(mp, struct ether_vlan_header *);
3384	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3385		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3386	else
3387		ehdrlen = ETHER_HDR_LEN;
3388
3389	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3390	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3391		return FALSE;
3392
	/* Only IPv4 TSO is supported for now */
	ctxd = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[ctxd];
	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];

	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;
3401	ip->ip_sum = 0;
3402	ip_hlen = ip->ip_hl << 2;
3403	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
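	/*
	 * Seed the TCP checksum with a pseudo-header sum that leaves
	 * out the length; the hardware accounts for each segment's
	 * length as it splits the payload.
	 */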
3404	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3405	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3406	tcp_hlen = th->th_off << 2;
3407	/*
3408	 * Calculate header length, this is used
3409	 * in the transmit desc in igb_xmit
3410	 */
3411	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3412
3413	/* VLAN MACLEN IPLEN */
3414	if (mp->m_flags & M_VLANTAG) {
3415		vtag = htole16(mp->m_pkthdr.ether_vtag);
3416		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3417	}
3418
3419	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3420	vlan_macip_lens |= ip_hlen;
3421	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3422
3423	/* ADV DTYPE TUCMD */
3424	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3425	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3426	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3427	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3428
3429	/* MSS L4LEN IDX */
3430	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3431	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3432	/* 82575 needs the queue index added */
3433	if (adapter->hw.mac.type == e1000_82575)
3434		mss_l4len_idx |= txr->me << 4;
3435	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3436
3437	TXD->seqnum_seed = htole32(0);
3438	tx_buffer->m_head = NULL;
3439	tx_buffer->next_eop = -1;
3440
3441	if (++ctxd == adapter->num_tx_desc)
3442		ctxd = 0;
3443
3444	txr->tx_avail--;
3445	txr->next_avail_desc = ctxd;
3446	return TRUE;
3447}
3448
3449
3450/*********************************************************************
3451 *
3452 *  Context Descriptor setup for VLAN or CSUM
3453 *
3454 **********************************************************************/
3455
3456static bool
3457igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3458{
3459	struct adapter *adapter = txr->adapter;
3460	struct e1000_adv_tx_context_desc *TXD;
3461	struct igb_tx_buffer        *tx_buffer;
3462	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3463	struct ether_vlan_header *eh;
3464	struct ip *ip = NULL;
3465	struct ip6_hdr *ip6;
3466	int  ehdrlen, ctxd, ip_hlen = 0;
3467	u16	etype, vtag = 0;
3468	u8	ipproto = 0;
3469	bool	offload = TRUE;
3470
3471	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3472		offload = FALSE;
3473
3474	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3475	ctxd = txr->next_avail_desc;
3476	tx_buffer = &txr->tx_buffers[ctxd];
3477	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3478
3479	/*
3480	** In advanced descriptors the vlan tag must
3481	** be placed into the context descriptor, thus
3482	** we need to be here just for that setup.
3483	*/
3484	if (mp->m_flags & M_VLANTAG) {
3485		vtag = htole16(mp->m_pkthdr.ether_vtag);
3486		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3487	} else if (offload == FALSE)
3488		return FALSE;
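	/*
	 * Note: with a VLAN tag but no checksum work we still fall
	 * through and write the context descriptor (the tag must be
	 * carried there), but FALSE is returned so that igb_xmit()
	 * does not set the TXSM bit.
	 */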
3489
3490	/*
3491	 * Determine where frame payload starts.
3492	 * Jump over vlan headers if already present,
3493	 * helpful for QinQ too.
3494	 */
3495	eh = mtod(mp, struct ether_vlan_header *);
3496	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3497		etype = ntohs(eh->evl_proto);
3498		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3499	} else {
3500		etype = ntohs(eh->evl_encap_proto);
3501		ehdrlen = ETHER_HDR_LEN;
3502	}
3503
3504	/* Set the ether header length */
3505	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3506
3507	switch (etype) {
3508		case ETHERTYPE_IP:
3509			ip = (struct ip *)(mp->m_data + ehdrlen);
3510			ip_hlen = ip->ip_hl << 2;
3511			if (mp->m_len < ehdrlen + ip_hlen) {
3512				offload = FALSE;
3513				break;
3514			}
3515			ipproto = ip->ip_p;
3516			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3517			break;
3518		case ETHERTYPE_IPV6:
3519			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3520			ip_hlen = sizeof(struct ip6_hdr);
3521			ipproto = ip6->ip6_nxt;
3522			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3523			break;
3524		default:
3525			offload = FALSE;
3526			break;
3527	}
3528
3529	vlan_macip_lens |= ip_hlen;
3530	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3531
3532	switch (ipproto) {
3533		case IPPROTO_TCP:
3534			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3535				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3536			break;
3537		case IPPROTO_UDP:
3538			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3539				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3540			break;
3541#if __FreeBSD_version >= 800000
3542		case IPPROTO_SCTP:
3543			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3544				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3545			break;
3546#endif
3547		default:
3548			offload = FALSE;
3549			break;
3550	}
3551
3552	/* 82575 needs the queue index added */
3553	if (adapter->hw.mac.type == e1000_82575)
3554		mss_l4len_idx = txr->me << 4;
3555
3556	/* Now copy bits into descriptor */
3557	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3558	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3559	TXD->seqnum_seed = htole32(0);
3560	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3561
3562	tx_buffer->m_head = NULL;
3563	tx_buffer->next_eop = -1;
3564
3565	/* We've consumed the first desc, adjust counters */
3566	if (++ctxd == adapter->num_tx_desc)
3567		ctxd = 0;
3568	txr->next_avail_desc = ctxd;
3569	--txr->tx_avail;
3570
3571        return (offload);
3572}
3573
3574
3575/**********************************************************************
3576 *
3577 *  Examine each tx_buffer in the used queue. If the hardware is done
3578 *  processing the packet then free associated resources. The
3579 *  tx_buffer is put back on the free queue.
3580 *
3581 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3582 **********************************************************************/
3583static bool
3584igb_txeof(struct tx_ring *txr)
3585{
3586	struct adapter	*adapter = txr->adapter;
3587        int first, last, done, processed;
3588        struct igb_tx_buffer *tx_buffer;
3589        struct e1000_tx_desc   *tx_desc, *eop_desc;
3590	struct ifnet   *ifp = adapter->ifp;
3591
3592	IGB_TX_LOCK_ASSERT(txr);
3593
3594        if (txr->tx_avail == adapter->num_tx_desc) {
3595		txr->queue_status = IGB_QUEUE_IDLE;
3596                return FALSE;
3597	}
3598
3599	processed = 0;
3600        first = txr->next_to_clean;
3601        tx_desc = &txr->tx_base[first];
3602        tx_buffer = &txr->tx_buffers[first];
3603	last = tx_buffer->next_eop;
3604        eop_desc = &txr->tx_base[last];
3605
3606	/*
3607	 * Get the index of the first descriptor
3608	 * AFTER the EOP of the first packet, so we
3609	 * can do a simple comparison in the inner
3610	 * while loop.
3611	 */
3612	if (++last == adapter->num_tx_desc)
3613 		last = 0;
3614	done = last;
3615
3616        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3617            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
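        /*
        ** The DD (Descriptor Done) bit in the EOP descriptor's
        ** status means the hardware has finished with the whole
        ** packet; only then do we reclaim its descriptor range.
        */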
3618
3619        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3620		/* We clean the range of the packet */
3621		while (first != done) {
3622                	tx_desc->upper.data = 0;
3623                	tx_desc->lower.data = 0;
3624                	tx_desc->buffer_addr = 0;
3625                	++txr->tx_avail;
3626			++processed;
3627
3628			if (tx_buffer->m_head) {
3629				txr->bytes +=
3630				    tx_buffer->m_head->m_pkthdr.len;
3631				bus_dmamap_sync(txr->txtag,
3632				    tx_buffer->map,
3633				    BUS_DMASYNC_POSTWRITE);
3634				bus_dmamap_unload(txr->txtag,
3635				    tx_buffer->map);
3636
3637                        	m_freem(tx_buffer->m_head);
3638                        	tx_buffer->m_head = NULL;
3639                	}
3640			tx_buffer->next_eop = -1;
3641			txr->watchdog_time = ticks;
3642
3643	                if (++first == adapter->num_tx_desc)
3644				first = 0;
3645
3646	                tx_buffer = &txr->tx_buffers[first];
3647			tx_desc = &txr->tx_base[first];
3648		}
3649		++txr->packets;
3650		++ifp->if_opackets;
3651		/* See if we can continue to the next packet */
3652		last = tx_buffer->next_eop;
3653		if (last != -1) {
3654        		eop_desc = &txr->tx_base[last];
3655			/* Get new done point */
3656			if (++last == adapter->num_tx_desc) last = 0;
3657			done = last;
3658		} else
3659			break;
3660        }
3661        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3662            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3663
3664        txr->next_to_clean = first;
3665
3666	/*
3667	** Watchdog calculation: we know there's work
3668	** outstanding or the first return would have
3669	** been taken, so nothing processed for too
3670	** long indicates a hang.
3671	*/
3672	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3673		txr->queue_status = IGB_QUEUE_HUNG;
3674
3675        /*
3676         * If we have a minimum free, clear IFF_DRV_OACTIVE
3677         * to tell the stack that it is OK to send packets.
3678         */
3679        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3680                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3681		/* All clean, turn off the watchdog */
3682                if (txr->tx_avail == adapter->num_tx_desc) {
3683			txr->queue_status = IGB_QUEUE_IDLE;
3684			return (FALSE);
3685		}
3686        }
3687	return (TRUE);
3688}
3689
3690/*********************************************************************
3691 *
3692 *  Refresh mbuf buffers for RX descriptor rings
3693 *   - now keeps its own state so discards due to resource
3694 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3695 *     it just returns, keeping its placeholder, so it can simply
3696 *     be called again to retry.
3697 *
3698 **********************************************************************/
3699static void
3700igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3701{
3702	struct adapter		*adapter = rxr->adapter;
3703	bus_dma_segment_t	hseg[1];
3704	bus_dma_segment_t	pseg[1];
3705	struct igb_rx_buf	*rxbuf;
3706	struct mbuf		*mh, *mp;
3707	int			i, j, nsegs, error;
3708	bool			refreshed = FALSE;
3709
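	/*
	** 'i' is the slot being refreshed, 'j' always runs one
	** ahead of it, and 'limit' is the first descriptor the
	** cleaner still owns, which we must stop short of.
	*/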
3710	i = j = rxr->next_to_refresh;
3711	/*
3712	** Get one descriptor beyond
3713	** our work mark to control
3714	** the loop.
3715	*/
3716	if (++j == adapter->num_rx_desc)
3717		j = 0;
3718
3719	while (j != limit) {
3720		rxbuf = &rxr->rx_buffers[i];
3721		/* No hdr mbuf used with header split off */
3722		if (rxr->hdr_split == FALSE)
3723			goto no_split;
3724		if (rxbuf->m_head == NULL) {
3725			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3726			if (mh == NULL)
3727				goto update;
3728		} else
3729			mh = rxbuf->m_head;
3730
3731		mh->m_pkthdr.len = mh->m_len = MHLEN;
3733		mh->m_flags |= M_PKTHDR;
3734		/* Get the memory mapping */
3735		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3736		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3737		if (error != 0) {
3738			printf("Refresh mbufs: hdr dmamap load"
3739			    " failure - %d\n", error);
3740			m_free(mh);
3741			rxbuf->m_head = NULL;
3742			goto update;
3743		}
3744		rxbuf->m_head = mh;
3745		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3746		    BUS_DMASYNC_PREREAD);
3747		rxr->rx_base[i].read.hdr_addr =
3748		    htole64(hseg[0].ds_addr);
3749no_split:
3750		if (rxbuf->m_pack == NULL) {
3751			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3752			    M_PKTHDR, adapter->rx_mbuf_sz);
3753			if (mp == NULL)
3754				goto update;
3755		} else
3756			mp = rxbuf->m_pack;
3757
3758		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3759		/* Get the memory mapping */
3760		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3761		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3762		if (error != 0) {
3763			printf("Refresh mbufs: payload dmamap load"
3764			    " failure - %d\n", error);
3765			m_free(mp);
3766			rxbuf->m_pack = NULL;
3767			goto update;
3768		}
3769		rxbuf->m_pack = mp;
3770		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3771		    BUS_DMASYNC_PREREAD);
3772		rxr->rx_base[i].read.pkt_addr =
3773		    htole64(pseg[0].ds_addr);
3774		refreshed = TRUE; /* at least one slot was refilled */
3775
3776		i = j; /* our next is precalculated */
3777		rxr->next_to_refresh = i;
3778		if (++j == adapter->num_rx_desc)
3779			j = 0;
3780	}
3781update:
3782	if (refreshed) /* update tail */
3783		E1000_WRITE_REG(&adapter->hw,
3784		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3785	return;
3786}
3787
3788
3789/*********************************************************************
3790 *
3791 *  Allocate memory for rx_buffer structures. Since we use one
3792 *  rx_buffer per received packet, the maximum number of rx_buffer's
3793 *  that we'll need is equal to the number of receive descriptors
3794 *  that we've allocated.
3795 *
3796 **********************************************************************/
3797static int
3798igb_allocate_receive_buffers(struct rx_ring *rxr)
3799{
3800	struct	adapter 	*adapter = rxr->adapter;
3801	device_t 		dev = adapter->dev;
3802	struct igb_rx_buf	*rxbuf;
3803	int             	i, bsize, error;
3804
3805	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3806	if (!(rxr->rx_buffers =
3807	    (struct igb_rx_buf *) malloc(bsize,
3808	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3809		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3810		error = ENOMEM;
3811		goto fail;
3812	}
3813
3814	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3815				   1, 0,		/* alignment, bounds */
3816				   BUS_SPACE_MAXADDR,	/* lowaddr */
3817				   BUS_SPACE_MAXADDR,	/* highaddr */
3818				   NULL, NULL,		/* filter, filterarg */
3819				   MSIZE,		/* maxsize */
3820				   1,			/* nsegments */
3821				   MSIZE,		/* maxsegsize */
3822				   0,			/* flags */
3823				   NULL,		/* lockfunc */
3824				   NULL,		/* lockfuncarg */
3825				   &rxr->htag))) {
3826		device_printf(dev, "Unable to create RX DMA tag\n");
3827		goto fail;
3828	}
3829
3830	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3831				   1, 0,		/* alignment, bounds */
3832				   BUS_SPACE_MAXADDR,	/* lowaddr */
3833				   BUS_SPACE_MAXADDR,	/* highaddr */
3834				   NULL, NULL,		/* filter, filterarg */
3835				   MJUM9BYTES,		/* maxsize */
3836				   1,			/* nsegments */
3837				   MJUM9BYTES,		/* maxsegsize */
3838				   0,			/* flags */
3839				   NULL,		/* lockfunc */
3840				   NULL,		/* lockfuncarg */
3841				   &rxr->ptag))) {
3842		device_printf(dev, "Unable to create RX payload DMA tag\n");
3843		goto fail;
3844	}
3845
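	/*
	** Each descriptor gets a pair of maps: one on the header
	** tag and one on the payload tag created above.
	*/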
3846	for (i = 0; i < adapter->num_rx_desc; i++) {
3847		rxbuf = &rxr->rx_buffers[i];
3848		error = bus_dmamap_create(rxr->htag,
3849		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3850		if (error) {
3851			device_printf(dev,
3852			    "Unable to create RX head DMA maps\n");
3853			goto fail;
3854		}
3855		error = bus_dmamap_create(rxr->ptag,
3856		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3857		if (error) {
3858			device_printf(dev,
3859			    "Unable to create RX packet DMA maps\n");
3860			goto fail;
3861		}
3862	}
3863
3864	return (0);
3865
3866fail:
3867	/* Frees all, but can handle partial completion */
3868	igb_free_receive_structures(adapter);
3869	return (error);
3870}
3871
3872
3873static void
3874igb_free_receive_ring(struct rx_ring *rxr)
3875{
3876	struct	adapter		*adapter = rxr->adapter;
3877	struct igb_rx_buf	*rxbuf;
3878
3879
3880	for (int i = 0; i < adapter->num_rx_desc; i++) {
3881		rxbuf = &rxr->rx_buffers[i];
3882		if (rxbuf->m_head != NULL) {
3883			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3884			    BUS_DMASYNC_POSTREAD);
3885			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3886			rxbuf->m_head->m_flags |= M_PKTHDR;
3887			m_freem(rxbuf->m_head);
3888		}
3889		if (rxbuf->m_pack != NULL) {
3890			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3891			    BUS_DMASYNC_POSTREAD);
3892			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3893			rxbuf->m_pack->m_flags |= M_PKTHDR;
3894			m_freem(rxbuf->m_pack);
3895		}
3896		rxbuf->m_head = NULL;
3897		rxbuf->m_pack = NULL;
3898	}
3899}
3900
3901
3902/*********************************************************************
3903 *
3904 *  Initialize a receive ring and its buffers.
3905 *
3906 **********************************************************************/
3907static int
3908igb_setup_receive_ring(struct rx_ring *rxr)
3909{
3910	struct	adapter		*adapter;
3911	struct  ifnet		*ifp;
3912	device_t		dev;
3913	struct igb_rx_buf	*rxbuf;
3914	bus_dma_segment_t	pseg[1], hseg[1];
3915	struct lro_ctrl		*lro = &rxr->lro;
3916	int			rsize, nsegs, error = 0;
3917
3918	adapter = rxr->adapter;
3919	dev = adapter->dev;
3920	ifp = adapter->ifp;
3921
3922	/* Clear the ring contents */
3923	IGB_RX_LOCK(rxr);
3924	rsize = roundup2(adapter->num_rx_desc *
3925	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3926	bzero((void *)rxr->rx_base, rsize);
3927
3928	/*
3929	** Free current RX buffer structures and their mbufs
3930	*/
3931	igb_free_receive_ring(rxr);
3932
3933	/* Configure for header split? */
3934	if (igb_header_split)
3935		rxr->hdr_split = TRUE;
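	/*
	** Header split is governed by the igb_header_split
	** tunable; with it enabled each descriptor is given both
	** a header mbuf and a payload cluster below.
	*/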
3936
3937        /* Now replenish the ring mbufs */
3938	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3939		struct mbuf	*mh, *mp;
3940
3941		rxbuf = &rxr->rx_buffers[j];
3942		if (rxr->hdr_split == FALSE)
3943			goto skip_head;
3944
3945		/* First the header */
3946		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3947		if (rxbuf->m_head == NULL) {
3948			error = ENOBUFS;
3949                        goto fail;
3950		}
3951		m_adj(rxbuf->m_head, ETHER_ALIGN);
3952		mh = rxbuf->m_head;
3953		mh->m_len = mh->m_pkthdr.len = MHLEN;
3954		mh->m_flags |= M_PKTHDR;
3955		/* Get the memory mapping */
3956		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3957		    rxbuf->hmap, rxbuf->m_head, hseg,
3958		    &nsegs, BUS_DMA_NOWAIT);
3959		if (error != 0) /* Nothing elegant to do here */
3960                        goto fail;
3961		bus_dmamap_sync(rxr->htag,
3962		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3963		/* Update descriptor */
3964		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3965
3966skip_head:
3967		/* Now the payload cluster */
3968		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3969		    M_PKTHDR, adapter->rx_mbuf_sz);
3970		if (rxbuf->m_pack == NULL) {
3971			error = ENOBUFS;
3972                        goto fail;
3973		}
3974		mp = rxbuf->m_pack;
3975		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3976		/* Get the memory mapping */
3977		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3978		    rxbuf->pmap, mp, pseg,
3979		    &nsegs, BUS_DMA_NOWAIT);
3980		if (error != 0)
3981                        goto fail;
3982		bus_dmamap_sync(rxr->ptag,
3983		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
3984		/* Update descriptor */
3985		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
3986        }
3987
3988	/* Setup our descriptor indices */
3989	rxr->next_to_check = 0;
3990	rxr->next_to_refresh = adapter->num_rx_desc - 1;
3991	rxr->lro_enabled = FALSE;
3992	rxr->rx_split_packets = 0;
3993	rxr->rx_bytes = 0;
3994
3995	rxr->fmp = NULL;
3996	rxr->lmp = NULL;
3997	rxr->discard = FALSE;
3998
3999	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4000	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4001
4002	/*
4003	** Now set up the LRO interface; we
4004	** also only do header split when LRO
4005	** is enabled, since so often they
4006	** are undesirable in similar setups.
4007	*/
4008	if (ifp->if_capenable & IFCAP_LRO) {
4009		error = tcp_lro_init(lro);
4010		if (error) {
4011			device_printf(dev, "LRO Initialization failed!\n");
4012			goto fail;
4013		}
4014		INIT_DEBUGOUT("RX LRO Initialized\n");
4015		rxr->lro_enabled = TRUE;
4016		lro->ifp = adapter->ifp;
4017	}
4018
4019	IGB_RX_UNLOCK(rxr);
4020	return (0);
4021
4022fail:
4023	igb_free_receive_ring(rxr);
4024	IGB_RX_UNLOCK(rxr);
4025	return (error);
4026}
4027
4028
4029/*********************************************************************
4030 *
4031 *  Initialize all receive rings.
4032 *
4033 **********************************************************************/
4034static int
4035igb_setup_receive_structures(struct adapter *adapter)
4036{
4037	struct rx_ring *rxr = adapter->rx_rings;
4038	int i;
4039
4040	for (i = 0; i < adapter->num_queues; i++, rxr++)
4041		if (igb_setup_receive_ring(rxr))
4042			goto fail;
4043
4044	return (0);
4045fail:
4046	/*
4047	 * Free RX buffers allocated so far; we only handle
4048	 * the rings that completed, since the failing case
4049	 * will have cleaned up after itself. 'i' is the endpoint.
4050	 */
4051	for (int j = 0; j < i; ++j) {
4052		rxr = &adapter->rx_rings[j];
4053		IGB_RX_LOCK(rxr);
4054		igb_free_receive_ring(rxr);
4055		IGB_RX_UNLOCK(rxr);
4056	}
4057
4058	return (ENOBUFS);
4059}
4060
4061/*********************************************************************
4062 *
4063 *  Enable receive unit.
4064 *
4065 **********************************************************************/
4066static void
4067igb_initialize_receive_units(struct adapter *adapter)
4068{
4069	struct rx_ring	*rxr = adapter->rx_rings;
4070	struct ifnet	*ifp = adapter->ifp;
4071	struct e1000_hw *hw = &adapter->hw;
4072	u32		rctl, rxcsum, psize, srrctl = 0;
4073
4074	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4075
4076	/*
4077	 * Make sure receives are disabled while setting
4078	 * up the descriptor ring
4079	 */
4080	rctl = E1000_READ_REG(hw, E1000_RCTL);
4081	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4082
4083	/*
4084	** Set up for header split
4085	*/
4086	if (igb_header_split) {
4087		/* Use a standard mbuf for the header */
4088		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4089		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4090	} else
4091		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4092
4093	/*
4094	** Set up for jumbo frames
4095	*/
4096	if (ifp->if_mtu > ETHERMTU) {
4097		rctl |= E1000_RCTL_LPE;
4098		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4099			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4100			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4101		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4102			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4103			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4104		}
4105		/* Set maximum packet len */
4106		psize = adapter->max_frame_size;
4107		/* are we on a vlan? */
4108		if (adapter->ifp->if_vlantrunk != NULL)
4109			psize += VLAN_TAG_SIZE;
4110		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4111	} else {
4112		rctl &= ~E1000_RCTL_LPE;
4113		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4114		rctl |= E1000_RCTL_SZ_2048;
4115	}
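	/*
	** The SRRCTL buffer size fields set above are in 1KB
	** units, hence the E1000_SRRCTL_BSIZEPKT_SHIFT shifts.
	*/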
4116
4117	/* Setup the Base and Length of the Rx Descriptor Rings */
4118	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4119		u64 bus_addr = rxr->rxdma.dma_paddr;
4120		u32 rxdctl;
4121
4122		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4123		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4124		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4125		    (uint32_t)(bus_addr >> 32));
4126		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4127		    (uint32_t)bus_addr);
4128		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4129		/* Enable this Queue */
4130		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4131		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4132		rxdctl &= 0xFFF00000;
4133		rxdctl |= IGB_RX_PTHRESH;
4134		rxdctl |= IGB_RX_HTHRESH << 8;
4135		rxdctl |= IGB_RX_WTHRESH << 16;
4136		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4137	}
4138
4139	/*
4140	** Setup for RX MultiQueue
4141	*/
4142	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4143	if (adapter->num_queues > 1) {
4144		u32 random[10], mrqc, shift = 0;
4145		union igb_reta {
4146			u32 dword;
4147			u8  bytes[4];
4148		} reta;
4149
4150		arc4rand(&random, sizeof(random), 0);
4151		if (adapter->hw.mac.type == e1000_82575)
4152			shift = 6;
4153		/* Warning FM follows */
4154		for (int i = 0; i < 128; i++) {
4155			reta.bytes[i & 3] =
4156			    (i % adapter->num_queues) << shift;
4157			if ((i & 3) == 3)
4158				E1000_WRITE_REG(hw,
4159				    E1000_RETA(i >> 2), reta.dword);
4160		}
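		/*
		** The redirection table holds 128 entries, written
		** a dword (4 entries) at a time, spreading flows
		** round-robin across the queues; the 82575 wants
		** the queue index in the upper bits, hence the shift.
		*/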
4161		/* Now fill in hash table */
4162		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4163		for (int i = 0; i < 10; i++)
4164			E1000_WRITE_REG_ARRAY(hw,
4165			    E1000_RSSRK(0), i, random[i]);
4166
4167		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4168		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4169		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4170		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4171		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4172		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4173		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4174		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4175
4176		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4177
4178		/*
4179		** NOTE: Receive Full-Packet Checksum Offload
4180		** is mutually exclusive with Multiqueue. However,
4181		** this is not the same as TCP/IP checksum offload,
4182		** which still works.
4183		*/
4184		rxcsum |= E1000_RXCSUM_PCSD;
4185#if __FreeBSD_version >= 800000
4186		/* For SCTP Offload */
4187		if ((hw->mac.type == e1000_82576)
4188		    && (ifp->if_capenable & IFCAP_RXCSUM))
4189			rxcsum |= E1000_RXCSUM_CRCOFL;
4190#endif
4191	} else {
4192		/* Non RSS setup */
4193		if (ifp->if_capenable & IFCAP_RXCSUM) {
4194			rxcsum |= E1000_RXCSUM_IPPCSE;
4195#if __FreeBSD_version >= 800000
4196			if (adapter->hw.mac.type == e1000_82576)
4197				rxcsum |= E1000_RXCSUM_CRCOFL;
4198#endif
4199		} else
4200			rxcsum &= ~E1000_RXCSUM_TUOFL;
4201	}
4202	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4203
4204	/* Setup the Receive Control Register */
4205	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4206	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4207		   E1000_RCTL_RDMTS_HALF |
4208		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4209	/* Strip CRC bytes. */
4210	rctl |= E1000_RCTL_SECRC;
4211	/* Make sure VLAN Filters are off */
4212	rctl &= ~E1000_RCTL_VFE;
4213	/* Don't store bad packets */
4214	rctl &= ~E1000_RCTL_SBP;
4215
4216	/* Enable Receives */
4217	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4218
4219	/*
4220	 * Setup the HW Rx Head and Tail Descriptor Pointers
4221	 *   - needs to be after enable
4222	 */
4223	for (int i = 0; i < adapter->num_queues; i++) {
4224		rxr = &adapter->rx_rings[i];
4225		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4226		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4227	}
4228	return;
4229}
4230
4231/*********************************************************************
4232 *
4233 *  Free receive rings.
4234 *
4235 **********************************************************************/
4236static void
4237igb_free_receive_structures(struct adapter *adapter)
4238{
4239	struct rx_ring *rxr = adapter->rx_rings;
4240
4241	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4242		struct lro_ctrl	*lro = &rxr->lro;
4243		igb_free_receive_buffers(rxr);
4244		tcp_lro_free(lro);
4245		igb_dma_free(adapter, &rxr->rxdma);
4246	}
4247
4248	free(adapter->rx_rings, M_DEVBUF);
4249}
4250
4251/*********************************************************************
4252 *
4253 *  Free receive ring data structures.
4254 *
4255 **********************************************************************/
4256static void
4257igb_free_receive_buffers(struct rx_ring *rxr)
4258{
4259	struct adapter		*adapter = rxr->adapter;
4260	struct igb_rx_buf	*rxbuf;
4261	int i;
4262
4263	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4264
4265	/* Cleanup any existing buffers */
4266	if (rxr->rx_buffers != NULL) {
4267		for (i = 0; i < adapter->num_rx_desc; i++) {
4268			rxbuf = &rxr->rx_buffers[i];
4269			if (rxbuf->m_head != NULL) {
4270				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4271				    BUS_DMASYNC_POSTREAD);
4272				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4273				rxbuf->m_head->m_flags |= M_PKTHDR;
4274				m_freem(rxbuf->m_head);
4275			}
4276			if (rxbuf->m_pack != NULL) {
4277				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4278				    BUS_DMASYNC_POSTREAD);
4279				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4280				rxbuf->m_pack->m_flags |= M_PKTHDR;
4281				m_freem(rxbuf->m_pack);
4282			}
4283			rxbuf->m_head = NULL;
4284			rxbuf->m_pack = NULL;
4285			if (rxbuf->hmap != NULL) {
4286				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4287				rxbuf->hmap = NULL;
4288			}
4289			if (rxbuf->pmap != NULL) {
4290				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4291				rxbuf->pmap = NULL;
4292			}
4293		}
4294		if (rxr->rx_buffers != NULL) {
4295			free(rxr->rx_buffers, M_DEVBUF);
4296			rxr->rx_buffers = NULL;
4297		}
4298	}
4299
4300	if (rxr->htag != NULL) {
4301		bus_dma_tag_destroy(rxr->htag);
4302		rxr->htag = NULL;
4303	}
4304	if (rxr->ptag != NULL) {
4305		bus_dma_tag_destroy(rxr->ptag);
4306		rxr->ptag = NULL;
4307	}
4308}
4309
4310static __inline void
4311igb_rx_discard(struct rx_ring *rxr, int i)
4312{
4313	struct igb_rx_buf	*rbuf;
4314
4315	rbuf = &rxr->rx_buffers[i];
4316
4317	/* Partially received? Free the chain */
4318	if (rxr->fmp != NULL) {
4319		rxr->fmp->m_flags |= M_PKTHDR;
4320		m_freem(rxr->fmp);
4321		rxr->fmp = NULL;
4322		rxr->lmp = NULL;
4323	}
4324
4325	/*
4326	** With advanced descriptors the writeback
4327	** clobbers the buffer addrs, so it's easier
4328	** to just free the existing mbufs and take
4329	** the normal refresh path to get new buffers
4330	** and mapping.
4331	*/
4332	if (rbuf->m_head) {
4333		m_free(rbuf->m_head);
4334		rbuf->m_head = NULL;
4335	}
4336
4337	if (rbuf->m_pack) {
4338		m_free(rbuf->m_pack);
4339		rbuf->m_pack = NULL;
4340	}
4341
4342	return;
4343}
4344
4345static __inline void
4346igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4347{
4348
4349	/*
4350	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
4351	 * should be computed by hardware. Also it should not have VLAN tag in
4352	 * ethernet header.
4353	 */
4354	if (rxr->lro_enabled &&
4355	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4356	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4357	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4358	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4359	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4360	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4361		/*
4362		 * Send to the stack if:
4363		 *  - LRO not enabled, or
4364		 *  - no LRO resources, or
4365		 *  - LRO enqueue fails
4366		 */
4367		if (rxr->lro.lro_cnt != 0)
4368			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4369				return;
4370	}
4371	IGB_RX_UNLOCK(rxr);
4372	(*ifp->if_input)(ifp, m);
4373	IGB_RX_LOCK(rxr);
4374}
4375
4376/*********************************************************************
4377 *
4378 *  This routine executes in interrupt context. It replenishes
4379 *  the mbufs in the descriptor ring and sends data which has
4380 *  been DMA'd into host memory up to the stack.
4381 *
4382 *  We loop at most count times if count is > 0, or until done if
4383 *  count < 0.
4384 *
4385 *  Return TRUE if more to clean, FALSE otherwise
4386 *********************************************************************/
4387static bool
4388igb_rxeof(struct igb_queue *que, int count, int *done)
4389{
4390	struct adapter		*adapter = que->adapter;
4391	struct rx_ring		*rxr = que->rxr;
4392	struct ifnet		*ifp = adapter->ifp;
4393	struct lro_ctrl		*lro = &rxr->lro;
4394	struct lro_entry	*queued;
4395	int			i, processed = 0, rxdone = 0;
4396	u32			ptype, staterr = 0;
4397	union e1000_adv_rx_desc	*cur;
4398
4399	IGB_RX_LOCK(rxr);
4400	/* Sync the ring. */
4401	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4402	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4403
4404	/* Main clean loop */
4405	for (i = rxr->next_to_check; count != 0;) {
4406		struct mbuf		*sendmp, *mh, *mp;
4407		struct igb_rx_buf	*rxbuf;
4408		u16			hlen, plen, hdr, vtag;
4409		bool			eop = FALSE;
4410
4411		cur = &rxr->rx_base[i];
4412		staterr = le32toh(cur->wb.upper.status_error);
4413		if ((staterr & E1000_RXD_STAT_DD) == 0)
4414			break;
4415		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4416			break;
4417		count--;
4418		sendmp = mh = mp = NULL;
4419		cur->wb.upper.status_error = 0;
4420		rxbuf = &rxr->rx_buffers[i];
4421		plen = le16toh(cur->wb.upper.length);
4422		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4423		if ((adapter->hw.mac.type == e1000_i350) &&
4424		    (staterr & E1000_RXDEXT_STATERR_LB))
4425			vtag = be16toh(cur->wb.upper.vlan);
4426		else
4427			vtag = le16toh(cur->wb.upper.vlan);
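		/*
		** The i350 delivers the VLAN field of locally
		** looped-back packets in network byte order,
		** hence the be16toh() in that case above.
		*/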
4428		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4429		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4430
4431		/* Make sure all segments of a bad packet are discarded */
4432		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4433		    (rxr->discard)) {
4434			ifp->if_ierrors++;
4435			++rxr->rx_discarded;
4436			if (!eop) /* Catch subsequent segs */
4437				rxr->discard = TRUE;
4438			else
4439				rxr->discard = FALSE;
4440			igb_rx_discard(rxr, i);
4441			goto next_desc;
4442		}
4443
4444		/*
4445		** The way the hardware is configured to
4446		** split, it will ONLY use the header buffer
4447		** when header split is enabled, otherwise we
4448			** get normal behavior, i.e., both header and
4449		** payload are DMA'd into the payload buffer.
4450		**
4451		** The fmp test is to catch the case where a
4452		** packet spans multiple descriptors, in that
4453		** case only the first header is valid.
4454		*/
4455		if (rxr->hdr_split && rxr->fmp == NULL) {
4456			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4457			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4458			if (hlen > IGB_HDR_BUF)
4459				hlen = IGB_HDR_BUF;
4460			mh = rxr->rx_buffers[i].m_head;
4461			mh->m_len = hlen;
4462			/* clear buf pointer for refresh */
4463			rxbuf->m_head = NULL;
4464			/*
4465			** Get the payload length; this
4466			** could be zero if it's a small
4467			** packet.
4468			*/
4469			if (plen > 0) {
4470				mp = rxr->rx_buffers[i].m_pack;
4471				mp->m_len = plen;
4472				mh->m_next = mp;
4473				/* clear buf pointer */
4474				rxbuf->m_pack = NULL;
4475				rxr->rx_split_packets++;
4476			}
4477		} else {
4478			/*
4479			** Either no header split, or a
4480			** secondary piece of a fragmented
4481			** split packet.
4482			*/
4483			mh = rxr->rx_buffers[i].m_pack;
4484			mh->m_len = plen;
4485			/* clear buf info for refresh */
4486			rxbuf->m_pack = NULL;
4487		}
4488
4489		++processed; /* So we know when to refresh */
4490
4491		/* Initial frame - setup */
4492		if (rxr->fmp == NULL) {
4493			mh->m_pkthdr.len = mh->m_len;
4494			/* Save the head of the chain */
4495			rxr->fmp = mh;
4496			rxr->lmp = mh;
4497			if (mp != NULL) {
4498				/* Add payload if split */
4499				mh->m_pkthdr.len += mp->m_len;
4500				rxr->lmp = mh->m_next;
4501			}
4502		} else {
4503			/* Chain mbuf's together */
4504			rxr->lmp->m_next = mh;
4505			rxr->lmp = rxr->lmp->m_next;
4506			rxr->fmp->m_pkthdr.len += mh->m_len;
4507		}
4508
4509		if (eop) {
4510			rxr->fmp->m_pkthdr.rcvif = ifp;
4511			ifp->if_ipackets++;
4512			rxr->rx_packets++;
4513			/* capture data for AIM */
4514			rxr->packets++;
4515			rxr->bytes += rxr->fmp->m_pkthdr.len;
4516			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4517
4518			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4519				igb_rx_checksum(staterr, rxr->fmp, ptype);
4520
4521			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4522			    (staterr & E1000_RXD_STAT_VP) != 0) {
4523				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4524				rxr->fmp->m_flags |= M_VLANTAG;
4525			}
4526#if __FreeBSD_version >= 800000
4527			rxr->fmp->m_pkthdr.flowid = que->msix;
4528			rxr->fmp->m_flags |= M_FLOWID;
4529#endif
4530			sendmp = rxr->fmp;
4531			/* Make sure to set M_PKTHDR. */
4532			sendmp->m_flags |= M_PKTHDR;
4533			rxr->fmp = NULL;
4534			rxr->lmp = NULL;
4535		}
4536
4537next_desc:
4538		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4539		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4540
4541		/* Advance our pointers to the next descriptor. */
4542		if (++i == adapter->num_rx_desc)
4543			i = 0;
4544		/*
4545		** Send to the stack or LRO
4546		*/
4547		if (sendmp != NULL) {
4548			rxr->next_to_check = i;
4549			igb_rx_input(rxr, ifp, sendmp, ptype);
4550			i = rxr->next_to_check;
4551			rxdone++;
4552		}
4553
4554		/* Every 8 descriptors we go to refresh mbufs */
4555		if (processed == 8) {
4556                        igb_refresh_mbufs(rxr, i);
4557                        processed = 0;
4558		}
4559	}
4560
4561	/* Catch any remainders */
4562	if (igb_rx_unrefreshed(rxr))
4563		igb_refresh_mbufs(rxr, i);
4564
4565	rxr->next_to_check = i;
4566
4567	/*
4568	 * Flush any outstanding LRO work
4569	 */
4570	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4571		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4572		tcp_lro_flush(lro, queued);
4573	}
4574
4575	if (done != NULL)
4576		*done = rxdone;
4577
4578	IGB_RX_UNLOCK(rxr);
4579	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4580}
4581
4582/*********************************************************************
4583 *
4584 *  Verify that the hardware indicated that the checksum is valid.
4585 *  Inform the stack about the status of the checksum so that
4586 *  the stack doesn't spend time verifying it.
4587 *
4588 *********************************************************************/
4589static void
4590igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4591{
4592	u16 status = (u16)staterr;
4593	u8  errors = (u8) (staterr >> 24);
4594	int sctp;
4595
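	/*
	** staterr packs the descriptor status bits in 15:0 and
	** the error bits in 31:24, hence the two casts above.
	*/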
4596	/* Ignore Checksum bit is set */
4597	if (status & E1000_RXD_STAT_IXSM) {
4598		mp->m_pkthdr.csum_flags = 0;
4599		return;
4600	}
4601
4602	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4603	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4604		sctp = 1;
4605	else
4606		sctp = 0;
4607	if (status & E1000_RXD_STAT_IPCS) {
4608		/* Did it pass? */
4609		if (!(errors & E1000_RXD_ERR_IPE)) {
4610			/* IP Checksum Good */
4611			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4612			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4613		} else
4614			mp->m_pkthdr.csum_flags = 0;
4615	}
4616
4617	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4618		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4619#if __FreeBSD_version >= 800000
4620		if (sctp) /* reassign */
4621			type = CSUM_SCTP_VALID;
4622#endif
4623		/* Did it pass? */
4624		if (!(errors & E1000_RXD_ERR_TCPE)) {
4625			mp->m_pkthdr.csum_flags |= type;
4626			if (sctp == 0)
4627				mp->m_pkthdr.csum_data = htons(0xffff);
4628		}
4629	}
4630	return;
4631}
4632
4633/*
4634 * This routine is run via a vlan
4635 * config EVENT
4636 */
4637static void
4638igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4639{
4640	struct adapter	*adapter = ifp->if_softc;
4641	u32		index, bit;
4642
4643	if (ifp->if_softc !=  arg)   /* Not our event */
4644		return;
4645
4646	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4647                return;
4648
4649	IGB_CORE_LOCK(adapter);
4650	index = (vtag >> 5) & 0x7F;
4651	bit = vtag & 0x1F;
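	/* Bits 11:5 of the tag select the VFTA word, bits 4:0 the bit */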
4652	adapter->shadow_vfta[index] |= (1 << bit);
4653	++adapter->num_vlans;
4654	/* Change hw filter setting */
4655	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4656		igb_setup_vlan_hw_support(adapter);
4657	IGB_CORE_UNLOCK(adapter);
4658}
4659
4660/*
4661 * This routine is run via a vlan
4662 * unconfig EVENT
4663 */
4664static void
4665igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4666{
4667	struct adapter	*adapter = ifp->if_softc;
4668	u32		index, bit;
4669
4670	if (ifp->if_softc !=  arg)
4671		return;
4672
4673	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4674                return;
4675
4676	IGB_CORE_LOCK(adapter);
4677	index = (vtag >> 5) & 0x7F;
4678	bit = vtag & 0x1F;
4679	adapter->shadow_vfta[index] &= ~(1 << bit);
4680	--adapter->num_vlans;
4681	/* Change hw filter setting */
4682	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4683		igb_setup_vlan_hw_support(adapter);
4684	IGB_CORE_UNLOCK(adapter);
4685}
4686
4687static void
4688igb_setup_vlan_hw_support(struct adapter *adapter)
4689{
4690	struct e1000_hw *hw = &adapter->hw;
4691	struct ifnet	*ifp = adapter->ifp;
4692	u32             reg;
4693
4694	if (adapter->vf_ifp) {
4695		e1000_rlpml_set_vf(hw,
4696		    adapter->max_frame_size + VLAN_TAG_SIZE);
4697		return;
4698	}
4699
4700	reg = E1000_READ_REG(hw, E1000_CTRL);
4701	reg |= E1000_CTRL_VME;
4702	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4703
4704	/* Enable the Filter Table */
4705	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4706		reg = E1000_READ_REG(hw, E1000_RCTL);
4707		reg &= ~E1000_RCTL_CFIEN;
4708		reg |= E1000_RCTL_VFE;
4709		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4710	}
4711
4712	/* Update the frame size */
4713	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4714	    adapter->max_frame_size + VLAN_TAG_SIZE);
4715
4716	/* Don't bother with table if no vlans */
4717	if ((adapter->num_vlans == 0) ||
4718	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4719                return;
4720	/*
4721	** A soft reset zeroes out the VFTA, so
4722	** we need to repopulate it now.
4723	*/
4724	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4725		if (adapter->shadow_vfta[i] != 0) {
4726			if (adapter->vf_ifp)
4727				e1000_vfta_set_vf(hw,
4728				    adapter->shadow_vfta[i], TRUE);
4729			else
4730				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4731				    i, adapter->shadow_vfta[i]);
4732		}
4733}
4734
4735static void
4736igb_enable_intr(struct adapter *adapter)
4737{
4738	/* With RSS set up what to auto clear */
4739	if (adapter->msix_mem) {
4740		u32 mask = (adapter->que_mask | adapter->link_mask);
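		/*
		** EIAC auto-clears these causes on interrupt, EIAM
		** auto-masks them, and EIMS enables them; IMS only
		** carries link status changes in MSIX mode.
		*/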
4741		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4742		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4743		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4744		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4745		    E1000_IMS_LSC);
4746	} else {
4747		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4748		    IMS_ENABLE_MASK);
4749	}
4750	E1000_WRITE_FLUSH(&adapter->hw);
4751
4752	return;
4753}
4754
4755static void
4756igb_disable_intr(struct adapter *adapter)
4757{
4758	if (adapter->msix_mem) {
4759		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4760		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4761	}
4762	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4763	E1000_WRITE_FLUSH(&adapter->hw);
4764	return;
4765}
4766
4767/*
4768 * Bit of a misnomer: what this really means is
4769 * to enable OS management of the system, i.e.
4770 * to disable special hardware management features.
4771 */
4772static void
4773igb_init_manageability(struct adapter *adapter)
4774{
4775	if (adapter->has_manage) {
4776		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4777		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4778
4779		/* disable hardware interception of ARP */
4780		manc &= ~(E1000_MANC_ARP_EN);
4781
4782                /* enable receiving management packets to the host */
4783		manc |= E1000_MANC_EN_MNG2HOST;
4784		manc2h |= 1 << 5;  /* Mng Port 623 */
4785		manc2h |= 1 << 6;  /* Mng Port 664 */
4786		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4787		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4788	}
4789}
4790
4791/*
4792 * Give control back to hardware management
4793 * controller if there is one.
4794 */
4795static void
4796igb_release_manageability(struct adapter *adapter)
4797{
4798	if (adapter->has_manage) {
4799		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4800
4801		/* re-enable hardware interception of ARP */
4802		manc |= E1000_MANC_ARP_EN;
4803		manc &= ~E1000_MANC_EN_MNG2HOST;
4804
4805		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4806	}
4807}
4808
4809/*
4810 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4811 * For ASF and Pass Through versions of f/w this means that
4812 * the driver is loaded.
4813 *
4814 */
4815static void
4816igb_get_hw_control(struct adapter *adapter)
4817{
4818	u32 ctrl_ext;
4819
4820	if (adapter->vf_ifp)
4821		return;
4822
4823	/* Let firmware know the driver has taken over */
4824	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4825	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4826	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4827}
4828
4829/*
4830 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4831 * For ASF and Pass Through versions of f/w this means that the
4832 * driver is no longer loaded.
4833 *
4834 */
4835static void
4836igb_release_hw_control(struct adapter *adapter)
4837{
4838	u32 ctrl_ext;
4839
4840	if (adapter->vf_ifp)
4841		return;
4842
4843	/* Let firmware take over control of h/w */
4844	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4845	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4846	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4847}
4848
4849static int
4850igb_is_valid_ether_addr(uint8_t *addr)
4851{
4852	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4853
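	/* Reject multicast addresses (I/G bit set) and the all-zero address */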
4854	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4855		return (FALSE);
4856	}
4857
4858	return (TRUE);
4859}
4860
4861
4862/*
4863 * Enable PCI Wake On Lan capability
4864 */
4865static void
4866igb_enable_wakeup(device_t dev)
4867{
4868	u16     cap, status;
4869	u8      id;
4870
4871	/* First find the capabilities pointer*/
4872	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4873	/* Read the PM Capabilities */
4874	id = pci_read_config(dev, cap, 1);
4875	if (id != PCIY_PMG)     /* Something wrong */
4876		return;
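	/*
	** Note: only the first capability in the list is
	** examined here; PM is assumed to be first.
	*/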
4877	/* OK, we have the power capabilities, so
4878	   now get the status register */
4879	cap += PCIR_POWER_STATUS;
4880	status = pci_read_config(dev, cap, 2);
4881	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4882	pci_write_config(dev, cap, status, 2);
4883	return;
4884}
4885
4886static void
4887igb_led_func(void *arg, int onoff)
4888{
4889	struct adapter	*adapter = arg;
4890
4891	IGB_CORE_LOCK(adapter);
4892	if (onoff) {
4893		e1000_setup_led(&adapter->hw);
4894		e1000_led_on(&adapter->hw);
4895	} else {
4896		e1000_led_off(&adapter->hw);
4897		e1000_cleanup_led(&adapter->hw);
4898	}
4899	IGB_CORE_UNLOCK(adapter);
4900}
4901
4902/**********************************************************************
4903 *
4904 *  Update the board statistics counters.
4905 *
4906 **********************************************************************/
4907static void
4908igb_update_stats_counters(struct adapter *adapter)
4909{
4910	struct ifnet		*ifp;
4911        struct e1000_hw		*hw = &adapter->hw;
4912	struct e1000_hw_stats	*stats;
4913
4914	/*
4915	** The virtual function adapter has only a
4916	** small controlled set of stats, so do only
4917	** those and return.
4918	*/
4919	if (adapter->vf_ifp) {
4920		igb_update_vf_stats_counters(adapter);
4921		return;
4922	}
4923
4924	stats = (struct e1000_hw_stats	*)adapter->stats;
4925
4926	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4927	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4928		stats->symerrs +=
4929		    E1000_READ_REG(hw,E1000_SYMERRS);
4930		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4931	}
4932
4933	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4934	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4935	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4936	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4937
4938	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4939	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4940	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4941	stats->dc += E1000_READ_REG(hw, E1000_DC);
4942	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4943	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4944	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4945	/*
4946	** For watchdog management we need to know if we have been
4947	** paused during the last interval, so capture that here.
4948	*/
4949        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4950        stats->xoffrxc += adapter->pause_frames;
4951	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4952	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4953	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4954	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4955	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4956	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4957	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4958	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4959	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4960	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4961	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4962	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4963
4964	/* For the 64-bit byte counters the low dword must be read first. */
4965	/* Both registers clear on the read of the high dword */
4966
4967	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4968	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4969	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4970	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4971
4972	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4973	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4974	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4975	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4976	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4977
4978	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
4979	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
4980
4981	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4982	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
4983	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
4984	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
4985	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
4986	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
4987	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
4988	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
4989	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
4990	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
4991
4992	/* Interrupt Counts */
4993
4994	stats->iac += E1000_READ_REG(hw, E1000_IAC);
4995	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
4996	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
4997	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
4998	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
4999	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5000	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5001	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5002	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5003
5004	/* Host to Card Statistics */
5005
5006	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5007	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5008	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5009	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5010	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5011	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5012	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5013	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5014	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5015	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5016	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5017	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5018	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5019	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5020
5021	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5022	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5023	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5024	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5025	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5026	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5027
5028	ifp = adapter->ifp;
5029	ifp->if_collisions = stats->colc;
5030
5031	/* Rx Errors */
5032	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5033	    stats->crcerrs + stats->algnerrc +
5034	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5035
5036	/* Tx Errors */
5037	ifp->if_oerrors = stats->ecol +
5038	    stats->latecol + adapter->watchdog_events;
5039
5040	/* Driver specific counters */
5041	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5042	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5043	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5044	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5045	adapter->packet_buf_alloc_tx =
5046	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5047	adapter->packet_buf_alloc_rx =
5048	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5049}
5050
5051
5052/**********************************************************************
5053 *
5054 *  Initialize the VF board statistics counters.
5055 *
5056 **********************************************************************/
5057static void
5058igb_vf_init_stats(struct adapter *adapter)
5059{
5060        struct e1000_hw *hw = &adapter->hw;
5061	struct e1000_vf_stats	*stats;
5062
5063	stats = (struct e1000_vf_stats	*)adapter->stats;
5064	if (stats == NULL)
5065		return;
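        /*
        ** Seed the last_* snapshots; the VF counters are not
        ** clear-on-read, so later updates compute deltas
        ** against these values.
        */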
5066        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5067        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5068        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5069        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5070        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5071}
5072
5073/**********************************************************************
5074 *
5075 *  Update the VF board statistics counters.
5076 *
5077 **********************************************************************/
5078static void
5079igb_update_vf_stats_counters(struct adapter *adapter)
5080{
5081	struct e1000_hw *hw = &adapter->hw;
5082	struct e1000_vf_stats	*stats;
5083
5084	if (adapter->link_speed == 0)
5085		return;
5086
5087	stats = (struct e1000_vf_stats	*)adapter->stats;
5088
5089	UPDATE_VF_REG(E1000_VFGPRC,
5090	    stats->last_gprc, stats->gprc);
5091	UPDATE_VF_REG(E1000_VFGORC,
5092	    stats->last_gorc, stats->gorc);
5093	UPDATE_VF_REG(E1000_VFGPTC,
5094	    stats->last_gptc, stats->gptc);
5095	UPDATE_VF_REG(E1000_VFGOTC,
5096	    stats->last_gotc, stats->gotc);
5097	UPDATE_VF_REG(E1000_VFMPRC,
5098	    stats->last_mprc, stats->mprc);
5099}
5100
5101/* Export a single 32-bit register via a read-only sysctl. */
5102static int
5103igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5104{
5105	struct adapter *adapter;
5106	u_int val;
5107
5108	adapter = oidp->oid_arg1;
5109	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5110	return (sysctl_handle_int(oidp, &val, 0, req));
5111}
5112
5113/*
5114**  Tuneable interrupt rate handler
5115*/
5116static int
5117igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5118{
5119	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5120	int			error;
5121	u32			reg, usec, rate;
5122
5123	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5124	usec = ((reg & 0x7FFC) >> 2);
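	/*
	** EITR bits 14:2 hold the throttle interval (treated here
	** as microseconds), so the rate reported is its reciprocal.
	** Values written via this sysctl are accepted but not
	** applied to the hardware.
	*/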
5125	if (usec > 0)
5126		rate = 1000000 / usec;
5127	else
5128		rate = 0;
5129	error = sysctl_handle_int(oidp, &rate, 0, req);
5130	if (error || !req->newptr)
5131		return error;
5132	return 0;
5133}
5134
5135/*
5136 * Add sysctl variables, one per statistic, to the system.
5137 */
5138static void
5139igb_add_hw_stats(struct adapter *adapter)
5140{
5141	device_t dev = adapter->dev;
5142
5143	struct tx_ring *txr = adapter->tx_rings;
5144	struct rx_ring *rxr = adapter->rx_rings;
5145
5146	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5147	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5148	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5149	struct e1000_hw_stats *stats = adapter->stats;
5150
5151	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5152	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5153
5154#define QUEUE_NAME_LEN 32
5155	char namebuf[QUEUE_NAME_LEN];
5156
5157	/* Driver Statistics */
5158	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5159			CTLFLAG_RD, &adapter->link_irq, 0,
5160			"Link MSIX IRQ Handled");
5161	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5162			CTLFLAG_RD, &adapter->dropped_pkts,
5163			"Driver dropped packets");
5164	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5165			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5166			"Driver tx dma failure in xmit");
5167	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5168			CTLFLAG_RD, &adapter->rx_overruns,
5169			"RX overruns");
5170	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5171			CTLFLAG_RD, &adapter->watchdog_events,
5172			"Watchdog timeouts");
5173
5174	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5175			CTLFLAG_RD, &adapter->device_control,
5176			"Device Control Register");
5177	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5178			CTLFLAG_RD, &adapter->rx_control,
5179			"Receiver Control Register");
5180	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5181			CTLFLAG_RD, &adapter->int_mask,
5182			"Interrupt Mask");
5183	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5184			CTLFLAG_RD, &adapter->eint_mask,
5185			"Extended Interrupt Mask");
5186	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5187			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5188			"Transmit Buffer Packet Allocation");
5189	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5190			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5191			"Receive Buffer Packet Allocation");
5192	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5193			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5194			"Flow Control High Watermark");
5195	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5196			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5197			"Flow Control Low Watermark");
5198
5199	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5200		struct lro_ctrl *lro = &rxr->lro;
5201
5202		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5203		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5204					    CTLFLAG_RD, NULL, "Queue Name");
5205		queue_list = SYSCTL_CHILDREN(queue_node);
5206
5207		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5208				CTLFLAG_RD, &adapter->queues[i],
5209				sizeof(&adapter->queues[i]),
5210				igb_sysctl_interrupt_rate_handler,
5211				"IU", "Interrupt Rate");
5212
5213		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5214				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5215				igb_sysctl_reg_handler, "IU",
5216 				"Transmit Descriptor Head");
5217		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5218				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5219				igb_sysctl_reg_handler, "IU",
5220 				"Transmit Descriptor Tail");
5221		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5222				CTLFLAG_RD, &txr->no_desc_avail,
5223				"Queue No Descriptor Available");
5224		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5225				CTLFLAG_RD, &txr->tx_packets,
5226				"Queue Packets Transmitted");
5227
5228		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5229				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5230				igb_sysctl_reg_handler, "IU",
5231				"Receive Descriptor Head");
5232		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5233				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5234				igb_sysctl_reg_handler, "IU",
5235				"Receive Descriptor Tail");
5236		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5237				CTLFLAG_RD, &rxr->rx_packets,
5238				"Queue Packets Received");
5239		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5240				CTLFLAG_RD, &rxr->rx_bytes,
5241				"Queue Bytes Received");
5242		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5243				CTLFLAG_RD, &lro->lro_queued, 0,
5244				"LRO Queued");
5245		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5246				CTLFLAG_RD, &lro->lro_flushed, 0,
5247				"LRO Flushed");
5248	}
5249
5250	/* MAC stats get their own sub node */
5251
5252	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5253				    CTLFLAG_RD, NULL, "MAC Statistics");
5254	stat_list = SYSCTL_CHILDREN(stat_node);
5255
5256	/*
5257	** VF adapter has a very limited set of stats
5258	** since it's not managing the metal, so to speak.
5259	*/
5260	if (adapter->vf_ifp) {
5261		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5262				CTLFLAG_RD, &stats->gprc,
5263				"Good Packets Received");
5264		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5265				CTLFLAG_RD, &stats->gptc,
5266				"Good Packets Transmitted");
5267		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5268				CTLFLAG_RD, &stats->gorc,
5269				"Good Octets Received");
5270		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5271				CTLFLAG_RD, &stats->gotc,
5272				"Good Octets Transmitted");
5273		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5274				CTLFLAG_RD, &stats->mprc,
5275				"Multicast Packets Received");
5276		return;
5277	}
5278
5279	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5280			CTLFLAG_RD, &stats->ecol,
5281			"Excessive collisions");
5282	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5283			CTLFLAG_RD, &stats->scc,
5284			"Single collisions");
5285	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5286			CTLFLAG_RD, &stats->mcc,
5287			"Multiple collisions");
5288	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5289			CTLFLAG_RD, &stats->latecol,
5290			"Late collisions");
5291	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5292			CTLFLAG_RD, &stats->colc,
5293			"Collision Count");
5294	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5295			CTLFLAG_RD, &stats->symerrs,
5296			"Symbol Errors");
5297	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5298			CTLFLAG_RD, &stats->sec,
5299			"Sequence Errors");
5300	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5301			CTLFLAG_RD, &stats->dc,
5302			"Defer Count");
5303	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5304			CTLFLAG_RD, &stats->mpc,
5305			"Missed Packets");
5306	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5307			CTLFLAG_RD, &stats->rnbc,
5308			"Receive No Buffers");
5309	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5310			CTLFLAG_RD, &stats->ruc,
5311			"Receive Undersize");
5312	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5313			CTLFLAG_RD, &stats->rfc,
5314			"Fragmented Packets Received ");
5315	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5316			CTLFLAG_RD, &stats->roc,
5317			"Oversized Packets Received");
5318	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5319			CTLFLAG_RD, &stats->rjc,
5320			"Received Jabber");
5321	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5322			CTLFLAG_RD, &stats->rxerrc,
5323			"Receive Errors");
5324	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5325			CTLFLAG_RD, &stats->crcerrs,
5326			"CRC errors");
5327	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5328			CTLFLAG_RD, &stats->algnerrc,
5329			"Alignment Errors");
5330	/* On 82575 these are collision counts */
5331	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5332			CTLFLAG_RD, &stats->cexterr,
5333			"Collision/Carrier extension errors");
5334	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5335			CTLFLAG_RD, &stats->xonrxc,
5336			"XON Received");
5337	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5338			CTLFLAG_RD, &stats->xontxc,
5339			"XON Transmitted");
5340	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5341			CTLFLAG_RD, &stats->xoffrxc,
5342			"XOFF Received");
5343	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5344			CTLFLAG_RD, &stats->xofftxc,
5345			"XOFF Transmitted");
5346	/* Packet Reception Stats */
5347	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5348			CTLFLAG_RD, &stats->tpr,
5349			"Total Packets Received");
5350	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5351			CTLFLAG_RD, &stats->gprc,
5352			"Good Packets Received");
5353	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5354			CTLFLAG_RD, &stats->bprc,
5355			"Broadcast Packets Received");
5356	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5357			CTLFLAG_RD, &stats->mprc,
5358			"Multicast Packets Received");
5359	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5360			CTLFLAG_RD, &stats->prc64,
5361			"64 byte frames received");
5362	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5363			CTLFLAG_RD, &stats->prc127,
5364			"65-127 byte frames received");
5365	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5366			CTLFLAG_RD, &stats->prc255,
5367			"128-255 byte frames received");
5368	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5369			CTLFLAG_RD, &stats->prc511,
5370			"256-511 byte frames received");
5371	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5372			CTLFLAG_RD, &stats->prc1023,
5373			"512-1023 byte frames received");
5374	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5375			CTLFLAG_RD, &stats->prc1522,
5376			"1024-1522 byte frames received");
5377	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5378			CTLFLAG_RD, &stats->gorc,
5379			"Good Octets Received");
5380
5381	/* Packet Transmission Stats */
5382	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5383			CTLFLAG_RD, &stats->gotc,
5384			"Good Octets Transmitted");
5385	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5386			CTLFLAG_RD, &stats->tpt,
5387			"Total Packets Transmitted");
5388	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5389			CTLFLAG_RD, &stats->gptc,
5390			"Good Packets Transmitted");
5391	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5392			CTLFLAG_RD, &stats->bptc,
5393			"Broadcast Packets Transmitted");
5394	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5395			CTLFLAG_RD, &stats->mptc,
5396			"Multicast Packets Transmitted");
5397	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5398			CTLFLAG_RD, &stats->ptc64,
5399			"64 byte frames transmitted");
5400	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5401			CTLFLAG_RD, &stats->ptc127,
5402			"65-127 byte frames transmitted");
5403	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5404			CTLFLAG_RD, &stats->ptc255,
5405			"128-255 byte frames transmitted");
5406	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5407			CTLFLAG_RD, &stats->ptc511,
5408			"256-511 byte frames transmitted");
5409	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5410			CTLFLAG_RD, &stats->ptc1023,
5411			"512-1023 byte frames transmitted");
5412	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5413			CTLFLAG_RD, &stats->ptc1522,
5414			"1024-1522 byte frames transmitted");
5415	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5416			CTLFLAG_RD, &stats->tsctc,
5417			"TSO Contexts Transmitted");
5418	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5419			CTLFLAG_RD, &stats->tsctfc,
5420			"TSO Contexts Failed");
5421
5422
5423	/* Interrupt Stats */
5424
5425	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5426				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5427	int_list = SYSCTL_CHILDREN(int_node);
5428
5429	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5430			CTLFLAG_RD, &stats->iac,
5431			"Interrupt Assertion Count");
5432
5433	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5434			CTLFLAG_RD, &stats->icrxptc,
5435			"Interrupt Cause Rx Pkt Timer Expire Count");
5436
5437	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5438			CTLFLAG_RD, &stats->icrxatc,
5439			"Interrupt Cause Rx Abs Timer Expire Count");
5440
5441	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5442			CTLFLAG_RD, &stats->ictxptc,
5443			"Interrupt Cause Tx Pkt Timer Expire Count");
5444
5445	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5446			CTLFLAG_RD, &stats->ictxatc,
5447			"Interrupt Cause Tx Abs Timer Expire Count");
5448
5449	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5450			CTLFLAG_RD, &stats->ictxqec,
5451			"Interrupt Cause Tx Queue Empty Count");
5452
5453	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5454			CTLFLAG_RD, &stats->ictxqmtc,
5455			"Interrupt Cause Tx Queue Min Thresh Count");
5456
5457	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5458			CTLFLAG_RD, &stats->icrxdmtc,
5459			"Interrupt Cause Rx Desc Min Thresh Count");
5460
5461	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5462			CTLFLAG_RD, &stats->icrxoc,
5463			"Interrupt Cause Receiver Overrun Count");
5464
5465	/* Host to Card Stats */
5466
5467	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5468				    CTLFLAG_RD, NULL,
5469				    "Host to Card Statistics");
5470
5471	host_list = SYSCTL_CHILDREN(host_node);
5472
5473	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5474			CTLFLAG_RD, &stats->cbtmpc,
5475			"Circuit Breaker Tx Packet Count");
5476
5477	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5478			CTLFLAG_RD, &stats->htdpmc,
5479			"Host Transmit Discarded Packets");
5480
5481	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5482			CTLFLAG_RD, &stats->rpthc,
5483			"Rx Packets To Host");
5484
5485	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5486			CTLFLAG_RD, &stats->cbrmpc,
5487			"Circuit Breaker Rx Packet Count");
5488
5489	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5490			CTLFLAG_RD, &stats->cbrdpc,
5491			"Circuit Breaker Rx Dropped Count");
5492
5493	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5494			CTLFLAG_RD, &stats->hgptc,
5495			"Host Good Packets Tx Count");
5496
5497	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5498			CTLFLAG_RD, &stats->htcbdpc,
5499			"Host Tx Circuit Breaker Dropped Count");
5500
5501	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5502			CTLFLAG_RD, &stats->hgorc,
5503			"Host Good Octets Received Count");
5504
5505	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5506			CTLFLAG_RD, &stats->hgotc,
5507			"Host Good Octets Transmit Count");
5508
5509	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5510			CTLFLAG_RD, &stats->lenerrs,
5511			"Length Errors");
5512
5513	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5514			CTLFLAG_RD, &stats->scvpc,
5515			"SerDes/SGMII Code Violation Pkt Count");
5516
5517	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5518			CTLFLAG_RD, &stats->hrmpc,
5519			"Header Redirection Missed Packet Count");
5520}
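
/*
 * Usage sketch: once the driver is attached, the statistic nodes
 * registered above can be read with sysctl(8). The "dev.igb.0" prefix
 * assumes unit 0:
 *
 *	sysctl dev.igb.0.mac_stats
 *	sysctl dev.igb.0.interrupts
 *	sysctl dev.igb.0.host
 */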
5521
5522
5523/**********************************************************************
5524 *
5525 *  This routine provides a way to dump out the adapter eeprom,
5526 *  often a useful debug/service tool. This only dumps the first
5527 *  often a useful debug/service tool. Only the first 32 words are
5528 *  dumped; the data that matters lives within that range.
5529 **********************************************************************/
5530static int
5531igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5532{
5533	struct adapter *adapter;
5534	int error;
5535	int result;
5536
5537	result = -1;
5538	error = sysctl_handle_int(oidp, &result, 0, req);
5539
5540	if (error || !req->newptr)
5541		return (error);
5542
5543	/*
5544	 * This value will cause a hex dump of the
5545	 * first 32 16-bit words of the EEPROM to
5546	 * the screen.
5547	 */
5548	if (result == 1) {
5549		adapter = (struct adapter *)arg1;
5550		igb_print_nvm_info(adapter);
5551	}
5552
5553	return (error);
5554}
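
/*
 * Usage sketch: writing 1 to the sysctl node backed by this handler
 * triggers the dump. The "nvm" node name is an assumption here; check
 * the SYSCTL_ADD_PROC registration in igb_attach() for the real one:
 *
 *	sysctl dev.igb.0.nvm=1
 */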
5555
5556static void
5557igb_print_nvm_info(struct adapter *adapter)
5558{
5559	u16	eeprom_data;
5560	int	i, j, row = 0;
5561
5562	/* It's a bit crude, but it gets the job done */
5563	printf("\nInterface EEPROM Dump:\n");
5564	printf("Offset\n0x0000  ");
5565	for (i = 0, j = 0; i < 32; i++, j++) {
5566		if (j == 8) { /* Make the offset block */
5567			j = 0; ++row;
5568			printf("\n0x00%x0  ",row);
5569		}
5570		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5571		printf("%04x ", eeprom_data);
5572	}
5573	printf("\n");
5574}
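
/*
 * The routine above produces output of the following shape: byte
 * offsets on the left, eight 16-bit words per row, "xxxx" standing in
 * for the actual EEPROM data:
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0020  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0030  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */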
5575
5576static void
5577igb_set_sysctl_value(struct adapter *adapter, const char *name,
5578	const char *description, int *limit, int value)
5579{
5580	*limit = value;
5581	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5582	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5583	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5584}
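
/*
 * Illustrative call (the tunable name, description and value below are
 * hypothetical, not taken from this file):
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 *
 * which surfaces as a read/write dev.igb.<unit>.rx_processing_limit.
 */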
5585
5586/*
5587** Set flow control using sysctl:
5588** Flow control values:
5589** 	0 - off
5590**	1 - rx pause
5591**	2 - tx pause
5592**	3 - full
5593*/
5594static int
5595igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5596{
5597	int error;
5598	struct adapter *adapter;
5599
5600	error = sysctl_handle_int(oidp, &igb_fc_setting, 0, req);
5601
5602	if (error || !req->newptr)
5603		return (error);
5604
5605	adapter = (struct adapter *) arg1;
5606	switch (igb_fc_setting) {
5607		case e1000_fc_rx_pause:
5608		case e1000_fc_tx_pause:
5609		case e1000_fc_full:
5610			adapter->hw.fc.requested_mode = igb_fc_setting;
5611			break;
5612		case e1000_fc_none:
5613		default:
5614			adapter->hw.fc.requested_mode = e1000_fc_none;
5615	}
5616
5617	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5618	e1000_force_mac_fc(&adapter->hw);
5619	return (error);
5620}
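
/*
 * Usage sketch: assuming this handler is registered under the device
 * tree as "flow_control" (check its SYSCTL_ADD_PROC registration),
 * full flow control can be requested at runtime with
 *
 *	sysctl dev.igb.0.flow_control=3
 *
 * and disabled again with a value of 0 (e1000_fc_none).
 */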
5621