/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 223482 2011-06-23 17:42:27Z jfv $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.2.5";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
static void	igb_deferred_mq_start(void *, int);
#else
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static void	igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);

/*
** Header split causes the packet header to
** be DMA'd into a separate mbuf from the payload.
** This can have memory alignment benefits.  Another
** plus is that small packets often fit entirely
** into the header mbuf and thus use no cluster.
** It's a very workload-dependent feature.
*/
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
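
/*
 * Illustrative example (not part of the original driver): all of the
 * TUNABLE_INT() knobs above are loader tunables, so they can be set at
 * boot time from /boot/loader.conf, e.g.:
 *
 *	hw.igb.rxd="2048"
 *	hw.igb.txd="2048"
 *	hw.igb.enable_aim="0"
 *	hw.igb.num_queues="2"
 *	hw.igb.rx_process_limit="200"
 *
 * The values shown are only examples; rxd/txd must still pass the
 * validation done in igb_attach() below.
 */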

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&
		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&
		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}
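
/*
 * Worked example (illustrative, not part of the original driver): every
 * entry in igb_vendor_info_array above uses PCI_ANY_ID for the subvendor
 * and subdevice fields, so a card matches on vendor (0x8086) and device
 * ID alone.  A hypothetical 82576 card with any subsystem IDs therefore
 * matches the { 0x8086, E1000_DEV_ID_82576, PCI_ANY_ID, PCI_ANY_ID, 0 }
 * entry, and its index field (0) selects igb_strings[0] for the device
 * description.
 */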

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors.  They
	 * must not exceed the hardware maximum and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;
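
	/*
	 * Worked example (illustrative): with the 16-byte descriptor size
	 * used in the checks above, a hypothetical "hw.igb.txd=100" gives
	 * 100 * 16 = 1600 bytes, and 1600 % 128 (a typical IGB_DBA_ALIGN)
	 * is 64, so the value is rejected and IGB_DEFAULT_TXD is used
	 * instead, with the "Using %d TX descriptors" message above.  The
	 * exact IGB_DBA_ALIGN and default values come from if_igb.h.
	 */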

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats = (struct e1000_vf_stats *)malloc(
		    sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats = (struct e1000_hw_stats *)malloc(
		    sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "enable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    TRUE);
		e1000_set_eee_i350(&adapter->hw);
	}

	/*
	** Start from a known state; this is
	** important for reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state.  Call it again;
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_detach(dev);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
err_pci:
	igb_free_pci_resources(adapter);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;
#if __FreeBSD_version >= 800000
	struct tx_ring *txr = adapter->tx_rings;
#endif

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
#if __FreeBSD_version < 800000
		igb_start(ifp);
#else
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			if (!drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr, NULL);
			IGB_TX_UNLOCK(txr);
		}
#endif
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#if __FreeBSD_version < 800000
/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status = IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#else /* __FreeBSD_version >= 800000 */
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int			i = 0, err = 0;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0)
		i = m->m_pkthdr.flowid % adapter->num_queues;

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	if (IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &txr->txq_task);
	}

	return (err);
}
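
/*
 * Illustrative example (not part of the original driver): with the
 * M_FLOWID selection above, a flow whose m_pkthdr.flowid hashes to 13
 * on a 4-queue adapter is pinned to tx_rings[13 % 4] == tx_rings[1],
 * so all packets of that flow stay on one ring and remain in order.
 * Packets without M_FLOWID always use ring 0.
 */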

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING || adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
			igb_txeof(txr);
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			break;
		}
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status = IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
	IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version < 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
	bool		avoid_reset = FALSE;
#endif
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
#if defined(INET) || defined(INET6)
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
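	/*
	 * Worked example (illustrative): with max_frame_size capped at
	 * 9234 bytes above, the largest MTU the SIOCSIFMTU case accepts
	 * is 9234 - ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216 bytes.
	 */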
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/*
		** As the speed/duplex settings are being
		** changed, we need to reset the PHY.
		*/
		adapter->hw.phy.reset_disable = FALSE;
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as the
 *  init entry point in the network interface structure.  It is also
 *  used by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address; the user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	e1000_set_eee_i350(&adapter->hw);

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, -1, NULL);

		IGB_TX_LOCK(txr);
		if (igb_txeof(txr))
			more = TRUE;
#if __FreeBSD_version >= 800000
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine : if using this code you MUST be sure that
 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link(adapter, 0);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	igb_rxeof(que, count, &rx_done);

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */
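
/*
 * Illustrative usage (not part of the original driver): DEVICE_POLLING
 * is only compiled in when the kernel is built with
 *
 *	options DEVICE_POLLING
 *
 * and is then switched on per interface with "ifconfig igb0 polling",
 * which flips IFCAP_POLLING through the SIOCSIFCAP path above.  Per the
 * warning in the comment heading igb_poll(), a loader.conf entry of
 * hw.igb.num_queues="1" should accompany it.
 */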

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out the last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_tx || more_rx ||
	    (adapter->ifp->if_drv_flags & IFF_DRV_OACTIVE))
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}
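
/*
 * Worked AIM example (illustrative, not part of the original driver):
 * suppose at gigabit speed a queue moved rxr->bytes = 1500000 in
 * rxr->packets = 1000 over the last interval.  The average frame size
 * is 1500; adding 24 for hardware framing gives 1524, which is under
 * the 3000 cap and outside the (300, 1200) mid range, so it is halved
 * to 762 and masked with 0x7FFC to 760.  That value is written to the
 * EITR register on the next interrupt, lowering the interrupt rate for
 * this bulk-transfer workload.
 */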


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}
1641
1642/*********************************************************************
1643 *
1644 *  Media Ioctl callback
1645 *
1646 *  This routine is called when the user changes speed/duplex using
1647 *  media/mediopt option with ifconfig.
1648 *
1649 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}
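
/*
 * Illustrative usage (not part of the original driver): the cases above
 * are reached from ifconfig, e.g.
 *
 *	ifconfig igb0 media 100baseTX mediaopt full-duplex
 *	ifconfig igb0 media autoselect
 *
 * The first forces 100/full through the IFM_100_TX case; the second
 * restores autonegotiation through the IFM_AUTO case.
 */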


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors,
 *  as used by the 82575-class adapters.
 *
 **********************************************************************/
1706
1707static int
1708igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1709{
1710	struct adapter		*adapter = txr->adapter;
1711	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1712	bus_dmamap_t		map;
1713	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1714	union e1000_adv_tx_desc	*txd = NULL;
1715	struct mbuf		*m_head;
1716	u32			olinfo_status = 0, cmd_type_len = 0;
1717	int			nsegs, i, j, error, first, last = 0;
1718	u32			hdrlen = 0;
1719
1720	m_head = *m_headp;
1721
1722
1723	/* Set basic descriptor constants */
1724	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1725	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1726	if (m_head->m_flags & M_VLANTAG)
1727		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1728
1729	/*
1730         * Map the packet for DMA.
1731	 *
1732	 * Capture the first descriptor index,
1733	 * this descriptor will have the index
1734	 * of the EOP which is the only one that
1735	 * now gets a DONE bit writeback.
1736	 */
1737	first = txr->next_avail_desc;
1738	tx_buffer = &txr->tx_buffers[first];
1739	tx_buffer_mapped = tx_buffer;
1740	map = tx_buffer->map;
1741
1742	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1743	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1744
1745	if (error == EFBIG) {
1746		struct mbuf *m;
1747
1748		m = m_defrag(*m_headp, M_DONTWAIT);
1749		if (m == NULL) {
1750			adapter->mbuf_defrag_failed++;
1751			m_freem(*m_headp);
1752			*m_headp = NULL;
1753			return (ENOBUFS);
1754		}
1755		*m_headp = m;
1756
1757		/* Try it again */
1758		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1759		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1760
1761		if (error == ENOMEM) {
1762			adapter->no_tx_dma_setup++;
1763			return (error);
1764		} else if (error != 0) {
1765			adapter->no_tx_dma_setup++;
1766			m_freem(*m_headp);
1767			*m_headp = NULL;
1768			return (error);
1769		}
1770	} else if (error == ENOMEM) {
1771		adapter->no_tx_dma_setup++;
1772		return (error);
1773	} else if (error != 0) {
1774		adapter->no_tx_dma_setup++;
1775		m_freem(*m_headp);
1776		*m_headp = NULL;
1777		return (error);
1778	}
1779
	/*
	 * Check again to be sure we have enough descriptors;
	 * the extra two keep slack for the context descriptor
	 * that offload setup below may consume.
	 */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
1786	m_head = *m_headp;
1787
	/*
	 * Set up the context descriptor, used when any
	 * hardware offload is done. This includes CSUM,
	 * VLAN, and TSO; it will use the first descriptor.
	 */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1795		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1796			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1797			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1798			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1799		} else
1800			return (ENXIO);
1801	} else if (igb_tx_ctx_setup(txr, m_head))
1802		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1803
1804	/* Calculate payload length */
1805	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1806	    << E1000_ADVTXD_PAYLEN_SHIFT);
1807
1808	/* 82575 needs the queue index added */
1809	if (adapter->hw.mac.type == e1000_82575)
1810		olinfo_status |= txr->me << 4;
1811
1812	/* Set up our transmit descriptors */
1813	i = txr->next_avail_desc;
1814	for (j = 0; j < nsegs; j++) {
1815		bus_size_t seg_len;
1816		bus_addr_t seg_addr;
1817
1818		tx_buffer = &txr->tx_buffers[i];
1819		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1820		seg_addr = segs[j].ds_addr;
1821		seg_len  = segs[j].ds_len;
1822
1823		txd->read.buffer_addr = htole64(seg_addr);
1824		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1825		txd->read.olinfo_status = htole32(olinfo_status);
1826		last = i;
1827		if (++i == adapter->num_tx_desc)
1828			i = 0;
1829		tx_buffer->m_head = NULL;
1830		tx_buffer->next_eop = -1;
1831	}
1832
1833	txr->next_avail_desc = i;
1834	txr->tx_avail -= nsegs;
1835
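	/*
	 * The mbuf is stashed on the last buffer of the chain, but
	 * the map that was loaded belongs to the first buffer; swap
	 * the maps so the loaded map is unloaded when the EOP buffer
	 * is cleaned and the first buffer keeps the spare map.
	 */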
	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1840
	/*
	 * The last descriptor of the packet needs
	 * End Of Packet (EOP) and Report Status (RS).
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1848	/*
1849	 * Keep track in the first buffer which
1850	 * descriptor will be written back
1851	 */
1852	tx_buffer = &txr->tx_buffers[first];
1853	tx_buffer->next_eop = last;
1854	txr->watchdog_time = ticks;
1855
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells
	 * the hardware that this frame is available to transmit.
	 */
1860	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1861	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1862	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1863	++txr->tx_packets;
1864
	return (0);
}
1868
1869static void
1870igb_set_promisc(struct adapter *adapter)
1871{
1872	struct ifnet	*ifp = adapter->ifp;
1873	struct e1000_hw *hw = &adapter->hw;
1874	u32		reg;
1875
1876	if (adapter->vf_ifp) {
1877		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1878		return;
1879	}
1880
1881	reg = E1000_READ_REG(hw, E1000_RCTL);
1882	if (ifp->if_flags & IFF_PROMISC) {
1883		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1884		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1885	} else if (ifp->if_flags & IFF_ALLMULTI) {
1886		reg |= E1000_RCTL_MPE;
1887		reg &= ~E1000_RCTL_UPE;
1888		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1889	}
1890}
1891
1892static void
1893igb_disable_promisc(struct adapter *adapter)
1894{
1895	struct e1000_hw *hw = &adapter->hw;
1896	u32		reg;
1897
1898	if (adapter->vf_ifp) {
1899		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1900		return;
1901	}
1902	reg = E1000_READ_REG(hw, E1000_RCTL);
1903	reg &=  (~E1000_RCTL_UPE);
1904	reg &=  (~E1000_RCTL_MPE);
1905	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1906}
1907
1908
1909/*********************************************************************
1910 *  Multicast Update
1911 *
 *  This routine is called whenever the multicast address list is updated.
1913 *
1914 **********************************************************************/
1915
1916static void
1917igb_set_multi(struct adapter *adapter)
1918{
1919	struct ifnet	*ifp = adapter->ifp;
1920	struct ifmultiaddr *ifma;
1921	u32 reg_rctl = 0;
1922	u8  *mta;
1923
1924	int mcnt = 0;
1925
1926	IOCTL_DEBUGOUT("igb_set_multi: begin");
1927
1928	mta = adapter->mta;
1929	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
1930	    MAX_NUM_MULTICAST_ADDRESSES);
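	/*
	 * mta is a flat staging buffer holding up to
	 * MAX_NUM_MULTICAST_ADDRESSES link-level addresses of
	 * ETH_ADDR_LEN bytes each, handed to the shared code below.
	 */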
1931
1932#if __FreeBSD_version < 800000
1933	IF_ADDR_LOCK(ifp);
1934#else
1935	if_maddr_rlock(ifp);
1936#endif
1937	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1938		if (ifma->ifma_addr->sa_family != AF_LINK)
1939			continue;
1940
1941		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1942			break;
1943
1944		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1945		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1946		mcnt++;
1947	}
1948#if __FreeBSD_version < 800000
1949	IF_ADDR_UNLOCK(ifp);
1950#else
1951	if_maddr_runlock(ifp);
1952#endif
1953
1954	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1955		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1956		reg_rctl |= E1000_RCTL_MPE;
1957		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1958	} else
1959		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1960}
1961
1962
1963/*********************************************************************
1964 *  Timer routine:
1965 *  	This routine checks for link status,
1966 *	updates statistics, and does the watchdog.
1967 *
1968 **********************************************************************/
1969
1970static void
1971igb_local_timer(void *arg)
1972{
1973	struct adapter		*adapter = arg;
1974	device_t		dev = adapter->dev;
1975	struct tx_ring		*txr = adapter->tx_rings;
1976
1978	IGB_CORE_LOCK_ASSERT(adapter);
1979
1980	igb_update_link_status(adapter);
1981	igb_update_stats_counters(adapter);
1982
	/*
	** If flow control has paused us since we last checked,
	** it invalidates the watchdog timing, so don't run it.
	*/
1987	if (adapter->pause_frames) {
1988		adapter->pause_frames = 0;
1989		goto out;
1990	}
1991
	/*
	** Watchdog: check for time since any descriptor was
	** cleaned; the TX cleanup path marks a queue
	** IGB_QUEUE_HUNG when it stops making progress.
	*/
1995	for (int i = 0; i < adapter->num_queues; i++, txr++)
1996		if (txr->queue_status == IGB_QUEUE_HUNG)
1997			goto timeout;
1998out:
1999	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2000#ifndef DEVICE_POLLING
2001	/* Schedule all queue interrupts - deadlock protection */
2002	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2003#endif
2004	return;
2005
2006timeout:
2007	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2008	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2009            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2010            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2011	device_printf(dev,"TX(%d) desc avail = %d,"
2012            "Next TX to Clean = %d\n",
2013            txr->me, txr->tx_avail, txr->next_to_clean);
2014	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2015	adapter->watchdog_events++;
2016	igb_init_locked(adapter);
2017}
2018
2019static void
2020igb_update_link_status(struct adapter *adapter)
2021{
2022	struct e1000_hw *hw = &adapter->hw;
2023	struct ifnet *ifp = adapter->ifp;
2024	device_t dev = adapter->dev;
2025	struct tx_ring *txr = adapter->tx_rings;
2026	u32 link_check, thstat, ctrl;
2027
2028	link_check = thstat = ctrl = 0;
2029
	/* Get the cached link value or read for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read the PHY */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
		    E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	/* VF devices report media type unknown */
	case e1000_media_type_unknown:
		e1000_check_for_link(hw);
		link_check = !hw->mac.get_link_status;
		/* FALLTHROUGH */
	default:
		break;
	}
2057
2058	/* Check for thermal downshift or shutdown */
2059	if (hw->mac.type == e1000_i350) {
2060		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2061		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2062	}
2063
2064	/* Now we check if a transition has happened */
2065	if (link_check && (adapter->link_active == 0)) {
2066		e1000_get_speed_and_duplex(&adapter->hw,
2067		    &adapter->link_speed, &adapter->link_duplex);
2068		if (bootverbose)
2069			device_printf(dev, "Link is up %d Mbps %s\n",
2070			    adapter->link_speed,
2071			    ((adapter->link_duplex == FULL_DUPLEX) ?
2072			    "Full Duplex" : "Half Duplex"));
2073		adapter->link_active = 1;
2074		ifp->if_baudrate = adapter->link_speed * 1000000;
2075		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2076		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2077			device_printf(dev, "Link: thermal downshift\n");
2078		/* This can sleep */
2079		if_link_state_change(ifp, LINK_STATE_UP);
2080	} else if (!link_check && (adapter->link_active == 1)) {
2081		ifp->if_baudrate = adapter->link_speed = 0;
2082		adapter->link_duplex = 0;
2083		if (bootverbose)
2084			device_printf(dev, "Link is Down\n");
2085		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2086		    (thstat & E1000_THSTAT_PWR_DOWN))
2087			device_printf(dev, "Link: thermal shutdown\n");
2088		adapter->link_active = 0;
2089		/* This can sleep */
2090		if_link_state_change(ifp, LINK_STATE_DOWN);
2091		/* Turn off watchdogs */
2092		for (int i = 0; i < adapter->num_queues; i++, txr++)
2093			txr->queue_status = IGB_QUEUE_IDLE;
2094	}
2095}
2096
2097/*********************************************************************
2098 *
2099 *  This routine disables all traffic on the adapter by issuing a
2100 *  global reset on the MAC and deallocates TX/RX buffers.
2101 *
2102 **********************************************************************/
2103
2104static void
2105igb_stop(void *arg)
2106{
2107	struct adapter	*adapter = arg;
2108	struct ifnet	*ifp = adapter->ifp;
2109	struct tx_ring *txr = adapter->tx_rings;
2110
2111	IGB_CORE_LOCK_ASSERT(adapter);
2112
2113	INIT_DEBUGOUT("igb_stop: begin");
2114
2115	igb_disable_intr(adapter);
2116
2117	callout_stop(&adapter->timer);
2118
2119	/* Tell the stack that the interface is no longer active */
2120	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2121
2122	/* Unarm watchdog timer. */
2123	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2124		IGB_TX_LOCK(txr);
2125		txr->queue_status = IGB_QUEUE_IDLE;
2126		IGB_TX_UNLOCK(txr);
2127	}
2128
2129	e1000_reset_hw(&adapter->hw);
2130	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2131
2132	e1000_led_off(&adapter->hw);
2133	e1000_cleanup_led(&adapter->hw);
2134}
2135
2136
2137/*********************************************************************
2138 *
2139 *  Determine hardware revision.
2140 *
2141 **********************************************************************/
2142static void
2143igb_identify_hardware(struct adapter *adapter)
2144{
2145	device_t dev = adapter->dev;
2146
2147	/* Make sure our PCI config space has the necessary stuff set */
2148	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2149	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2150	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2151		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2152		    "bits were not set!\n");
2153		adapter->hw.bus.pci_cmd_word |=
2154		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2155		pci_write_config(dev, PCIR_COMMAND,
2156		    adapter->hw.bus.pci_cmd_word, 2);
2157	}
2158
2159	/* Save off the information about this board */
2160	adapter->hw.vendor_id = pci_get_vendor(dev);
2161	adapter->hw.device_id = pci_get_device(dev);
2162	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2163	adapter->hw.subsystem_vendor_id =
2164	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2165	adapter->hw.subsystem_device_id =
2166	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2167
2168	/* Set MAC type early for PCI setup */
2169	e1000_set_mac_type(&adapter->hw);
2170
2171	/* Are we a VF device? */
2172	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2173	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2174		adapter->vf_ifp = 1;
2175	else
2176		adapter->vf_ifp = 0;
2177}
2178
2179static int
2180igb_allocate_pci_resources(struct adapter *adapter)
2181{
2182	device_t	dev = adapter->dev;
2183	int		rid;
2184
2185	rid = PCIR_BAR(0);
2186	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2187	    &rid, RF_ACTIVE);
2188	if (adapter->pci_mem == NULL) {
2189		device_printf(dev, "Unable to allocate bus resource: memory\n");
2190		return (ENXIO);
2191	}
2192	adapter->osdep.mem_bus_space_tag =
2193	    rman_get_bustag(adapter->pci_mem);
2194	adapter->osdep.mem_bus_space_handle =
2195	    rman_get_bushandle(adapter->pci_mem);
2196	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2197
2198	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2199
2200	/* This will setup either MSI/X or MSI */
2201	adapter->msix = igb_setup_msix(adapter);
2202	adapter->hw.back = &adapter->osdep;
2203
2204	return (0);
2205}
2206
2207/*********************************************************************
2208 *
2209 *  Setup the Legacy or MSI Interrupt handler
2210 *
2211 **********************************************************************/
2212static int
2213igb_allocate_legacy(struct adapter *adapter)
2214{
2215	device_t		dev = adapter->dev;
2216	struct igb_queue	*que = adapter->queues;
2217	struct tx_ring		*txr = adapter->tx_rings;
2218	int			error, rid = 0;
2219
2220	/* Turn off all interrupts */
2221	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2222
2223	/* MSI RID is 1 */
2224	if (adapter->msix == 1)
2225		rid = 1;
2226
2227	/* We allocate a single interrupt resource */
2228	adapter->res = bus_alloc_resource_any(dev,
2229	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2230	if (adapter->res == NULL) {
2231		device_printf(dev, "Unable to allocate bus resource: "
2232		    "interrupt\n");
2233		return (ENXIO);
2234	}
2235
2236#if __FreeBSD_version >= 800000
2237	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2238#endif
2239
2240	/*
2241	 * Try allocating a fast interrupt and the associated deferred
2242	 * processing contexts.
2243	 */
2244	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2245	/* Make tasklet for deferred link handling */
2246	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2247	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2248	    taskqueue_thread_enqueue, &que->tq);
2249	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2250	    device_get_nameunit(adapter->dev));
2251	if ((error = bus_setup_intr(dev, adapter->res,
2252	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2253	    adapter, &adapter->tag)) != 0) {
2254		device_printf(dev, "Failed to register fast interrupt "
2255			    "handler: %d\n", error);
2256		taskqueue_free(que->tq);
2257		que->tq = NULL;
2258		return (error);
2259	}
2260
2261	return (0);
2262}
2263
2264
2265/*********************************************************************
2266 *
2267 *  Setup the MSIX Queue Interrupt handlers:
2268 *
2269 **********************************************************************/
2270static int
2271igb_allocate_msix(struct adapter *adapter)
2272{
2273	device_t		dev = adapter->dev;
2274	struct igb_queue	*que = adapter->queues;
2275	int			error, rid, vector = 0;
2276
2278	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
		rid = vector + 1;
2280		que->res = bus_alloc_resource_any(dev,
2281		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2282		if (que->res == NULL) {
2283			device_printf(dev,
2284			    "Unable to allocate bus resource: "
2285			    "MSIX Queue Interrupt\n");
2286			return (ENXIO);
2287		}
2288		error = bus_setup_intr(dev, que->res,
2289	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2290		    igb_msix_que, que, &que->tag);
2291		if (error) {
2292			que->res = NULL;
2293			device_printf(dev, "Failed to register Queue handler");
2294			return (error);
2295		}
2296#if __FreeBSD_version >= 800504
2297		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2298#endif
2299		que->msix = vector;
2300		if (adapter->hw.mac.type == e1000_82575)
2301			que->eims = E1000_EICR_TX_QUEUE0 << i;
2302		else
2303			que->eims = 1 << vector;
2304		/*
2305		** Bind the msix vector, and thus the
2306		** rings to the corresponding cpu.
2307		*/
2308		if (adapter->num_queues > 1)
2309			bus_bind_intr(dev, que->res, i);
2310#if __FreeBSD_version >= 800000
2311		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2312		    que->txr);
2313#endif
2314		/* Make tasklet for deferred handling */
2315		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2316		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2317		    taskqueue_thread_enqueue, &que->tq);
2318		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2319		    device_get_nameunit(adapter->dev));
2320	}
2321
2322	/* And Link */
2323	rid = vector + 1;
2324	adapter->res = bus_alloc_resource_any(dev,
2325	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2326	if (adapter->res == NULL) {
2327		device_printf(dev,
2328		    "Unable to allocate bus resource: "
2329		    "MSIX Link Interrupt\n");
2330		return (ENXIO);
2331	}
2332	if ((error = bus_setup_intr(dev, adapter->res,
2333	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2334	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2335		device_printf(dev, "Failed to register Link handler");
2336		return (error);
2337	}
2338#if __FreeBSD_version >= 800504
2339	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2340#endif
2341	adapter->linkvec = vector;
2342
2343	return (0);
2344}
2345
2346
2347static void
2348igb_configure_queues(struct adapter *adapter)
2349{
2350	struct	e1000_hw	*hw = &adapter->hw;
2351	struct	igb_queue	*que;
2352	u32			tmp, ivar = 0, newitr = 0;
2353
2354	/* First turn on RSS capability */
2355	if (adapter->hw.mac.type != e1000_82575)
2356		E1000_WRITE_REG(hw, E1000_GPIE,
2357		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2358		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2359
2360	/* Turn on MSIX */
2361	switch (adapter->hw.mac.type) {
2362	case e1000_82580:
2363	case e1000_i350:
2364	case e1000_vfadapt:
2365	case e1000_vfadapt_i350:
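		/*
		 * On these MACs each 32-bit IVAR register holds the
		 * vectors for two queues, one byte per entry: byte 0
		 * is RX and byte 1 is TX for the even queue, bytes 2
		 * and 3 the same for the odd queue. Queue i therefore
		 * uses IVAR index i/2, with E1000_IVAR_VALID marking
		 * an entry as enabled.
		 */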
2366		/* RX entries */
2367		for (int i = 0; i < adapter->num_queues; i++) {
2368			u32 index = i >> 1;
2369			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2370			que = &adapter->queues[i];
2371			if (i & 1) {
2372				ivar &= 0xFF00FFFF;
2373				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2374			} else {
2375				ivar &= 0xFFFFFF00;
2376				ivar |= que->msix | E1000_IVAR_VALID;
2377			}
2378			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2379		}
2380		/* TX entries */
2381		for (int i = 0; i < adapter->num_queues; i++) {
2382			u32 index = i >> 1;
2383			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2384			que = &adapter->queues[i];
2385			if (i & 1) {
2386				ivar &= 0x00FFFFFF;
2387				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2388			} else {
2389				ivar &= 0xFFFF00FF;
2390				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2391			}
2392			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2393			adapter->que_mask |= que->eims;
2394		}
2395
2396		/* And for the link interrupt */
2397		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2398		adapter->link_mask = 1 << adapter->linkvec;
2399		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2400		break;
2401	case e1000_82576:
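		/*
		 * The 82576 also packs two queues per IVAR register,
		 * but splits by queue number: queues 0-7 use bytes 0
		 * (RX) and 1 (TX) of IVAR entry (i & 0x7), while
		 * queues 8-15 use bytes 2 and 3 of the same entry.
		 */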
2402		/* RX entries */
2403		for (int i = 0; i < adapter->num_queues; i++) {
2404			u32 index = i & 0x7; /* Each IVAR has two entries */
2405			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2406			que = &adapter->queues[i];
2407			if (i < 8) {
2408				ivar &= 0xFFFFFF00;
2409				ivar |= que->msix | E1000_IVAR_VALID;
2410			} else {
2411				ivar &= 0xFF00FFFF;
2412				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2413			}
2414			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2415			adapter->que_mask |= que->eims;
2416		}
2417		/* TX entries */
2418		for (int i = 0; i < adapter->num_queues; i++) {
2419			u32 index = i & 0x7; /* Each IVAR has two entries */
2420			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2421			que = &adapter->queues[i];
2422			if (i < 8) {
2423				ivar &= 0xFFFF00FF;
2424				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2425			} else {
2426				ivar &= 0x00FFFFFF;
2427				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2428			}
2429			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2430			adapter->que_mask |= que->eims;
2431		}
2432
2433		/* And for the link interrupt */
2434		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2435		adapter->link_mask = 1 << adapter->linkvec;
2436		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2437		break;
2438
2439	case e1000_82575:
		/* Enable MSI-X support */
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2447
2448		/* Queues */
2449		for (int i = 0; i < adapter->num_queues; i++) {
2450			que = &adapter->queues[i];
2451			tmp = E1000_EICR_RX_QUEUE0 << i;
2452			tmp |= E1000_EICR_TX_QUEUE0 << i;
2453			que->eims = tmp;
2454			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2455			    i, que->eims);
2456			adapter->que_mask |= que->eims;
2457		}
2458
2459		/* Link */
2460		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2461		    E1000_EIMS_OTHER);
		adapter->link_mask |= E1000_EIMS_OTHER;
		break;
	default:
2464		break;
2465	}
2466
2467	/* Set the starting interrupt rate */
2468	if (igb_max_interrupt_rate > 0)
2469		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
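	/*
	 * The quotient is the interval expressed in 250 ns units;
	 * e.g. a cap of 8000 interrupts/sec yields newitr = 500.
	 */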
2470
	if (hw->mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
2475
2476	for (int i = 0; i < adapter->num_queues; i++) {
2477		que = &adapter->queues[i];
2478		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2479	}
2480
2481	return;
2482}
2483
2484
2485static void
2486igb_free_pci_resources(struct adapter *adapter)
2487{
2488	struct		igb_queue *que = adapter->queues;
2489	device_t	dev = adapter->dev;
2490	int		rid;
2491
	/*
	** There is a slight possibility of a failure mode
	** in attach that will result in entering this function
	** before the interrupt resources have been initialized;
	** in that case we do not want to execute the loops below.
	** We can detect this reliably by the state of the
	** adapter res pointer.
	*/
2500	if (adapter->res == NULL)
2501		goto mem;
2502
2503	/*
2504	 * First release all the interrupt resources:
2505	 */
2506	for (int i = 0; i < adapter->num_queues; i++, que++) {
2507		rid = que->msix + 1;
2508		if (que->tag != NULL) {
2509			bus_teardown_intr(dev, que->res, que->tag);
2510			que->tag = NULL;
2511		}
2512		if (que->res != NULL)
2513			bus_release_resource(dev,
2514			    SYS_RES_IRQ, rid, que->res);
2515	}
2516
2517	/* Clean the Legacy or Link interrupt last */
2518	if (adapter->linkvec) /* we are doing MSIX */
2519		rid = adapter->linkvec + 1;
2520	else
		rid = (adapter->msix != 0) ? 1 : 0;
2522
2523	que = adapter->queues;
2524	if (adapter->tag != NULL) {
2525		taskqueue_drain(que->tq, &adapter->link_task);
2526		bus_teardown_intr(dev, adapter->res, adapter->tag);
2527		adapter->tag = NULL;
2528	}
2529	if (adapter->res != NULL)
2530		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2531
2532	for (int i = 0; i < adapter->num_queues; i++, que++) {
2533		if (que->tq != NULL) {
2534#if __FreeBSD_version >= 800000
2535			taskqueue_drain(que->tq, &que->txr->txq_task);
2536#endif
2537			taskqueue_drain(que->tq, &que->que_task);
2538			taskqueue_free(que->tq);
2539		}
2540	}
2541mem:
2542	if (adapter->msix)
2543		pci_release_msi(dev);
2544
2545	if (adapter->msix_mem != NULL)
2546		bus_release_resource(dev, SYS_RES_MEMORY,
2547		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2548
2549	if (adapter->pci_mem != NULL)
2550		bus_release_resource(dev, SYS_RES_MEMORY,
2551		    PCIR_BAR(0), adapter->pci_mem);
2552
2553}
2554
2555/*
2556 * Setup Either MSI/X or MSI
2557 */
2558static int
2559igb_setup_msix(struct adapter *adapter)
2560{
2561	device_t dev = adapter->dev;
2562	int rid, want, queues, msgs;
2563
2564	/* tuneable override */
2565	if (igb_enable_msix == 0)
2566		goto msi;
2567
2568	/* First try MSI/X */
2569	rid = PCIR_BAR(IGB_MSIX_BAR);
2570	adapter->msix_mem = bus_alloc_resource_any(dev,
2571	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (!adapter->msix_mem) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table\n");
		goto msi;
	}
2578
2579	msgs = pci_msix_count(dev);
2580	if (msgs == 0) { /* system has msix disabled */
2581		bus_release_resource(dev, SYS_RES_MEMORY,
2582		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2583		adapter->msix_mem = NULL;
2584		goto msi;
2585	}
2586
2587	/* Figure out a reasonable auto config value */
2588	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2589
2590	/* Manual override */
2591	if (igb_num_queues != 0)
2592		queues = igb_num_queues;
2593	if (queues > 8)  /* max queues */
2594		queues = 8;
2595
2596	/* Can have max of 4 queues on 82575 */
2597	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2598		queues = 4;
2599
2600	/* Limit the VF devices to one queue */
2601	if (adapter->vf_ifp)
2602		queues = 1;
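	/*
	 * Example: a 4-core system with 10 MSI-X messages available
	 * ends up with 4 queue vectors plus the link vector (want = 5).
	 */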
2603
	/*
	** One vector (RX/TX pair) per queue,
	** plus one additional vector for the link interrupt.
	*/
2608	want = queues + 1;
2609	if (msgs >= want)
2610		msgs = want;
	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors configured, but %d queues wanted!\n",
		    msgs, want);
		return (ENXIO);
	}
	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_queues = queues;
		return (msgs);
	}
msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
		device_printf(adapter->dev, "Using MSI interrupt\n");
	return (msgs);
2629}
2630
2631/*********************************************************************
2632 *
 *  Set up a fresh starting state
2634 *
2635 **********************************************************************/
2636static void
2637igb_reset(struct adapter *adapter)
2638{
2639	device_t	dev = adapter->dev;
2640	struct e1000_hw *hw = &adapter->hw;
2641	struct e1000_fc_info *fc = &hw->fc;
2642	struct ifnet	*ifp = adapter->ifp;
2643	u32		pba = 0;
2644	u16		hwm;
2645
2646	INIT_DEBUGOUT("igb_reset: begin");
2647
2648	/* Let the firmware know the OS is in control */
2649	igb_get_hw_control(adapter);
2650
2651	/*
2652	 * Packet Buffer Allocation (PBA)
2653	 * Writing PBA sets the receive portion of the buffer
2654	 * the remainder is used for the transmit buffer.
2655	 */
2656	switch (hw->mac.type) {
2657	case e1000_82575:
2658		pba = E1000_PBA_32K;
2659		break;
2660	case e1000_82576:
2661	case e1000_vfadapt:
2662		pba = E1000_READ_REG(hw, E1000_RXPBS);
2663		pba &= E1000_RXPBS_SIZE_MASK_82576;
2664		break;
2665	case e1000_82580:
2666	case e1000_i350:
2667	case e1000_vfadapt_i350:
2668		pba = E1000_READ_REG(hw, E1000_RXPBS);
2669		pba = e1000_rxpbs_adjust_82580(pba);
		break;
2672	default:
2673		break;
2674	}
2675
2676	/* Special needs in case of Jumbo frames */
2677	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
		u32 tx_space, min_tx, min_rx;
		pba = E1000_READ_REG(hw, E1000_PBA);
		tx_space = pba >> 16;
		pba &= 0xffff;
		min_tx = (adapter->max_frame_size +
		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
		min_tx = roundup2(min_tx, 1024);
		min_tx >>= 10;
		min_rx = adapter->max_frame_size;
		min_rx = roundup2(min_rx, 1024);
		min_rx >>= 10;
		if (tx_space < min_tx &&
		    ((min_tx - tx_space) < pba)) {
			pba = pba - (min_tx - tx_space);
			/*
			 * If short on RX space, RX wins and
			 * must trump the TX adjustment.
			 */
			if (pba < min_rx)
				pba = min_rx;
		}
		E1000_WRITE_REG(hw, E1000_PBA, pba);
2700	}
2701
	INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
2703
2704	/*
2705	 * These parameters control the automatic generation (Tx) and
2706	 * response (Rx) to Ethernet PAUSE frames.
2707	 * - High water mark should allow for at least two frames to be
2708	 *   received after sending an XOFF.
2709	 * - Low water mark works best when it is very near the high water mark.
2710	 *   This allows the receiver to restart by sending XON when it has
2711	 *   drained a bit.
2712	 */
2713	hwm = min(((pba << 10) * 9 / 10),
2714	    ((pba << 10) - 2 * adapter->max_frame_size));
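	/*
	 * Example: an 82575 (pba = 32 KB) with a standard 1518-byte
	 * max frame gives min(29491, 29732) = 29491, which the
	 * granularity mask below rounds down to 29488.
	 */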
2715
2716	if (hw->mac.type < e1000_82576) {
2717		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2718		fc->low_water = fc->high_water - 8;
2719	} else {
2720		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2721		fc->low_water = fc->high_water - 16;
2722	}
2723
2724	fc->pause_time = IGB_FC_PAUSE_TIME;
2725	fc->send_xon = TRUE;
2726	if (fc->requested_mode)
2727		fc->current_mode = fc->requested_mode;
2728	else
2729		fc->current_mode = e1000_fc_full;
2730
2731	adapter->fc = fc->current_mode;
2732
2733	/* Issue a global reset */
2734	e1000_reset_hw(hw);
2735	E1000_WRITE_REG(hw, E1000_WUC, 0);
2736
2737	if (e1000_init_hw(hw) < 0)
2738		device_printf(dev, "Hardware Initialization Failed\n");
2739
2740	/* Setup DMA Coalescing */
2741	if (hw->mac.type == e1000_i350) {
2742		u32 reg = ~E1000_DMACR_DMAC_EN;
2743
2744		if (adapter->dmac == 0) { /* Disabling it */
2745			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2746			goto reset_out;
2747		}
2748
2749		hwm = (pba - 4) << 10;
2750		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2751		    & E1000_DMACR_DMACTHR_MASK);
2752
		/* Transition to L0s or L1 if available */
2754		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2755
2756		/* timer = value in adapter->dmac in 32usec intervals */
2757		reg |= (adapter->dmac >> 5);
2758		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2759
2760		/* No lower threshold */
2761		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2762
		/* Set the coalescing watermark to hwm ((pba - 4) KB, from above) */
2764		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2765
2766		/* Set the interval before transition */
2767		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2768		reg |= 0x800000FF; /* 255 usec */
2769		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2770
2771		/* free space in tx packet buffer to wake from DMA coal */
2772		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2773		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2774
2775		/* make low power state decision controlled by DMA coal */
2776		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2777		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2778		    reg | E1000_PCIEMISC_LX_DECISION);
2779		device_printf(dev, "DMA Coalescing enabled\n");
2780	}
2781
2782reset_out:
2783	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2784	e1000_get_phy_info(hw);
2785	e1000_check_for_link(hw);
2786	return;
2787}
2788
2789/*********************************************************************
2790 *
2791 *  Setup networking device structure and register an interface.
2792 *
2793 **********************************************************************/
2794static int
2795igb_setup_interface(device_t dev, struct adapter *adapter)
2796{
2797	struct ifnet   *ifp;
2798
2799	INIT_DEBUGOUT("igb_setup_interface: begin");
2800
2801	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2802	if (ifp == NULL) {
2803		device_printf(dev, "can not allocate ifnet structure\n");
2804		return (-1);
2805	}
2806	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2807	ifp->if_mtu = ETHERMTU;
2808	ifp->if_init =  igb_init;
2809	ifp->if_softc = adapter;
2810	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2811	ifp->if_ioctl = igb_ioctl;
2812#if __FreeBSD_version >= 800000
2813	ifp->if_transmit = igb_mq_start;
2814	ifp->if_qflush = igb_qflush;
2815#else
2816	ifp->if_start = igb_start;
2817#endif
2818	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2819	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2820	IFQ_SET_READY(&ifp->if_snd);
2821
2822	ether_ifattach(ifp, adapter->hw.mac.addr);
2823
2824	ifp->if_capabilities = ifp->if_capenable = 0;
2825
2826	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2827	ifp->if_capabilities |= IFCAP_TSO4;
2828	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2829	ifp->if_capenable = ifp->if_capabilities;
2830
	/*
	 * LRO capability is advertised, but it is intentionally
	 * left out of if_capenable above so it is off by default.
	 */
	ifp->if_capabilities |= IFCAP_LRO;
2833
2834#ifdef DEVICE_POLLING
2835	ifp->if_capabilities |= IFCAP_POLLING;
2836#endif
2837
2838	/*
2839	 * Tell the upper layer(s) we
2840	 * support full VLAN capability.
2841	 */
2842	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2843	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2844			     |  IFCAP_VLAN_HWTSO
2845			     |  IFCAP_VLAN_MTU;
2846	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2847			  |  IFCAP_VLAN_HWTSO
2848			  |  IFCAP_VLAN_MTU;
2849
	/*
	** Don't turn this on by default; if vlans are
	** created on another pseudo device (eg. lagg)
	** then vlan events are not passed thru, breaking
	** operation, but with HW FILTER off it works. If
	** you are using vlans directly on the igb driver
	** you can enable this and get full hardware tag
	** filtering.
	*/
2858	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2859
2860	/*
2861	 * Specify the media types supported by this adapter and register
2862	 * callbacks to update media and link information
2863	 */
2864	ifmedia_init(&adapter->media, IFM_IMASK,
2865	    igb_media_change, igb_media_status);
2866	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2867	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2868		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2869			    0, NULL);
2870		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2871	} else {
2872		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2873		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2874			    0, NULL);
2875		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2876			    0, NULL);
2877		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2878			    0, NULL);
2879		if (adapter->hw.phy.type != e1000_phy_ife) {
2880			ifmedia_add(&adapter->media,
2881				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2882			ifmedia_add(&adapter->media,
2883				IFM_ETHER | IFM_1000_T, 0, NULL);
2884		}
2885	}
2886	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2887	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2888	return (0);
2889}
2890
2891
2892/*
2893 * Manage DMA'able memory.
2894 */
2895static void
2896igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2897{
2898	if (error)
2899		return;
2900	*(bus_addr_t *) arg = segs[0].ds_addr;
2901}
2902
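/*
 * igb_dma_malloc() creates a tag, allocates DMA-safe memory and
 * loads the map in one shot; on success dma->dma_vaddr and
 * dma->dma_paddr are ready to use. It is paired with
 * igb_dma_free(), e.g. for a descriptor ring:
 *
 *	igb_dma_malloc(adapter, tsize, &txr->txdma, BUS_DMA_NOWAIT);
 */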
2903static int
2904igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2905        struct igb_dma_alloc *dma, int mapflags)
2906{
2907	int error;
2908
2909	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2910				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2911				BUS_SPACE_MAXADDR,	/* lowaddr */
2912				BUS_SPACE_MAXADDR,	/* highaddr */
2913				NULL, NULL,		/* filter, filterarg */
2914				size,			/* maxsize */
2915				1,			/* nsegments */
2916				size,			/* maxsegsize */
2917				0,			/* flags */
2918				NULL,			/* lockfunc */
2919				NULL,			/* lockarg */
2920				&dma->dma_tag);
2921	if (error) {
2922		device_printf(adapter->dev,
2923		    "%s: bus_dma_tag_create failed: %d\n",
2924		    __func__, error);
2925		goto fail_0;
2926	}
2927
	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
	if (error) {
		device_printf(adapter->dev,
		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
		    __func__, (uintmax_t)size, error);
		/* No memory was allocated; only destroy the tag */
		bus_dma_tag_destroy(dma->dma_tag);
		goto fail_0;
	}
2936
2937	dma->dma_paddr = 0;
2938	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2939	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2940	if (error || dma->dma_paddr == 0) {
2941		device_printf(adapter->dev,
2942		    "%s: bus_dmamap_load failed: %d\n",
2943		    __func__, error);
2944		goto fail_3;
2945	}
2946
2947	return (0);
2948
2949fail_3:
2950	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2951fail_2:
2952	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2953	bus_dma_tag_destroy(dma->dma_tag);
2954fail_0:
2955	dma->dma_map = NULL;
2956	dma->dma_tag = NULL;
2957
2958	return (error);
2959}
2960
2961static void
2962igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2963{
2964	if (dma->dma_tag == NULL)
2965		return;
2966	if (dma->dma_map != NULL) {
2967		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2968		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2969		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2970		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2971		dma->dma_map = NULL;
2972	}
2973	bus_dma_tag_destroy(dma->dma_tag);
2974	dma->dma_tag = NULL;
2975}
2976
2977
2978/*********************************************************************
2979 *
2980 *  Allocate memory for the transmit and receive rings, and then
2981 *  the descriptors associated with each, called only once at attach.
2982 *
2983 **********************************************************************/
2984static int
2985igb_allocate_queues(struct adapter *adapter)
2986{
2987	device_t dev = adapter->dev;
2988	struct igb_queue	*que = NULL;
2989	struct tx_ring		*txr = NULL;
2990	struct rx_ring		*rxr = NULL;
2991	int rsize, tsize, error = E1000_SUCCESS;
2992	int txconf = 0, rxconf = 0;
2993
2994	/* First allocate the top level queue structs */
2995	if (!(adapter->queues =
2996	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2997	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2998		device_printf(dev, "Unable to allocate queue memory\n");
2999		error = ENOMEM;
3000		goto fail;
3001	}
3002
3003	/* Next allocate the TX ring struct memory */
3004	if (!(adapter->tx_rings =
3005	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3006	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3007		device_printf(dev, "Unable to allocate TX ring memory\n");
3008		error = ENOMEM;
3009		goto tx_fail;
3010	}
3011
3012	/* Now allocate the RX */
3013	if (!(adapter->rx_rings =
3014	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3015	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3016		device_printf(dev, "Unable to allocate RX ring memory\n");
3017		error = ENOMEM;
3018		goto rx_fail;
3019	}
3020
3021	tsize = roundup2(adapter->num_tx_desc *
3022	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
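	/*
	 * Each advanced descriptor is 16 bytes, so e.g. a 1024-entry
	 * ring needs 16 KB; roundup2() pads that to the IGB_DBA_ALIGN
	 * boundary if it is not already aligned.
	 */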
3023	/*
3024	 * Now set up the TX queues, txconf is needed to handle the
3025	 * possibility that things fail midcourse and we need to
3026	 * undo memory gracefully
3027	 */
3028	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3029		/* Set up some basics */
3030		txr = &adapter->tx_rings[i];
3031		txr->adapter = adapter;
3032		txr->me = i;
3033
3034		/* Initialize the TX lock */
3035		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3036		    device_get_nameunit(dev), txr->me);
3037		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3038
3039		if (igb_dma_malloc(adapter, tsize,
3040			&txr->txdma, BUS_DMA_NOWAIT)) {
3041			device_printf(dev,
3042			    "Unable to allocate TX Descriptor memory\n");
3043			error = ENOMEM;
3044			goto err_tx_desc;
3045		}
3046		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3047		bzero((void *)txr->tx_base, tsize);
3048
3049        	/* Now allocate transmit buffers for the ring */
3050        	if (igb_allocate_transmit_buffers(txr)) {
3051			device_printf(dev,
3052			    "Critical Failure setting up transmit buffers\n");
3053			error = ENOMEM;
3054			goto err_tx_desc;
3055        	}
3056#if __FreeBSD_version >= 800000
3057		/* Allocate a buf ring */
3058		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3059		    M_WAITOK, &txr->tx_mtx);
3060#endif
3061	}
3062
3063	/*
3064	 * Next the RX queues...
3065	 */
3066	rsize = roundup2(adapter->num_rx_desc *
3067	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3068	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3069		rxr = &adapter->rx_rings[i];
3070		rxr->adapter = adapter;
3071		rxr->me = i;
3072
3073		/* Initialize the RX lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
3076		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3077
3078		if (igb_dma_malloc(adapter, rsize,
3079			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3080			device_printf(dev,
3081			    "Unable to allocate RxDescriptor memory\n");
3082			error = ENOMEM;
3083			goto err_rx_desc;
3084		}
3085		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3086		bzero((void *)rxr->rx_base, rsize);
3087
3088        	/* Allocate receive buffers for the ring*/
3089		if (igb_allocate_receive_buffers(rxr)) {
3090			device_printf(dev,
3091			    "Critical Failure setting up receive buffers\n");
3092			error = ENOMEM;
3093			goto err_rx_desc;
3094		}
3095	}
3096
3097	/*
3098	** Finally set up the queue holding structs
3099	*/
3100	for (int i = 0; i < adapter->num_queues; i++) {
3101		que = &adapter->queues[i];
3102		que->adapter = adapter;
3103		que->txr = &adapter->tx_rings[i];
3104		que->rxr = &adapter->rx_rings[i];
3105	}
3106
3107	return (0);
3108
3109err_rx_desc:
3110	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3111		igb_dma_free(adapter, &rxr->rxdma);
3112err_tx_desc:
3113	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3114		igb_dma_free(adapter, &txr->txdma);
3115	free(adapter->rx_rings, M_DEVBUF);
3116rx_fail:
3117#if __FreeBSD_version >= 800000
3118	buf_ring_free(txr->br, M_DEVBUF);
3119#endif
3120	free(adapter->tx_rings, M_DEVBUF);
3121tx_fail:
3122	free(adapter->queues, M_DEVBUF);
3123fail:
3124	return (error);
3125}
3126
3127/*********************************************************************
3128 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach; per-ring setup is done on every reset.
3132 *
3133 **********************************************************************/
3134static int
3135igb_allocate_transmit_buffers(struct tx_ring *txr)
3136{
3137	struct adapter *adapter = txr->adapter;
3138	device_t dev = adapter->dev;
3139	struct igb_tx_buffer *txbuf;
3140	int error, i;
3141
3142	/*
3143	 * Setup DMA descriptor areas.
3144	 */
3145	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3146			       1, 0,			/* alignment, bounds */
3147			       BUS_SPACE_MAXADDR,	/* lowaddr */
3148			       BUS_SPACE_MAXADDR,	/* highaddr */
3149			       NULL, NULL,		/* filter, filterarg */
3150			       IGB_TSO_SIZE,		/* maxsize */
3151			       IGB_MAX_SCATTER,		/* nsegments */
3152			       PAGE_SIZE,		/* maxsegsize */
3153			       0,			/* flags */
3154			       NULL,			/* lockfunc */
3155			       NULL,			/* lockfuncarg */
3156			       &txr->txtag))) {
3157		device_printf(dev,"Unable to allocate TX DMA tag\n");
3158		goto fail;
3159	}
3160
3161	if (!(txr->tx_buffers =
3162	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3163	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3164		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3165		error = ENOMEM;
3166		goto fail;
3167	}
3168
3169        /* Create the descriptor buffer dma maps */
3170	txbuf = txr->tx_buffers;
3171	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3172		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3173		if (error != 0) {
3174			device_printf(dev, "Unable to create TX DMA map\n");
3175			goto fail;
3176		}
3177	}
3178
3179	return 0;
3180fail:
	/* Free everything; this handles failing partway through */
3182	igb_free_transmit_structures(adapter);
3183	return (error);
3184}
3185
3186/*********************************************************************
3187 *
3188 *  Initialize a transmit ring.
3189 *
3190 **********************************************************************/
3191static void
3192igb_setup_transmit_ring(struct tx_ring *txr)
3193{
3194	struct adapter *adapter = txr->adapter;
3195	struct igb_tx_buffer *txbuf;
3196	int i;
3197
3198	/* Clear the old descriptor contents */
3199	IGB_TX_LOCK(txr);
3200	bzero((void *)txr->tx_base,
3201	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3202	/* Reset indices */
3203	txr->next_avail_desc = 0;
3204	txr->next_to_clean = 0;
3205
	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
		/* clear the watch index */
		txbuf->next_eop = -1;
	}
3219
3220	/* Set number of descriptors available */
3221	txr->tx_avail = adapter->num_tx_desc;
3222
3223	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3224	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3225	IGB_TX_UNLOCK(txr);
3226}
3227
3228/*********************************************************************
3229 *
3230 *  Initialize all transmit rings.
3231 *
3232 **********************************************************************/
3233static void
3234igb_setup_transmit_structures(struct adapter *adapter)
3235{
3236	struct tx_ring *txr = adapter->tx_rings;
3237
3238	for (int i = 0; i < adapter->num_queues; i++, txr++)
3239		igb_setup_transmit_ring(txr);
3240
3241	return;
3242}
3243
3244/*********************************************************************
3245 *
3246 *  Enable transmit unit.
3247 *
3248 **********************************************************************/
3249static void
3250igb_initialize_transmit_units(struct adapter *adapter)
3251{
3252	struct tx_ring	*txr = adapter->tx_rings;
3253	struct e1000_hw *hw = &adapter->hw;
3254	u32		tctl, txdctl;
3255
3256	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3257	tctl = txdctl = 0;
3258
3259	/* Setup the Tx Descriptor Rings */
3260	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3261		u64 bus_addr = txr->txdma.dma_paddr;
3262
3263		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3264		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3265		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3266		    (uint32_t)(bus_addr >> 32));
3267		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3268		    (uint32_t)bus_addr);
3269
3270		/* Setup the HW Tx Head and Tail descriptor pointers */
3271		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3272		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3273
3274		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3275		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3276		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3277
3278		txr->queue_status = IGB_QUEUE_IDLE;
3279
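		/*
		 * TXDCTL packs the prefetch (PTHRESH), host
		 * (HTHRESH) and write-back (WTHRESH) thresholds
		 * into bytes 0, 1 and 2 respectively, then the
		 * enable bit turns the queue on.
		 */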
3280		txdctl |= IGB_TX_PTHRESH;
3281		txdctl |= IGB_TX_HTHRESH << 8;
3282		txdctl |= IGB_TX_WTHRESH << 16;
3283		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3284		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3285	}
3286
3287	if (adapter->vf_ifp)
3288		return;
3289
3290	e1000_config_collision_dist(hw);
3291
3292	/* Program the Transmit Control Register */
3293	tctl = E1000_READ_REG(hw, E1000_TCTL);
3294	tctl &= ~E1000_TCTL_CT;
3295	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3296		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3297
3298	/* This write will effectively turn on the transmit unit. */
3299	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3300}
3301
3302/*********************************************************************
3303 *
3304 *  Free all transmit rings.
3305 *
3306 **********************************************************************/
3307static void
3308igb_free_transmit_structures(struct adapter *adapter)
3309{
3310	struct tx_ring *txr = adapter->tx_rings;
3311
3312	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3313		IGB_TX_LOCK(txr);
3314		igb_free_transmit_buffers(txr);
3315		igb_dma_free(adapter, &txr->txdma);
3316		IGB_TX_UNLOCK(txr);
3317		IGB_TX_LOCK_DESTROY(txr);
3318	}
3319	free(adapter->tx_rings, M_DEVBUF);
3320}
3321
3322/*********************************************************************
3323 *
3324 *  Free transmit ring related data structures.
3325 *
3326 **********************************************************************/
3327static void
3328igb_free_transmit_buffers(struct tx_ring *txr)
3329{
3330	struct adapter *adapter = txr->adapter;
3331	struct igb_tx_buffer *tx_buffer;
3332	int             i;
3333
3334	INIT_DEBUGOUT("free_transmit_ring: begin");
3335
3336	if (txr->tx_buffers == NULL)
3337		return;
3338
3339	tx_buffer = txr->tx_buffers;
3340	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3341		if (tx_buffer->m_head != NULL) {
3342			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3343			    BUS_DMASYNC_POSTWRITE);
3344			bus_dmamap_unload(txr->txtag,
3345			    tx_buffer->map);
3346			m_freem(tx_buffer->m_head);
3347			tx_buffer->m_head = NULL;
3348			if (tx_buffer->map != NULL) {
3349				bus_dmamap_destroy(txr->txtag,
3350				    tx_buffer->map);
3351				tx_buffer->map = NULL;
3352			}
3353		} else if (tx_buffer->map != NULL) {
3354			bus_dmamap_unload(txr->txtag,
3355			    tx_buffer->map);
3356			bus_dmamap_destroy(txr->txtag,
3357			    tx_buffer->map);
3358			tx_buffer->map = NULL;
3359		}
3360	}
3361#if __FreeBSD_version >= 800000
3362	if (txr->br != NULL)
3363		buf_ring_free(txr->br, M_DEVBUF);
3364#endif
3365	if (txr->tx_buffers != NULL) {
3366		free(txr->tx_buffers, M_DEVBUF);
3367		txr->tx_buffers = NULL;
3368	}
3369	if (txr->txtag != NULL) {
3370		bus_dma_tag_destroy(txr->txtag);
3371		txr->txtag = NULL;
3372	}
3373	return;
3374}
3375
3376/**********************************************************************
3377 *
3378 *  Setup work for hardware segmentation offload (TSO)
3379 *
3380 **********************************************************************/
3381static boolean_t
3382igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3383{
3384	struct adapter *adapter = txr->adapter;
3385	struct e1000_adv_tx_context_desc *TXD;
3386	struct igb_tx_buffer        *tx_buffer;
3387	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3388	u32 mss_l4len_idx = 0;
3389	u16 vtag = 0;
3390	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3391	struct ether_vlan_header *eh;
3392	struct ip *ip;
3393	struct tcphdr *th;
3394
3396	/*
3397	 * Determine where frame payload starts.
3398	 * Jump over vlan headers if already present
3399	 */
3400	eh = mtod(mp, struct ether_vlan_header *);
3401	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3402		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3403	else
3404		ehdrlen = ETHER_HDR_LEN;
3405
3406	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3407	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3408		return FALSE;
3409
3410	/* Only supports IPV4 for now */
3411	ctxd = txr->next_avail_desc;
3412	tx_buffer = &txr->tx_buffers[ctxd];
3413	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3414
	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;
3418	ip->ip_sum = 0;
3419	ip_hlen = ip->ip_hl << 2;
3420	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3421	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3422	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3423	tcp_hlen = th->th_off << 2;
	/*
	 * Calculate the header length; this is used
	 * in the transmit descriptor in igb_xmit().
	 */
3428	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
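	/*
	 * Example: an untagged IPv4/TCP frame with no IP or TCP
	 * options gives 14 + 20 + 20 = 54 bytes of header.
	 */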
3429
3430	/* VLAN MACLEN IPLEN */
3431	if (mp->m_flags & M_VLANTAG) {
3432		vtag = htole16(mp->m_pkthdr.ether_vtag);
3433		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3434	}
3435
3436	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3437	vlan_macip_lens |= ip_hlen;
3438	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3439
3440	/* ADV DTYPE TUCMD */
3441	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3442	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3443	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3444	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3445
3446	/* MSS L4LEN IDX */
3447	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3448	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3449	/* 82575 needs the queue index added */
3450	if (adapter->hw.mac.type == e1000_82575)
3451		mss_l4len_idx |= txr->me << 4;
3452	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3453
3454	TXD->seqnum_seed = htole32(0);
3455	tx_buffer->m_head = NULL;
3456	tx_buffer->next_eop = -1;
3457
3458	if (++ctxd == adapter->num_tx_desc)
3459		ctxd = 0;
3460
3461	txr->tx_avail--;
3462	txr->next_avail_desc = ctxd;
3463	return TRUE;
3464}
3465
3466
3467/*********************************************************************
3468 *
3469 *  Context Descriptor setup for VLAN or CSUM
3470 *
3471 **********************************************************************/
3472
3473static bool
3474igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3475{
3476	struct adapter *adapter = txr->adapter;
3477	struct e1000_adv_tx_context_desc *TXD;
3478	struct igb_tx_buffer        *tx_buffer;
3479	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3480	struct ether_vlan_header *eh;
3481	struct ip *ip = NULL;
3482	struct ip6_hdr *ip6;
3483	int  ehdrlen, ctxd, ip_hlen = 0;
3484	u16	etype, vtag = 0;
3485	u8	ipproto = 0;
3486	bool	offload = TRUE;
3487
3488	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3489		offload = FALSE;
3490
3491	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3492	ctxd = txr->next_avail_desc;
3493	tx_buffer = &txr->tx_buffers[ctxd];
3494	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3495
3496	/*
3497	** In advanced descriptors the vlan tag must
3498	** be placed into the context descriptor, thus
3499	** we need to be here just for that setup.
3500	*/
3501	if (mp->m_flags & M_VLANTAG) {
3502		vtag = htole16(mp->m_pkthdr.ether_vtag);
3503		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3504	} else if (offload == FALSE)
3505		return FALSE;
3506
3507	/*
3508	 * Determine where frame payload starts.
3509	 * Jump over vlan headers if already present,
3510	 * helpful for QinQ too.
3511	 */
3512	eh = mtod(mp, struct ether_vlan_header *);
3513	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3514		etype = ntohs(eh->evl_proto);
3515		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3516	} else {
3517		etype = ntohs(eh->evl_encap_proto);
3518		ehdrlen = ETHER_HDR_LEN;
3519	}
3520
3521	/* Set the ether header length */
3522	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3523
3524	switch (etype) {
3525		case ETHERTYPE_IP:
3526			ip = (struct ip *)(mp->m_data + ehdrlen);
3527			ip_hlen = ip->ip_hl << 2;
3528			if (mp->m_len < ehdrlen + ip_hlen) {
3529				offload = FALSE;
3530				break;
3531			}
3532			ipproto = ip->ip_p;
3533			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3534			break;
3535		case ETHERTYPE_IPV6:
3536			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3537			ip_hlen = sizeof(struct ip6_hdr);
3538			ipproto = ip6->ip6_nxt;
3539			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3540			break;
3541		default:
3542			offload = FALSE;
3543			break;
3544	}
3545
3546	vlan_macip_lens |= ip_hlen;
3547	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3548
3549	switch (ipproto) {
3550		case IPPROTO_TCP:
3551			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3552				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3553			break;
3554		case IPPROTO_UDP:
3555			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3556				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3557			break;
3558#if __FreeBSD_version >= 800000
3559		case IPPROTO_SCTP:
3560			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3561				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3562			break;
3563#endif
3564		default:
3565			offload = FALSE;
3566			break;
3567	}
3568
3569	/* 82575 needs the queue index added */
3570	if (adapter->hw.mac.type == e1000_82575)
3571		mss_l4len_idx = txr->me << 4;
3572
3573	/* Now copy bits into descriptor */
3574	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3575	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3576	TXD->seqnum_seed = htole32(0);
3577	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3578
3579	tx_buffer->m_head = NULL;
3580	tx_buffer->next_eop = -1;
3581
3582	/* We've consumed the first desc, adjust counters */
3583	if (++ctxd == adapter->num_tx_desc)
3584		ctxd = 0;
3585	txr->next_avail_desc = ctxd;
3586	--txr->tx_avail;
3587
3588        return (offload);
3589}
3590
3591
3592/**********************************************************************
3593 *
3594 *  Examine each tx_buffer in the used queue. If the hardware is done
3595 *  processing the packet then free associated resources. The
3596 *  tx_buffer is put back on the free queue.
3597 *
3598 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3599 **********************************************************************/
3600static bool
3601igb_txeof(struct tx_ring *txr)
3602{
3603	struct adapter	*adapter = txr->adapter;
3604        int first, last, done, processed;
3605        struct igb_tx_buffer *tx_buffer;
3606        struct e1000_tx_desc   *tx_desc, *eop_desc;
3607	struct ifnet   *ifp = adapter->ifp;
3608
3609	IGB_TX_LOCK_ASSERT(txr);
3610
3611        if (txr->tx_avail == adapter->num_tx_desc) {
3612		txr->queue_status = IGB_QUEUE_IDLE;
3613                return FALSE;
3614	}
3615
3616	processed = 0;
3617        first = txr->next_to_clean;
3618        tx_desc = &txr->tx_base[first];
3619        tx_buffer = &txr->tx_buffers[first];
3620	last = tx_buffer->next_eop;
3621        eop_desc = &txr->tx_base[last];
3622
3623	/*
3624	 * Get the index of the first descriptor
3625	 * AFTER the EOP of the first packet, so
3626	 * that the inner while loop can use a
3627	 * simple comparison.
3628	 */
3629	if (++last == adapter->num_tx_desc)
3630 		last = 0;
3631	done = last;
3632
3633        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3634            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3635
3636        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3637		/* We clean the range of the packet */
3638		while (first != done) {
3639                	tx_desc->upper.data = 0;
3640                	tx_desc->lower.data = 0;
3641                	tx_desc->buffer_addr = 0;
3642                	++txr->tx_avail;
3643			++processed;
3644
3645			if (tx_buffer->m_head) {
3646				txr->bytes +=
3647				    tx_buffer->m_head->m_pkthdr.len;
3648				bus_dmamap_sync(txr->txtag,
3649				    tx_buffer->map,
3650				    BUS_DMASYNC_POSTWRITE);
3651				bus_dmamap_unload(txr->txtag,
3652				    tx_buffer->map);
3653
3654                        	m_freem(tx_buffer->m_head);
3655                        	tx_buffer->m_head = NULL;
3656                	}
3657			tx_buffer->next_eop = -1;
3658			txr->watchdog_time = ticks;
3659
3660	                if (++first == adapter->num_tx_desc)
3661				first = 0;
3662
3663	                tx_buffer = &txr->tx_buffers[first];
3664			tx_desc = &txr->tx_base[first];
3665		}
3666		++txr->packets;
3667		++ifp->if_opackets;
3668		/* See if we can continue to the next packet */
3669		last = tx_buffer->next_eop;
3670		if (last != -1) {
3671        		eop_desc = &txr->tx_base[last];
3672			/* Get new done point */
3673			if (++last == adapter->num_tx_desc)
				last = 0;
3674			done = last;
3675		} else
3676			break;
3677        }
3678        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3679            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3680
3681        txr->next_to_clean = first;
3682
3683	/*
3684	** Watchdog calculation: we know there's
3685	** work outstanding or the early return
3686	** above would have been taken, so if nothing
3687	** was processed for too long we have a hang.
3688	*/
3689	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3690		txr->queue_status = IGB_QUEUE_HUNG;
3691
3692        /*
3693         * If we have a minimum free, clear IFF_DRV_OACTIVE
3694         * to tell the stack that it is OK to send packets.
3695         */
3696        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3697                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3698		/* All clean, turn off the watchdog */
3699                if (txr->tx_avail == adapter->num_tx_desc) {
3700			txr->queue_status = IGB_QUEUE_IDLE;
3701			return (FALSE);
3702		}
3703        }
3704	return (TRUE);
3705}
3706
3707/*********************************************************************
3708 *
3709 *  Refresh mbuf buffers for RX descriptor rings
3710 *   - now keeps its own state so discards due to resource
3711 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3712 *     the routine just returns, keeping its placeholder, so it
3713 *     can simply be called again to retry.
3714 *
3715 **********************************************************************/
3716static void
3717igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3718{
3719	struct adapter		*adapter = rxr->adapter;
3720	bus_dma_segment_t	hseg[1];
3721	bus_dma_segment_t	pseg[1];
3722	struct igb_rx_buf	*rxbuf;
3723	struct mbuf		*mh, *mp;
3724	int			i, j, nsegs, error;
3725	bool			refreshed = FALSE;
3726
3727	i = j = rxr->next_to_refresh;
3728	/*
3729	** Get one descriptor beyond
3730	** our work mark to control
3731	** the loop.
3732	*/
3733	if (++j == adapter->num_rx_desc)
3734		j = 0;
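	/* From here on j leads i by one slot; the loop ends when
	   j reaches the caller-supplied limit */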
3735
3736	while (j != limit) {
3737		rxbuf = &rxr->rx_buffers[i];
3738		/* No hdr mbuf used with header split off */
3739		if (rxr->hdr_split == FALSE)
3740			goto no_split;
3741		if (rxbuf->m_head == NULL) {
3742			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3743			if (mh == NULL)
3744				goto update;
3745		} else
3746			mh = rxbuf->m_head;
3747
3748		mh->m_pkthdr.len = mh->m_len = MHLEN;
3750		mh->m_flags |= M_PKTHDR;
3751		/* Get the memory mapping */
3752		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3753		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3754		if (error != 0) {
3755			printf("Refresh mbufs: hdr dmamap load"
3756			    " failure - %d\n", error);
3757			m_free(mh);
3758			rxbuf->m_head = NULL;
3759			goto update;
3760		}
3761		rxbuf->m_head = mh;
3762		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3763		    BUS_DMASYNC_PREREAD);
3764		rxr->rx_base[i].read.hdr_addr =
3765		    htole64(hseg[0].ds_addr);
3766no_split:
3767		if (rxbuf->m_pack == NULL) {
3768			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3769			    M_PKTHDR, adapter->rx_mbuf_sz);
3770			if (mp == NULL)
3771				goto update;
3772		} else
3773			mp = rxbuf->m_pack;
3774
3775		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3776		/* Get the memory mapping */
3777		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3778		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3779		if (error != 0) {
3780			printf("Refresh mbufs: payload dmamap load"
3781			    " failure - %d\n", error);
3782			m_free(mp);
3783			rxbuf->m_pack = NULL;
3784			goto update;
3785		}
3786		rxbuf->m_pack = mp;
3787		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3788		    BUS_DMASYNC_PREREAD);
3789		rxr->rx_base[i].read.pkt_addr =
3790		    htole64(pseg[0].ds_addr);
3791		refreshed = TRUE; /* I feel wefreshed :) */
3792
3793		i = j; /* our next is precalculated */
3794		rxr->next_to_refresh = i;
3795		if (++j == adapter->num_rx_desc)
3796			j = 0;
3797	}
3798update:
3799	if (refreshed) /* update tail */
3800		E1000_WRITE_REG(&adapter->hw,
3801		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3802	return;
3803}
3804
3805
3806/*********************************************************************
3807 *
3808 *  Allocate memory for rx_buffer structures. Since we use one
3809 *  rx_buffer per received packet, the maximum number of rx_buffer's
3810 *  that we'll need is equal to the number of receive descriptors
3811 *  that we've allocated.
3812 *
3813 **********************************************************************/
3814static int
3815igb_allocate_receive_buffers(struct rx_ring *rxr)
3816{
3817	struct	adapter 	*adapter = rxr->adapter;
3818	device_t 		dev = adapter->dev;
3819	struct igb_rx_buf	*rxbuf;
3820	int             	i, bsize, error;
3821
3822	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3823	if (!(rxr->rx_buffers =
3824	    (struct igb_rx_buf *) malloc(bsize,
3825	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3826		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3827		error = ENOMEM;
3828		goto fail;
3829	}
3830
3831	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3832				   1, 0,		/* alignment, bounds */
3833				   BUS_SPACE_MAXADDR,	/* lowaddr */
3834				   BUS_SPACE_MAXADDR,	/* highaddr */
3835				   NULL, NULL,		/* filter, filterarg */
3836				   MSIZE,		/* maxsize */
3837				   1,			/* nsegments */
3838				   MSIZE,		/* maxsegsize */
3839				   0,			/* flags */
3840				   NULL,		/* lockfunc */
3841				   NULL,		/* lockfuncarg */
3842				   &rxr->htag))) {
3843		device_printf(dev, "Unable to create RX DMA tag\n");
3844		goto fail;
3845	}
3846
3847	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3848				   1, 0,		/* alignment, bounds */
3849				   BUS_SPACE_MAXADDR,	/* lowaddr */
3850				   BUS_SPACE_MAXADDR,	/* highaddr */
3851				   NULL, NULL,		/* filter, filterarg */
3852				   MJUM9BYTES,		/* maxsize */
3853				   1,			/* nsegments */
3854				   MJUM9BYTES,		/* maxsegsize */
3855				   0,			/* flags */
3856				   NULL,		/* lockfunc */
3857				   NULL,		/* lockfuncarg */
3858				   &rxr->ptag))) {
3859		device_printf(dev, "Unable to create RX payload DMA tag\n");
3860		goto fail;
3861	}
3862
3863	for (i = 0; i < adapter->num_rx_desc; i++) {
3864		rxbuf = &rxr->rx_buffers[i];
3865		error = bus_dmamap_create(rxr->htag,
3866		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3867		if (error) {
3868			device_printf(dev,
3869			    "Unable to create RX head DMA maps\n");
3870			goto fail;
3871		}
3872		error = bus_dmamap_create(rxr->ptag,
3873		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3874		if (error) {
3875			device_printf(dev,
3876			    "Unable to create RX packet DMA maps\n");
3877			goto fail;
3878		}
3879	}
3880
3881	return (0);
3882
3883fail:
3884	/* Frees all, but can handle partial completion */
3885	igb_free_receive_structures(adapter);
3886	return (error);
3887}
3888
3889
3890static void
3891igb_free_receive_ring(struct rx_ring *rxr)
3892{
3893	struct	adapter		*adapter = rxr->adapter;
3894	struct igb_rx_buf	*rxbuf;
3895
3896
3897	for (int i = 0; i < adapter->num_rx_desc; i++) {
3898		rxbuf = &rxr->rx_buffers[i];
3899		if (rxbuf->m_head != NULL) {
3900			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3901			    BUS_DMASYNC_POSTREAD);
3902			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3903			rxbuf->m_head->m_flags |= M_PKTHDR;
3904			m_freem(rxbuf->m_head);
3905		}
3906		if (rxbuf->m_pack != NULL) {
3907			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3908			    BUS_DMASYNC_POSTREAD);
3909			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3910			rxbuf->m_pack->m_flags |= M_PKTHDR;
3911			m_freem(rxbuf->m_pack);
3912		}
3913		rxbuf->m_head = NULL;
3914		rxbuf->m_pack = NULL;
3915	}
3916}
3917
3918
3919/*********************************************************************
3920 *
3921 *  Initialize a receive ring and its buffers.
3922 *
3923 **********************************************************************/
3924static int
3925igb_setup_receive_ring(struct rx_ring *rxr)
3926{
3927	struct	adapter		*adapter;
3928	struct  ifnet		*ifp;
3929	device_t		dev;
3930	struct igb_rx_buf	*rxbuf;
3931	bus_dma_segment_t	pseg[1], hseg[1];
3932	struct lro_ctrl		*lro = &rxr->lro;
3933	int			rsize, nsegs, error = 0;
3934
3935	adapter = rxr->adapter;
3936	dev = adapter->dev;
3937	ifp = adapter->ifp;
3938
3939	/* Clear the ring contents */
3940	IGB_RX_LOCK(rxr);
3941	rsize = roundup2(adapter->num_rx_desc *
3942	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3943	bzero((void *)rxr->rx_base, rsize);
3944
3945	/*
3946	** Free current RX buffer structures and their mbufs
3947	*/
3948	igb_free_receive_ring(rxr);
3949
3950	/* Configure for header split? */
3951	if (igb_header_split)
3952		rxr->hdr_split = TRUE;
3953
3954        /* Now replenish the ring mbufs */
3955	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3956		struct mbuf	*mh, *mp;
3957
3958		rxbuf = &rxr->rx_buffers[j];
3959		if (rxr->hdr_split == FALSE)
3960			goto skip_head;
3961
3962		/* First the header */
3963		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3964		if (rxbuf->m_head == NULL) {
3965			error = ENOBUFS;
3966                        goto fail;
3967		}
3968		m_adj(rxbuf->m_head, ETHER_ALIGN);
3969		mh = rxbuf->m_head;
3970		mh->m_len = mh->m_pkthdr.len = MHLEN;
3971		mh->m_flags |= M_PKTHDR;
3972		/* Get the memory mapping */
3973		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3974		    rxbuf->hmap, rxbuf->m_head, hseg,
3975		    &nsegs, BUS_DMA_NOWAIT);
3976		if (error != 0) /* Nothing elegant to do here */
3977                        goto fail;
3978		bus_dmamap_sync(rxr->htag,
3979		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
3980		/* Update descriptor */
3981		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
3982
3983skip_head:
3984		/* Now the payload cluster */
3985		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
3986		    M_PKTHDR, adapter->rx_mbuf_sz);
3987		if (rxbuf->m_pack == NULL) {
3988			error = ENOBUFS;
3989                        goto fail;
3990		}
3991		mp = rxbuf->m_pack;
3992		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3993		/* Get the memory mapping */
3994		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3995		    rxbuf->pmap, mp, pseg,
3996		    &nsegs, BUS_DMA_NOWAIT);
3997		if (error != 0)
3998                        goto fail;
3999		bus_dmamap_sync(rxr->ptag,
4000		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4001		/* Update descriptor */
4002		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4003        }
4004
4005	/* Setup our descriptor indices */
4006	rxr->next_to_check = 0;
4007	rxr->next_to_refresh = adapter->num_rx_desc - 1;
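	/* The tail (RDT) is later programmed from next_to_refresh,
	   leaving one slot of slack as the ring full/empty sentinel */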
4008	rxr->lro_enabled = FALSE;
4009	rxr->rx_split_packets = 0;
4010	rxr->rx_bytes = 0;
4011
4012	rxr->fmp = NULL;
4013	rxr->lmp = NULL;
4014	rxr->discard = FALSE;
4015
4016	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4017	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4018
4019	/*
4020	** Now set up the LRO interface; we
4021	** also only do header split when LRO
4022	** is enabled, since header split is
4023	** rarely desirable without it.
4024	*/
4025	if (ifp->if_capenable & IFCAP_LRO) {
4026		error = tcp_lro_init(lro);
4027		if (error) {
4028			device_printf(dev, "LRO Initialization failed!\n");
4029			goto fail;
4030		}
4031		INIT_DEBUGOUT("RX LRO Initialized\n");
4032		rxr->lro_enabled = TRUE;
4033		lro->ifp = adapter->ifp;
4034	}
4035
4036	IGB_RX_UNLOCK(rxr);
4037	return (0);
4038
4039fail:
4040	igb_free_receive_ring(rxr);
4041	IGB_RX_UNLOCK(rxr);
4042	return (error);
4043}
4044
4045
4046/*********************************************************************
4047 *
4048 *  Initialize all receive rings.
4049 *
4050 **********************************************************************/
4051static int
4052igb_setup_receive_structures(struct adapter *adapter)
4053{
4054	struct rx_ring *rxr = adapter->rx_rings;
4055	int i;
4056
4057	for (i = 0; i < adapter->num_queues; i++, rxr++)
4058		if (igb_setup_receive_ring(rxr))
4059			goto fail;
4060
4061	return (0);
4062fail:
4063	/*
4064	 * Free RX buffers allocated so far; we only handle
4065	 * the rings that completed, since the failing case
4066	 * will have cleaned up after itself. 'i' is the endpoint.
4067	 */
4068	for (int j = 0; j < i; ++j) {
4069		rxr = &adapter->rx_rings[j];
4070		IGB_RX_LOCK(rxr);
4071		igb_free_receive_ring(rxr);
4072		IGB_RX_UNLOCK(rxr);
4073	}
4074
4075	return (ENOBUFS);
4076}
4077
4078/*********************************************************************
4079 *
4080 *  Enable receive unit.
4081 *
4082 **********************************************************************/
4083static void
4084igb_initialize_receive_units(struct adapter *adapter)
4085{
4086	struct rx_ring	*rxr = adapter->rx_rings;
4087	struct ifnet	*ifp = adapter->ifp;
4088	struct e1000_hw *hw = &adapter->hw;
4089	u32		rctl, rxcsum, psize, srrctl = 0;
4090
4091	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4092
4093	/*
4094	 * Make sure receives are disabled while setting
4095	 * up the descriptor ring
4096	 */
4097	rctl = E1000_READ_REG(hw, E1000_RCTL);
4098	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4099
4100	/*
4101	** Set up for header split
4102	*/
4103	if (igb_header_split) {
4104		/* Use a standard mbuf for the header */
4105		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4106		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4107	} else
4108		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4109
4110	/*
4111	** Set up for jumbo frames
4112	*/
4113	if (ifp->if_mtu > ETHERMTU) {
4114		rctl |= E1000_RCTL_LPE;
4115		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4116			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4117			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4118		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4119			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4120			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4121		}
4122		/* Set maximum packet len */
4123		psize = adapter->max_frame_size;
4124		/* are we on a vlan? */
4125		if (adapter->ifp->if_vlantrunk != NULL)
4126			psize += VLAN_TAG_SIZE;
4127		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4128	} else {
4129		rctl &= ~E1000_RCTL_LPE;
4130		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4131		rctl |= E1000_RCTL_SZ_2048;
4132	}
4133
4134	/* Setup the Base and Length of the Rx Descriptor Rings */
4135	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4136		u64 bus_addr = rxr->rxdma.dma_paddr;
4137		u32 rxdctl;
4138
4139		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4140		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
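		/* (legacy and advanced RX descriptors are both 16 bytes,
		   so the legacy sizeof above yields the correct length) */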
4141		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4142		    (uint32_t)(bus_addr >> 32));
4143		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4144		    (uint32_t)bus_addr);
4145		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4146		/* Enable this Queue */
4147		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4148		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4149		rxdctl &= 0xFFF00000;
4150		rxdctl |= IGB_RX_PTHRESH;
4151		rxdctl |= IGB_RX_HTHRESH << 8;
4152		rxdctl |= IGB_RX_WTHRESH << 16;
4153		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4154	}
4155
4156	/*
4157	** Setup for RX MultiQueue
4158	*/
4159	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4160	if (adapter->num_queues > 1) {
4161		u32 random[10], mrqc, shift = 0;
4162		union igb_reta {
4163			u32 dword;
4164			u8  bytes[4];
4165		} reta;
4166
4167		arc4rand(&random, sizeof(random), 0);
4168		if (adapter->hw.mac.type == e1000_82575)
4169			shift = 6;
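		/* The 82575 expects the queue index in bits 7:6 of
		   each RETA byte, hence the shift */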
4170		/* Populate the 128-entry RSS redirection table */
4171		for (int i = 0; i < 128; i++) {
4172			reta.bytes[i & 3] =
4173			    (i % adapter->num_queues) << shift;
4174			if ((i & 3) == 3)
4175				E1000_WRITE_REG(hw,
4176				    E1000_RETA(i >> 2), reta.dword);
4177		}
4178		/* Now fill in the RSS random key (RSSRK registers) */
4179		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4180		for (int i = 0; i < 10; i++)
4181			E1000_WRITE_REG_ARRAY(hw,
4182			    E1000_RSSRK(0), i, random[i]);
4183
4184		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4185		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4186		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4187		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4188		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4189		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4190		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4191		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4192
4193		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4194
4195		/*
4196		** NOTE: Receive Full-Packet Checksum Offload
4197		** is mutually exclusive with Multiqueue. However,
4198		** this is not the same as TCP/IP checksums, which
4199		** still work.
4200		*/
4201		rxcsum |= E1000_RXCSUM_PCSD;
4202#if __FreeBSD_version >= 800000
4203		/* For SCTP Offload */
4204		if ((hw->mac.type == e1000_82576)
4205		    && (ifp->if_capenable & IFCAP_RXCSUM))
4206			rxcsum |= E1000_RXCSUM_CRCOFL;
4207#endif
4208	} else {
4209		/* Non RSS setup */
4210		if (ifp->if_capenable & IFCAP_RXCSUM) {
4211			rxcsum |= E1000_RXCSUM_IPPCSE;
4212#if __FreeBSD_version >= 800000
4213			if (adapter->hw.mac.type == e1000_82576)
4214				rxcsum |= E1000_RXCSUM_CRCOFL;
4215#endif
4216		} else
4217			rxcsum &= ~E1000_RXCSUM_TUOFL;
4218	}
4219	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4220
4221	/* Setup the Receive Control Register */
4222	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4223	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4224		   E1000_RCTL_RDMTS_HALF |
4225		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4226	/* Strip CRC bytes. */
4227	rctl |= E1000_RCTL_SECRC;
4228	/* Make sure VLAN Filters are off */
4229	rctl &= ~E1000_RCTL_VFE;
4230	/* Don't store bad packets */
4231	rctl &= ~E1000_RCTL_SBP;
4232
4233	/* Enable Receives */
4234	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4235
4236	/*
4237	 * Setup the HW Rx Head and Tail Descriptor Pointers
4238	 *   - needs to be after enable
4239	 */
4240	for (int i = 0; i < adapter->num_queues; i++) {
4241		rxr = &adapter->rx_rings[i];
4242		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4243		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4244	}
4245	return;
4246}
4247
4248/*********************************************************************
4249 *
4250 *  Free receive rings.
4251 *
4252 **********************************************************************/
4253static void
4254igb_free_receive_structures(struct adapter *adapter)
4255{
4256	struct rx_ring *rxr = adapter->rx_rings;
4257
4258	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4259		struct lro_ctrl	*lro = &rxr->lro;
4260		igb_free_receive_buffers(rxr);
4261		tcp_lro_free(lro);
4262		igb_dma_free(adapter, &rxr->rxdma);
4263	}
4264
4265	free(adapter->rx_rings, M_DEVBUF);
4266}
4267
4268/*********************************************************************
4269 *
4270 *  Free receive ring data structures.
4271 *
4272 **********************************************************************/
4273static void
4274igb_free_receive_buffers(struct rx_ring *rxr)
4275{
4276	struct adapter		*adapter = rxr->adapter;
4277	struct igb_rx_buf	*rxbuf;
4278	int i;
4279
4280	INIT_DEBUGOUT("free_receive_structures: begin");
4281
4282	/* Cleanup any existing buffers */
4283	if (rxr->rx_buffers != NULL) {
4284		for (i = 0; i < adapter->num_rx_desc; i++) {
4285			rxbuf = &rxr->rx_buffers[i];
4286			if (rxbuf->m_head != NULL) {
4287				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4288				    BUS_DMASYNC_POSTREAD);
4289				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4290				rxbuf->m_head->m_flags |= M_PKTHDR;
4291				m_freem(rxbuf->m_head);
4292			}
4293			if (rxbuf->m_pack != NULL) {
4294				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4295				    BUS_DMASYNC_POSTREAD);
4296				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4297				rxbuf->m_pack->m_flags |= M_PKTHDR;
4298				m_freem(rxbuf->m_pack);
4299			}
4300			rxbuf->m_head = NULL;
4301			rxbuf->m_pack = NULL;
4302			if (rxbuf->hmap != NULL) {
4303				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4304				rxbuf->hmap = NULL;
4305			}
4306			if (rxbuf->pmap != NULL) {
4307				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4308				rxbuf->pmap = NULL;
4309			}
4310		}
4311		if (rxr->rx_buffers != NULL) {
4312			free(rxr->rx_buffers, M_DEVBUF);
4313			rxr->rx_buffers = NULL;
4314		}
4315	}
4316
4317	if (rxr->htag != NULL) {
4318		bus_dma_tag_destroy(rxr->htag);
4319		rxr->htag = NULL;
4320	}
4321	if (rxr->ptag != NULL) {
4322		bus_dma_tag_destroy(rxr->ptag);
4323		rxr->ptag = NULL;
4324	}
4325}
4326
4327static __inline void
4328igb_rx_discard(struct rx_ring *rxr, int i)
4329{
4330	struct igb_rx_buf	*rbuf;
4331
4332	rbuf = &rxr->rx_buffers[i];
4333
4334	/* Partially received? Free the chain */
4335	if (rxr->fmp != NULL) {
4336		rxr->fmp->m_flags |= M_PKTHDR;
4337		m_freem(rxr->fmp);
4338		rxr->fmp = NULL;
4339		rxr->lmp = NULL;
4340	}
4341
4342	/*
4343	** With advanced descriptors the writeback
4344	** clobbers the buffer addrs, so its easier
4345	** to just free the existing mbufs and take
4346	** the normal refresh path to get new buffers
4347	** and mapping.
4348	*/
4349	if (rbuf->m_head) {
4350		m_free(rbuf->m_head);
4351		rbuf->m_head = NULL;
4352	}
4353
4354	if (rbuf->m_pack) {
4355		m_free(rbuf->m_pack);
4356		rbuf->m_pack = NULL;
4357	}
4358
4359	return;
4360}
4361
4362static __inline void
4363igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4364{
4365
4366	/*
4367	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4368	 * has been verified by hardware, and the frame must not carry a VLAN
4369	 * tag in the ethernet header.
4370	 */
4371	if (rxr->lro_enabled &&
4372	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4373	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4374	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4375	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4376	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4377	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4378		/*
4379		 * Send to the stack if:
4380		 *  - LRO not enabled, or
4381		 *  - no LRO resources, or
4382		 *  - lro enqueue fails
4383		 */
4384		if (rxr->lro.lro_cnt != 0)
4385			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4386				return;
4387	}
4388	IGB_RX_UNLOCK(rxr);
4389	(*ifp->if_input)(ifp, m);
4390	IGB_RX_LOCK(rxr);
4391}
4392
4393/*********************************************************************
4394 *
4395 *  This routine executes in interrupt context. It replenishes
4396 *  the mbufs in the descriptor ring and sends data which has
4397 *  been dma'ed into host memory to the upper layer.
4398 *
4399 *  We loop at most count times if count is > 0, or until done if
4400 *  count < 0.
4401 *
4402 *  Return TRUE if more to clean, FALSE otherwise
4403 *********************************************************************/
4404static bool
4405igb_rxeof(struct igb_queue *que, int count, int *done)
4406{
4407	struct adapter		*adapter = que->adapter;
4408	struct rx_ring		*rxr = que->rxr;
4409	struct ifnet		*ifp = adapter->ifp;
4410	struct lro_ctrl		*lro = &rxr->lro;
4411	struct lro_entry	*queued;
4412	int			i, processed = 0, rxdone = 0;
4413	u32			ptype, staterr = 0;
4414	union e1000_adv_rx_desc	*cur;
4415
4416	IGB_RX_LOCK(rxr);
4417	/* Sync the ring. */
4418	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4419	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4420
4421	/* Main clean loop */
4422	for (i = rxr->next_to_check; count != 0;) {
4423		struct mbuf		*sendmp, *mh, *mp;
4424		struct igb_rx_buf	*rxbuf;
4425		u16			hlen, plen, hdr, vtag;
4426		bool			eop = FALSE;
4427
4428		cur = &rxr->rx_base[i];
4429		staterr = le32toh(cur->wb.upper.status_error);
4430		if ((staterr & E1000_RXD_STAT_DD) == 0)
4431			break;
4432		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4433			break;
4434		count--;
4435		sendmp = mh = mp = NULL;
4436		cur->wb.upper.status_error = 0;
4437		rxbuf = &rxr->rx_buffers[i];
4438		plen = le16toh(cur->wb.upper.length);
4439		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4440		if ((adapter->hw.mac.type == e1000_i350) &&
4441		    (staterr & E1000_RXDEXT_STATERR_LB))
4442			vtag = be16toh(cur->wb.upper.vlan);
4443		else
4444			vtag = le16toh(cur->wb.upper.vlan);
4445		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4446		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4447
4448		/* Make sure all segments of a bad packet are discarded */
4449		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4450		    (rxr->discard)) {
4451			ifp->if_ierrors++;
4452			++rxr->rx_discarded;
4453			if (!eop) /* Catch subsequent segs */
4454				rxr->discard = TRUE;
4455			else
4456				rxr->discard = FALSE;
4457			igb_rx_discard(rxr, i);
4458			goto next_desc;
4459		}
4460
4461		/*
4462		** The way the hardware is configured to
4463		** split, it will ONLY use the header buffer
4464		** when header split is enabled; otherwise we
4465		** get normal behavior, i.e., both header and
4466		** payload are DMA'd into the payload buffer.
4467		**
4468		** The fmp test is to catch the case where a
4469		** packet spans multiple descriptors, in that
4470		** case only the first header is valid.
4471		*/
4472		if (rxr->hdr_split && rxr->fmp == NULL) {
4473			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4474			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4475			if (hlen > IGB_HDR_BUF)
4476				hlen = IGB_HDR_BUF;
4477			mh = rxr->rx_buffers[i].m_head;
4478			mh->m_len = hlen;
4479			/* clear buf pointer for refresh */
4480			rxbuf->m_head = NULL;
4481			/*
4482			** Get the payload length; this
4483			** could be zero if it's a small
4484			** packet.
4485			*/
4486			if (plen > 0) {
4487				mp = rxr->rx_buffers[i].m_pack;
4488				mp->m_len = plen;
4489				mh->m_next = mp;
4490				/* clear buf pointer */
4491				rxbuf->m_pack = NULL;
4492				rxr->rx_split_packets++;
4493			}
4494		} else {
4495			/*
4496			** Either no header split, or a
4497			** secondary piece of a fragmented
4498			** split packet.
4499			*/
4500			mh = rxr->rx_buffers[i].m_pack;
4501			mh->m_len = plen;
4502			/* clear buf info for refresh */
4503			rxbuf->m_pack = NULL;
4504		}
4505
4506		++processed; /* So we know when to refresh */
4507
4508		/* Initial frame - setup */
4509		if (rxr->fmp == NULL) {
4510			mh->m_pkthdr.len = mh->m_len;
4511			/* Save the head of the chain */
4512			rxr->fmp = mh;
4513			rxr->lmp = mh;
4514			if (mp != NULL) {
4515				/* Add payload if split */
4516				mh->m_pkthdr.len += mp->m_len;
4517				rxr->lmp = mh->m_next;
4518			}
4519		} else {
4520			/* Chain mbuf's together */
4521			rxr->lmp->m_next = mh;
4522			rxr->lmp = rxr->lmp->m_next;
4523			rxr->fmp->m_pkthdr.len += mh->m_len;
4524		}
4525
4526		if (eop) {
4527			rxr->fmp->m_pkthdr.rcvif = ifp;
4528			ifp->if_ipackets++;
4529			rxr->rx_packets++;
4530			/* capture data for AIM */
4531			rxr->packets++;
4532			rxr->bytes += rxr->fmp->m_pkthdr.len;
4533			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4534
4535			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4536				igb_rx_checksum(staterr, rxr->fmp, ptype);
4537
4538			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4539			    (staterr & E1000_RXD_STAT_VP) != 0) {
4540				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4541				rxr->fmp->m_flags |= M_VLANTAG;
4542			}
4543#if __FreeBSD_version >= 800000
4544			rxr->fmp->m_pkthdr.flowid = que->msix;
4545			rxr->fmp->m_flags |= M_FLOWID;
4546#endif
4547			sendmp = rxr->fmp;
4548			/* Make sure to set M_PKTHDR. */
4549			sendmp->m_flags |= M_PKTHDR;
4550			rxr->fmp = NULL;
4551			rxr->lmp = NULL;
4552		}
4553
4554next_desc:
4555		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4556		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4557
4558		/* Advance our pointers to the next descriptor. */
4559		if (++i == adapter->num_rx_desc)
4560			i = 0;
4561		/*
4562		** Send to the stack or LRO
4563		*/
4564		if (sendmp != NULL) {
4565			rxr->next_to_check = i;
4566			igb_rx_input(rxr, ifp, sendmp, ptype);
4567			i = rxr->next_to_check;
4568			rxdone++;
4569		}
4570
4571		/* Every 8 descriptors we go to refresh mbufs */
4572		if (processed == 8) {
4573			igb_refresh_mbufs(rxr, i);
4574			processed = 0;
4575		}
4576	}
4577
4578	/* Catch any remainders */
4579	if (igb_rx_unrefreshed(rxr))
4580		igb_refresh_mbufs(rxr, i);
4581
4582	rxr->next_to_check = i;
4583
4584	/*
4585	 * Flush any outstanding LRO work
4586	 */
4587	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4588		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4589		tcp_lro_flush(lro, queued);
4590	}
4591
4592	if (done != NULL)
4593		*done = rxdone;
4594
4595	IGB_RX_UNLOCK(rxr);
4596	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4597}
4598
4599/*********************************************************************
4600 *
4601 *  Verify that the hardware indicated that the checksum is valid.
4602 *  Inform the stack of the checksum status so that it
4603 *  doesn't spend time verifying the checksum again.
4604 *
4605 *********************************************************************/
4606static void
4607igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4608{
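	/* staterr carries the descriptor status in bits 15:0 and
	   the error bits in bits 31:24 */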
4609	u16 status = (u16)staterr;
4610	u8  errors = (u8) (staterr >> 24);
4611	int sctp;
4612
4613	/* Ignore Checksum bit is set */
4614	if (status & E1000_RXD_STAT_IXSM) {
4615		mp->m_pkthdr.csum_flags = 0;
4616		return;
4617	}
4618
4619	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4620	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4621		sctp = 1;
4622	else
4623		sctp = 0;
4624	if (status & E1000_RXD_STAT_IPCS) {
4625		/* Did it pass? */
4626		if (!(errors & E1000_RXD_ERR_IPE)) {
4627			/* IP Checksum Good */
4628			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4629			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4630		} else
4631			mp->m_pkthdr.csum_flags = 0;
4632	}
4633
4634	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4635		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4636#if __FreeBSD_version >= 800000
4637		if (sctp) /* reassign */
4638			type = CSUM_SCTP_VALID;
4639#endif
4640		/* Did it pass? */
4641		if (!(errors & E1000_RXD_ERR_TCPE)) {
4642			mp->m_pkthdr.csum_flags |= type;
4643			if (sctp == 0)
4644				mp->m_pkthdr.csum_data = htons(0xffff);
4645		}
4646	}
4647	return;
4648}
4649
4650/*
4651 * This routine is run via a vlan
4652 * config EVENT
4653 */
4654static void
4655igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4656{
4657	struct adapter	*adapter = ifp->if_softc;
4658	u32		index, bit;
4659
4660	if (ifp->if_softc !=  arg)   /* Not our event */
4661		return;
4662
4663	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4664                return;
4665
4666	IGB_CORE_LOCK(adapter);
4667	index = (vtag >> 5) & 0x7F;
4668	bit = vtag & 0x1F;
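	/* The shadow VFTA is 128 32-bit words: one bit for each of
	   the 4096 possible VLAN IDs */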
4669	adapter->shadow_vfta[index] |= (1 << bit);
4670	++adapter->num_vlans;
4671	/* Change hw filter setting */
4672	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4673		igb_setup_vlan_hw_support(adapter);
4674	IGB_CORE_UNLOCK(adapter);
4675}
4676
4677/*
4678 * This routine is run via a vlan
4679 * unconfig EVENT
4680 */
4681static void
4682igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4683{
4684	struct adapter	*adapter = ifp->if_softc;
4685	u32		index, bit;
4686
4687	if (ifp->if_softc !=  arg)
4688		return;
4689
4690	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4691                return;
4692
4693	IGB_CORE_LOCK(adapter);
4694	index = (vtag >> 5) & 0x7F;
4695	bit = vtag & 0x1F;
4696	adapter->shadow_vfta[index] &= ~(1 << bit);
4697	--adapter->num_vlans;
4698	/* Change hw filter setting */
4699	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4700		igb_setup_vlan_hw_support(adapter);
4701	IGB_CORE_UNLOCK(adapter);
4702}
4703
4704static void
4705igb_setup_vlan_hw_support(struct adapter *adapter)
4706{
4707	struct e1000_hw *hw = &adapter->hw;
4708	struct ifnet	*ifp = adapter->ifp;
4709	u32             reg;
4710
4711	if (adapter->vf_ifp) {
4712		e1000_rlpml_set_vf(hw,
4713		    adapter->max_frame_size + VLAN_TAG_SIZE);
4714		return;
4715	}
4716
4717	reg = E1000_READ_REG(hw, E1000_CTRL);
4718	reg |= E1000_CTRL_VME;
4719	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4720
4721	/* Enable the Filter Table */
4722	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4723		reg = E1000_READ_REG(hw, E1000_RCTL);
4724		reg &= ~E1000_RCTL_CFIEN;
4725		reg |= E1000_RCTL_VFE;
4726		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4727	}
4728
4729	/* Update the frame size */
4730	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4731	    adapter->max_frame_size + VLAN_TAG_SIZE);
4732
4733	/* Don't bother with table if no vlans */
4734	if ((adapter->num_vlans == 0) ||
4735	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4736                return;
4737	/*
4738	** A soft reset zeroes out the VFTA, so
4739	** we need to repopulate it now.
4740	*/
4741	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4742                if (adapter->shadow_vfta[i] != 0) {
4743			if (adapter->vf_ifp)
4744				e1000_vfta_set_vf(hw,
4745				    adapter->shadow_vfta[i], TRUE);
4746			else
4747				E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4748                           	 i, adapter->shadow_vfta[i]);
4749		}
4750}
4751
4752static void
4753igb_enable_intr(struct adapter *adapter)
4754{
4755	/* With RSS set up what to auto clear */
4756	if (adapter->msix_mem) {
4757		u32 mask = (adapter->que_mask | adapter->link_mask);
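		/* EIAC auto-clears these causes when the vector fires;
		   EIAM auto-masks them while the handler runs */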
4758		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4759		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4760		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4761		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4762		    E1000_IMS_LSC);
4763	} else {
4764		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4765		    IMS_ENABLE_MASK);
4766	}
4767	E1000_WRITE_FLUSH(&adapter->hw);
4768
4769	return;
4770}
4771
4772static void
4773igb_disable_intr(struct adapter *adapter)
4774{
4775	if (adapter->msix_mem) {
4776		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4777		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4778	}
4779	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4780	E1000_WRITE_FLUSH(&adapter->hw);
4781	return;
4782}
4783
4784/*
4785 * Bit of a misnomer: what this really means is
4786 * to enable OS management of the system... aka
4787 * to disable special hardware management features
4788 */
4789static void
4790igb_init_manageability(struct adapter *adapter)
4791{
4792	if (adapter->has_manage) {
4793		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4794		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4795
4796		/* disable hardware interception of ARP */
4797		manc &= ~(E1000_MANC_ARP_EN);
4798
4799                /* enable receiving management packets to the host */
4800		manc |= E1000_MANC_EN_MNG2HOST;
4801		manc2h |= 1 << 5;  /* Mng Port 623 */
4802		manc2h |= 1 << 6;  /* Mng Port 664 */
4803		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4804		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4805	}
4806}
4807
4808/*
4809 * Give control back to hardware management
4810 * controller if there is one.
4811 */
4812static void
4813igb_release_manageability(struct adapter *adapter)
4814{
4815	if (adapter->has_manage) {
4816		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4817
4818		/* re-enable hardware interception of ARP */
4819		manc |= E1000_MANC_ARP_EN;
4820		manc &= ~E1000_MANC_EN_MNG2HOST;
4821
4822		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4823	}
4824}
4825
4826/*
4827 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4828 * For ASF and Pass Through versions of f/w this means that
4829 * the driver is loaded.
4830 *
4831 */
4832static void
4833igb_get_hw_control(struct adapter *adapter)
4834{
4835	u32 ctrl_ext;
4836
4837	if (adapter->vf_ifp)
4838		return;
4839
4840	/* Let firmware know the driver has taken over */
4841	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4842	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4843	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4844}
4845
4846/*
4847 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4848 * For ASF and Pass Through versions of f/w this means that the
4849 * driver is no longer loaded.
4850 *
4851 */
4852static void
4853igb_release_hw_control(struct adapter *adapter)
4854{
4855	u32 ctrl_ext;
4856
4857	if (adapter->vf_ifp)
4858		return;
4859
4860	/* Let firmware take over control of h/w */
4861	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4862	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4863	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4864}
4865
4866static int
4867igb_is_valid_ether_addr(uint8_t *addr)
4868{
4869	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4870
4871	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4872		return (FALSE);
4873	}
4874
4875	return (TRUE);
4876}
4877
4878
4879/*
4880 * Enable PCI Wake On Lan capability
4881 */
4882static void
4883igb_enable_wakeup(device_t dev)
4884{
4885	u16     cap, status;
4886	u8      id;
4887
4888	/* First find the capabilities pointer */
4889	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4890	/* Read the PM Capabilities */
4891	id = pci_read_config(dev, cap, 1);
4892	if (id != PCIY_PMG)     /* Something wrong */
4893		return;
4894	/* OK, we have the power capabilities, so
4895	   now get the status register */
4896	cap += PCIR_POWER_STATUS;
4897	status = pci_read_config(dev, cap, 2);
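	/* PME status is write-1-to-clear, so this write both clears
	   any pending event and sets the enable bit */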
4898	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4899	pci_write_config(dev, cap, status, 2);
4900	return;
4901}
4902
4903static void
4904igb_led_func(void *arg, int onoff)
4905{
4906	struct adapter	*adapter = arg;
4907
4908	IGB_CORE_LOCK(adapter);
4909	if (onoff) {
4910		e1000_setup_led(&adapter->hw);
4911		e1000_led_on(&adapter->hw);
4912	} else {
4913		e1000_led_off(&adapter->hw);
4914		e1000_cleanup_led(&adapter->hw);
4915	}
4916	IGB_CORE_UNLOCK(adapter);
4917}
4918
4919/**********************************************************************
4920 *
4921 *  Update the board statistics counters.
4922 *
4923 **********************************************************************/
4924static void
4925igb_update_stats_counters(struct adapter *adapter)
4926{
4927	struct ifnet		*ifp;
4928        struct e1000_hw		*hw = &adapter->hw;
4929	struct e1000_hw_stats	*stats;
4930
4931	/*
4932	** The virtual function adapter has only a
4933	** small controlled set of stats; do only
4934	** those and return.
4935	*/
4936	if (adapter->vf_ifp) {
4937		igb_update_vf_stats_counters(adapter);
4938		return;
4939	}
4940
4941	stats = (struct e1000_hw_stats	*)adapter->stats;
4942
4943	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4944	    (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4945		stats->symerrs +=
4946		    E1000_READ_REG(hw, E1000_SYMERRS);
4947		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4948	}
4949
4950	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4951	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4952	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4953	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4954
4955	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4956	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4957	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4958	stats->dc += E1000_READ_REG(hw, E1000_DC);
4959	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4960	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4961	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4962	/*
4963	** For watchdog management we need to know if we have been
4964	** paused during the last interval, so capture that here.
4965	*/
4966	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4967	stats->xoffrxc += adapter->pause_frames;
4968	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
4969	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
4970	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
4971	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
4972	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
4973	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
4974	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
4975	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
4976	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
4977	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
4978	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
4979	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
4980
4981	/* For the 64-bit byte counters the low dword must be read first. */
4982	/* Both registers clear on the read of the high dword */
4983
4984	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
4985	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
4986	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
4987	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
4988
4989	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
4990	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
4991	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
4992	stats->roc += E1000_READ_REG(hw, E1000_ROC);
4993	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
4994
4995	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
4996	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
4997
4998	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
4999	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5000	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5001	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5002	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5003	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5004	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5005	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5006	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5007	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5008
5009	/* Interrupt Counts */
5010
5011	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5012	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5013	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5014	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5015	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5016	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5017	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5018	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5019	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5020
5021	/* Host to Card Statistics */
5022
5023	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5024	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5025	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5026	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5027	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5028	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5029	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5030	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5031	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5032	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5033	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5034	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5035	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5036	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5037
5038	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5039	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5040	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5041	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5042	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5043	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5044
5045	ifp = adapter->ifp;
5046	ifp->if_collisions = stats->colc;
5047
5048	/* Rx Errors */
5049	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5050	    stats->crcerrs + stats->algnerrc +
5051	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5052
5053	/* Tx Errors */
5054	ifp->if_oerrors = stats->ecol +
5055	    stats->latecol + adapter->watchdog_events;
5056
5057	/* Driver specific counters */
5058	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5059	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5060	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5061	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5062	adapter->packet_buf_alloc_tx =
5063	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5064	adapter->packet_buf_alloc_rx =
5065	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5066}
5067
5068
5069/**********************************************************************
5070 *
5071 *  Initialize the VF board statistics counters.
5072 *
5073 **********************************************************************/
5074static void
5075igb_vf_init_stats(struct adapter *adapter)
5076{
5077        struct e1000_hw *hw = &adapter->hw;
5078	struct e1000_vf_stats	*stats;
5079
5080	stats = (struct e1000_vf_stats	*)adapter->stats;
5081	if (stats == NULL)
5082		return;
5083        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5084        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5085        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5086        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5087        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5088}
5089
5090/**********************************************************************
5091 *
5092 *  Update the VF board statistics counters.
5093 *
5094 **********************************************************************/
5095static void
5096igb_update_vf_stats_counters(struct adapter *adapter)
5097{
5098	struct e1000_hw *hw = &adapter->hw;
5099	struct e1000_vf_stats	*stats;
5100
5101	if (adapter->link_speed == 0)
5102		return;
5103
5104	stats = (struct e1000_vf_stats	*)adapter->stats;
5105
5106	UPDATE_VF_REG(E1000_VFGPRC,
5107	    stats->last_gprc, stats->gprc);
5108	UPDATE_VF_REG(E1000_VFGORC,
5109	    stats->last_gorc, stats->gorc);
5110	UPDATE_VF_REG(E1000_VFGPTC,
5111	    stats->last_gptc, stats->gptc);
5112	UPDATE_VF_REG(E1000_VFGOTC,
5113	    stats->last_gotc, stats->gotc);
5114	UPDATE_VF_REG(E1000_VFMPRC,
5115	    stats->last_mprc, stats->mprc);
5116}
5117
5118/* Export a single 32-bit register via a read-only sysctl. */
5119static int
5120igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5121{
5122	struct adapter *adapter;
5123	u_int val;
5124
5125	adapter = oidp->oid_arg1;
5126	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5127	return (sysctl_handle_int(oidp, &val, 0, req));
5128}
5129
5130/*
5131**  Tuneable interrupt rate handler
5132*/
5133static int
5134igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5135{
5136	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5137	int			error;
5138	u32			reg, usec, rate;
5139
5140	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5141	usec = ((reg & 0x7FFC) >> 2);
5142	if (usec > 0)
5143		rate = 1000000 / usec;
5144	else
5145		rate = 0;
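	/* Read-only in practice: a new value is accepted below but
	   never written back to EITR */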
5146	error = sysctl_handle_int(oidp, &rate, 0, req);
5147	if (error || !req->newptr)
5148		return error;
5149	return 0;
5150}
5151
5152/*
5153 * Add sysctl variables, one per statistic, to the system.
5154 */
5155static void
5156igb_add_hw_stats(struct adapter *adapter)
5157{
5158	device_t dev = adapter->dev;
5159
5160	struct tx_ring *txr = adapter->tx_rings;
5161	struct rx_ring *rxr = adapter->rx_rings;
5162
5163	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5164	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5165	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5166	struct e1000_hw_stats *stats = adapter->stats;
5167
5168	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5169	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5170
5171#define QUEUE_NAME_LEN 32
5172	char namebuf[QUEUE_NAME_LEN];
5173
5174	/* Driver Statistics */
5175	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5176			CTLFLAG_RD, &adapter->link_irq, 0,
5177			"Link MSIX IRQ Handled");
5178	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5179			CTLFLAG_RD, &adapter->dropped_pkts,
5180			"Driver dropped packets");
5181	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5182			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5183			"Driver tx dma failure in xmit");
5184	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5185			CTLFLAG_RD, &adapter->rx_overruns,
5186			"RX overruns");
5187	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5188			CTLFLAG_RD, &adapter->watchdog_events,
5189			"Watchdog timeouts");
5190
5191	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5192			CTLFLAG_RD, &adapter->device_control,
5193			"Device Control Register");
5194	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5195			CTLFLAG_RD, &adapter->rx_control,
5196			"Receiver Control Register");
5197	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5198			CTLFLAG_RD, &adapter->int_mask,
5199			"Interrupt Mask");
5200	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5201			CTLFLAG_RD, &adapter->eint_mask,
5202			"Extended Interrupt Mask");
5203	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5204			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5205			"Transmit Buffer Packet Allocation");
5206	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5207			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5208			"Receive Buffer Packet Allocation");
5209	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5210			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5211			"Flow Control High Watermark");
5212	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5213			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5214			"Flow Control Low Watermark");
5215
5216	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5217		struct lro_ctrl *lro = &rxr->lro;
5218
5219		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5220		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5221					    CTLFLAG_RD, NULL, "Queue Name");
5222		queue_list = SYSCTL_CHILDREN(queue_node);
5223
5224		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5225				CTLFLAG_RD, &adapter->queues[i],
5226				sizeof(&adapter->queues[i]),
5227				igb_sysctl_interrupt_rate_handler,
5228				"IU", "Interrupt Rate");
5229
5230		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5231				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5232				igb_sysctl_reg_handler, "IU",
5233 				"Transmit Descriptor Head");
5234		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5235				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5236				igb_sysctl_reg_handler, "IU",
5237 				"Transmit Descriptor Tail");
5238		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5239				CTLFLAG_RD, &txr->no_desc_avail,
5240				"Queue No Descriptor Available");
5241		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5242				CTLFLAG_RD, &txr->tx_packets,
5243				"Queue Packets Transmitted");
5244
5245		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5246				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5247				igb_sysctl_reg_handler, "IU",
5248				"Receive Descriptor Head");
5249		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5250				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5251				igb_sysctl_reg_handler, "IU",
5252				"Receive Descriptor Tail");
5253		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5254				CTLFLAG_RD, &rxr->rx_packets,
5255				"Queue Packets Received");
5256		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5257				CTLFLAG_RD, &rxr->rx_bytes,
5258				"Queue Bytes Received");
5259		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5260				CTLFLAG_RD, &lro->lro_queued, 0,
5261				"LRO Queued");
5262		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5263				CTLFLAG_RD, &lro->lro_flushed, 0,
5264				"LRO Flushed");
5265	}
5266
5267	/* MAC stats get their own sub node */
5268
5269	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5270				    CTLFLAG_RD, NULL, "MAC Statistics");
5271	stat_list = SYSCTL_CHILDREN(stat_node);
5272
5273	/*
5274	** VF adapter has a very limited set of stats
5275	** since it's not managing the metal, so to speak.
5276	*/
5277	if (adapter->vf_ifp) {
5278		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5279				CTLFLAG_RD, &stats->gprc,
5280				"Good Packets Received");
5281		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5282				CTLFLAG_RD, &stats->gptc,
5283				"Good Packets Transmitted");
5284		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5285				CTLFLAG_RD, &stats->gorc,
5286				"Good Octets Received");
5287		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5288				CTLFLAG_RD, &stats->gotc,
5289				"Good Octets Transmitted");
5290		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5291				CTLFLAG_RD, &stats->mprc,
5292				"Multicast Packets Received");
5293		return;
5294	}

	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
			CTLFLAG_RD, &stats->ecol,
			"Excessive collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
			CTLFLAG_RD, &stats->scc,
			"Single collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
			CTLFLAG_RD, &stats->mcc,
			"Multiple collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
			CTLFLAG_RD, &stats->latecol,
			"Late collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
			CTLFLAG_RD, &stats->colc,
			"Collision Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
			CTLFLAG_RD, &stats->symerrs,
			"Symbol Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
			CTLFLAG_RD, &stats->sec,
			"Sequence Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
			CTLFLAG_RD, &stats->dc,
			"Defer Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
			CTLFLAG_RD, &stats->mpc,
			"Missed Packets");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
			CTLFLAG_RD, &stats->rnbc,
			"Receive No Buffers");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
			CTLFLAG_RD, &stats->ruc,
			"Receive Undersize");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
			CTLFLAG_RD, &stats->roc,
			"Oversized Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
			CTLFLAG_RD, &stats->rjc,
			"Received Jabber");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
			CTLFLAG_RD, &stats->rxerrc,
			"Receive Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
			CTLFLAG_RD, &stats->crcerrs,
			"CRC errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
			CTLFLAG_RD, &stats->algnerrc,
			"Alignment Errors");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
			CTLFLAG_RD, &stats->cexterr,
			"Collision/Carrier extension errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
			CTLFLAG_RD, &stats->xonrxc,
			"XON Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
			CTLFLAG_RD, &stats->xontxc,
			"XON Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
			CTLFLAG_RD, &stats->xoffrxc,
			"XOFF Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
			CTLFLAG_RD, &stats->xofftxc,
			"XOFF Transmitted");
	/* Packet Reception Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
			CTLFLAG_RD, &stats->tpr,
			"Total Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
			CTLFLAG_RD, &stats->gprc,
			"Good Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
			CTLFLAG_RD, &stats->bprc,
			"Broadcast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
			CTLFLAG_RD, &stats->mprc,
			"Multicast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
			CTLFLAG_RD, &stats->prc127,
			"65-127 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
			CTLFLAG_RD, &stats->prc255,
			"128-255 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
			CTLFLAG_RD, &stats->prc511,
			"256-511 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
			CTLFLAG_RD, &stats->prc1023,
			"512-1023 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
			CTLFLAG_RD, &stats->gorc,
			"Good Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
			CTLFLAG_RD, &stats->gotc,
			"Good Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
			CTLFLAG_RD, &stats->tpt,
			"Total Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
			CTLFLAG_RD, &stats->gptc,
			"Good Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
			CTLFLAG_RD, &stats->bptc,
			"Broadcast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
			CTLFLAG_RD, &stats->mptc,
			"Multicast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
			CTLFLAG_RD, &stats->ptc127,
			"65-127 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
			CTLFLAG_RD, &stats->ptc255,
			"128-255 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
			CTLFLAG_RD, &stats->ptc511,
			"256-511 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
			CTLFLAG_RD, &stats->ptc1023,
			"512-1023 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
			CTLFLAG_RD, &stats->ptc1522,
			"1024-1522 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
			CTLFLAG_RD, &stats->tsctc,
			"TSO Contexts Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
			CTLFLAG_RD, &stats->tsctfc,
			"TSO Contexts Failed");

	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
				    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
			CTLFLAG_RD, &stats->iac,
			"Interrupt Assertion Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
			CTLFLAG_RD, &stats->icrxptc,
			"Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
			CTLFLAG_RD, &stats->icrxatc,
			"Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
			CTLFLAG_RD, &stats->ictxptc,
			"Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
			CTLFLAG_RD, &stats->ictxatc,
			"Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
			CTLFLAG_RD, &stats->ictxqec,
			"Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
			CTLFLAG_RD, &stats->ictxqmtc,
			"Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
			CTLFLAG_RD, &stats->icrxdmtc,
			"Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
			CTLFLAG_RD, &stats->icrxoc,
			"Interrupt Cause Receiver Overrun Count");

	/* Host to Card Stats */

	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
				    CTLFLAG_RD, NULL,
				    "Host to Card Statistics");

	host_list = SYSCTL_CHILDREN(host_node);

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
			CTLFLAG_RD, &stats->cbtmpc,
			"Circuit Breaker Tx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
			CTLFLAG_RD, &stats->htdpmc,
			"Host Transmit Discarded Packets");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
			CTLFLAG_RD, &stats->rpthc,
			"Rx Packets To Host");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
			CTLFLAG_RD, &stats->cbrmpc,
			"Circuit Breaker Rx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
			CTLFLAG_RD, &stats->cbrdpc,
			"Circuit Breaker Rx Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
			CTLFLAG_RD, &stats->hgptc,
			"Host Good Packets Tx Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
			CTLFLAG_RD, &stats->htcbdpc,
			"Host Tx Circuit Breaker Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
			CTLFLAG_RD, &stats->hgorc,
			"Host Good Octets Received Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
			CTLFLAG_RD, &stats->hgotc,
			"Host Good Octets Transmit Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
			CTLFLAG_RD, &stats->lenerrs,
			"Length Errors");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
			CTLFLAG_RD, &stats->scvpc,
			"SerDes/SGMII Code Violation Pkt Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
			CTLFLAG_RD, &stats->hrmpc,
			"Header Redirection Missed Packet Count");
}
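
/*
 * Illustrative usage, not part of the driver: once attach completes,
 * the nodes registered above are readable with sysctl(8). The exact
 * names depend on the device unit and queue count; assuming unit 0
 * and per-queue nodes named "queueN", something like:
 *
 *	sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	sysctl dev.igb.0.queue0.rx_packets
 *	sysctl dev.igb.0.interrupts.asserts
 */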

/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool. Only the first 32 words are
 *  dumped; the data that matters lives within that extent.
 *
 **********************************************************************/
static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1) {
		adapter = (struct adapter *)arg1;
		igb_print_nvm_info(adapter);
	}

	return (error);
}
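
/*
 * Illustrative trigger, assuming this handler is attached under the
 * device sysctl tree as "nvm" (the registration is not shown in this
 * excerpt):
 *
 *	sysctl dev.igb.0.nvm=1
 *
 * Writing any value other than 1 is accepted but produces no dump.
 */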

static void
igb_print_nvm_info(struct adapter *adapter)
{
	u16	eeprom_data;
	int	i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000  ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0  ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}
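
/*
 * Resulting console layout, sketched with placeholder word values
 * (eight 16-bit words per row, 32 words total):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0010  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0020  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 *	0x0030  xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 */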

static void
igb_set_sysctl_value(struct adapter *adapter, const char *name,
	const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
}
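
/*
 * Illustrative call; the tunable name and backing field here are
 * hypothetical, chosen only to show the pattern of exporting a
 * limit as a read/write integer sysctl:
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max descriptors cleaned per rx interrupt",
 *	    &adapter->rx_process_limit, 100);
 */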

/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct adapter *adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &adapter->fc, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->fc) {
		case e1000_fc_rx_pause:
		case e1000_fc_tx_pause:
		case e1000_fc_full:
			adapter->hw.fc.requested_mode = adapter->fc;
			break;
		case e1000_fc_none:
		default:
			adapter->hw.fc.requested_mode = e1000_fc_none;
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	return (error);
}
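
/*
 * Illustrative usage, assuming this handler is attached as "fc"
 * (the registration is not shown in this excerpt). Requesting full
 * flow control, then disabling it:
 *
 *	sysctl dev.igb.0.fc=3
 *	sysctl dev.igb.0.fc=0
 *
 * Note the new mode takes effect immediately via e1000_force_mac_fc().
 */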

/*
** Manage DMA Coalesce:
** Control values:
**	0/1 - off/on
**	Legal timer values are:
**	250, 500, and 1000-10000 in steps of 1000
*/
static int
igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int		error;

	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->dmac) {
		case 0:
			/* Disabling */
			break;
		case 1: /* Just enable and use default */
			adapter->dmac = 1000;
			break;
		case 250:
		case 500:
		case 1000:
		case 2000:
		case 3000:
		case 4000:
		case 5000:
		case 6000:
		case 7000:
		case 8000:
		case 9000:
		case 10000:
			/* Legal values - allow */
			break;
		default:
			/* Do nothing, illegal value */
			adapter->dmac = 0;
			return (error);
	}
	/* Reinit the interface */
	igb_init(adapter);
	return (error);
}
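
/*
 * Illustrative usage, assuming this handler is attached as "dmac"
 * (the registration is not shown in this excerpt):
 *
 *	sysctl dev.igb.0.dmac=1		# enable, falls back to 1000
 *	sysctl dev.igb.0.dmac=250	# enable with an explicit timer
 *	sysctl dev.igb.0.dmac=0		# disable
 *
 * Any accepted value, including 0, triggers igb_init() and thus a
 * full reinit of the interface; rejected values are zeroed out and
 * skip the reinit.
 */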