if_igb.c revision 232238
1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 232238 2012-02-27 19:05:01Z luigi $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.1";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select devices to load on
110 *  Last field stores an index into igb_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	/* required last entry */
154	{ 0, 0, 0, 0, 0}
155};
156
157/*********************************************************************
158 *  Table of branding strings for all supported NICs.
159 *********************************************************************/
160
161static char *igb_strings[] = {
162	"Intel(R) PRO/1000 Network Connection"
163};
164
165/*********************************************************************
166 *  Function prototypes
167 *********************************************************************/
168static int	igb_probe(device_t);
169static int	igb_attach(device_t);
170static int	igb_detach(device_t);
171static int	igb_shutdown(device_t);
172static int	igb_suspend(device_t);
173static int	igb_resume(device_t);
174static void	igb_start(struct ifnet *);
175static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
176#if __FreeBSD_version >= 800000
177static int	igb_mq_start(struct ifnet *, struct mbuf *);
178static int	igb_mq_start_locked(struct ifnet *,
179		    struct tx_ring *, struct mbuf *);
180static void	igb_qflush(struct ifnet *);
181#endif
182static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
183static void	igb_init(void *);
184static void	igb_init_locked(struct adapter *);
185static void	igb_stop(void *);
186static void	igb_media_status(struct ifnet *, struct ifmediareq *);
187static int	igb_media_change(struct ifnet *);
188static void	igb_identify_hardware(struct adapter *);
189static int	igb_allocate_pci_resources(struct adapter *);
190static int	igb_allocate_msix(struct adapter *);
191static int	igb_allocate_legacy(struct adapter *);
192static int	igb_setup_msix(struct adapter *);
193static void	igb_free_pci_resources(struct adapter *);
194static void	igb_local_timer(void *);
195static void	igb_reset(struct adapter *);
196static int	igb_setup_interface(device_t, struct adapter *);
197static int	igb_allocate_queues(struct adapter *);
198static void	igb_configure_queues(struct adapter *);
199
200static int	igb_allocate_transmit_buffers(struct tx_ring *);
201static void	igb_setup_transmit_structures(struct adapter *);
202static void	igb_setup_transmit_ring(struct tx_ring *);
203static void	igb_initialize_transmit_units(struct adapter *);
204static void	igb_free_transmit_structures(struct adapter *);
205static void	igb_free_transmit_buffers(struct tx_ring *);
206
207static int	igb_allocate_receive_buffers(struct rx_ring *);
208static int	igb_setup_receive_structures(struct adapter *);
209static int	igb_setup_receive_ring(struct rx_ring *);
210static void	igb_initialize_receive_units(struct adapter *);
211static void	igb_free_receive_structures(struct adapter *);
212static void	igb_free_receive_buffers(struct rx_ring *);
213static void	igb_free_receive_ring(struct rx_ring *);
214
215static void	igb_enable_intr(struct adapter *);
216static void	igb_disable_intr(struct adapter *);
217static void	igb_update_stats_counters(struct adapter *);
218static bool	igb_txeof(struct tx_ring *);
219
220static __inline	void igb_rx_discard(struct rx_ring *, int);
221static __inline void igb_rx_input(struct rx_ring *,
222		    struct ifnet *, struct mbuf *, u32);
223
224static bool	igb_rxeof(struct igb_queue *, int, int *);
225static void	igb_rx_checksum(u32, struct mbuf *, u32);
226static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
227static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
228		    struct ip *, struct tcphdr *);
229static void	igb_set_promisc(struct adapter *);
230static void	igb_disable_promisc(struct adapter *);
231static void	igb_set_multi(struct adapter *);
232static void	igb_update_link_status(struct adapter *);
233static void	igb_refresh_mbufs(struct rx_ring *, int);
234
235static void	igb_register_vlan(void *, struct ifnet *, u16);
236static void	igb_unregister_vlan(void *, struct ifnet *, u16);
237static void	igb_setup_vlan_hw_support(struct adapter *);
238
239static int	igb_xmit(struct tx_ring *, struct mbuf **);
240static int	igb_dma_malloc(struct adapter *, bus_size_t,
241		    struct igb_dma_alloc *, int);
242static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
243static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
244static void	igb_print_nvm_info(struct adapter *);
245static int 	igb_is_valid_ether_addr(u8 *);
246static void     igb_add_hw_stats(struct adapter *);
247
248static void	igb_vf_init_stats(struct adapter *);
249static void	igb_update_vf_stats_counters(struct adapter *);
250
251/* Management and WOL Support */
252static void	igb_init_manageability(struct adapter *);
253static void	igb_release_manageability(struct adapter *);
254static void     igb_get_hw_control(struct adapter *);
255static void     igb_release_hw_control(struct adapter *);
256static void     igb_enable_wakeup(device_t);
257static void     igb_led_func(void *, int);
258
259static int	igb_irq_fast(void *);
260static void	igb_msix_que(void *);
261static void	igb_msix_link(void *);
262static void	igb_handle_que(void *context, int pending);
263static void	igb_handle_link(void *context, int pending);
264
265static void	igb_set_sysctl_value(struct adapter *, const char *,
266		    const char *, int *, int);
267static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
268static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
269
270#ifdef DEVICE_POLLING
271static poll_handler_t igb_poll;
272#endif /* DEVICE_POLLING */
273
274/*********************************************************************
275 *  FreeBSD Device Interface Entry Points
276 *********************************************************************/
277
278static device_method_t igb_methods[] = {
279	/* Device interface */
280	DEVMETHOD(device_probe, igb_probe),
281	DEVMETHOD(device_attach, igb_attach),
282	DEVMETHOD(device_detach, igb_detach),
283	DEVMETHOD(device_shutdown, igb_shutdown),
284	DEVMETHOD(device_suspend, igb_suspend),
285	DEVMETHOD(device_resume, igb_resume),
286	{0, 0}
287};
288
289static driver_t igb_driver = {
290	"igb", igb_methods, sizeof(struct adapter),
291};
292
293static devclass_t igb_devclass;
294DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
295MODULE_DEPEND(igb, pci, 1, 1, 1);
296MODULE_DEPEND(igb, ether, 1, 1, 1);
297
298/*********************************************************************
299 *  Tunable default values.
300 *********************************************************************/
301
302static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
303
304/* Descriptor defaults */
305static int igb_rxd = IGB_DEFAULT_RXD;
306static int igb_txd = IGB_DEFAULT_TXD;
307TUNABLE_INT("hw.igb.rxd", &igb_rxd);
308TUNABLE_INT("hw.igb.txd", &igb_txd);
309SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
310    "Number of receive descriptors per queue");
311SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
312    "Number of transmit descriptors per queue");
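/*
** Illustrative usage (not part of the original source): both knobs are
** CTLFLAG_RDTUN, so they are read once at boot, typically from
** /boot/loader.conf, e.g.
**   hw.igb.rxd="2048"
**   hw.igb.txd="2048"
** Requests that are out of range, or whose ring size is not a multiple
** of IGB_DBA_ALIGN, fall back to the defaults in igb_attach().
*/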
313
314/*
315** AIM: Adaptive Interrupt Moderation
316** which means that the interrupt rate
317** is varied over time based on the
318** traffic for that interrupt vector
319*/
320static int igb_enable_aim = TRUE;
321TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
322SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
323    "Enable adaptive interrupt moderation");
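/*
** Illustrative usage (not part of the original source): the global
** default can be flipped at runtime, e.g. "sysctl hw.igb.enable_aim=0",
** and each adapter also gets its own per-device knob created via
** igb_set_sysctl_value() in igb_attach().
*/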
324
325/*
326 * MSIX should be the default for best performance,
327 * but this allows it to be forced off for testing.
328 */
329static int igb_enable_msix = 1;
330TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
331SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
332    "Enable MSI-X interrupts");
333
334/*
335** Tunable interrupt rate
336*/
337static int igb_max_interrupt_rate = 8000;
338TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
339SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
340    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
341
342/*
343** Header split causes the packet header to
344** be DMA'd to a separate mbuf from the payload.
345** This can have memory alignment benefits, and
346** another plus is that small packets often fit
347** into the header and thus use no cluster. It is
348** a very workload-dependent feature.
349*/
350static int igb_header_split = FALSE;
351TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
352SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
353    "Enable receive mbuf header split");
354
355/*
356** This will autoconfigure based on
357** the number of CPUs if left at 0.
358*/
359static int igb_num_queues = 0;
360TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
361SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
362    "Number of queues to configure, 0 indicates autoconfigure");
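/*
** Illustrative usage (not part of the original source): to pin the
** queue count rather than sizing it from the CPU count, set e.g.
** hw.igb.num_queues="2" in /boot/loader.conf.
*/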
363
364/* How many packets rxeof tries to clean at a time */
365static int igb_rx_process_limit = 100;
366TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
367SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
368    &igb_rx_process_limit, 0,
369    "Maximum number of received packets to process at a time, -1 means unlimited");
370
371#ifdef DEV_NETMAP	/* see ixgbe.c for details */
372#include <dev/netmap/if_igb_netmap.h>
373#endif /* DEV_NETMAP */
374/*********************************************************************
375 *  Device identification routine
376 *
377 *  igb_probe determines whether the driver should be loaded for a
378 *  given adapter, based on its PCI vendor/device ID.
379 *
380 *  return BUS_PROBE_DEFAULT on success, positive on failure
381 *********************************************************************/
382
383static int
384igb_probe(device_t dev)
385{
386	char		adapter_name[60];
387	uint16_t	pci_vendor_id = 0;
388	uint16_t	pci_device_id = 0;
389	uint16_t	pci_subvendor_id = 0;
390	uint16_t	pci_subdevice_id = 0;
391	igb_vendor_info_t *ent;
392
393	INIT_DEBUGOUT("igb_probe: begin");
394
395	pci_vendor_id = pci_get_vendor(dev);
396	if (pci_vendor_id != IGB_VENDOR_ID)
397		return (ENXIO);
398
399	pci_device_id = pci_get_device(dev);
400	pci_subvendor_id = pci_get_subvendor(dev);
401	pci_subdevice_id = pci_get_subdevice(dev);
402
403	ent = igb_vendor_info_array;
404	while (ent->vendor_id != 0) {
405		if ((pci_vendor_id == ent->vendor_id) &&
406		    (pci_device_id == ent->device_id) &&
407
408		    ((pci_subvendor_id == ent->subvendor_id) ||
409		    (ent->subvendor_id == PCI_ANY_ID)) &&
410
411		    ((pci_subdevice_id == ent->subdevice_id) ||
412		    (ent->subdevice_id == PCI_ANY_ID))) {
413			sprintf(adapter_name, "%s %s",
414				igb_strings[ent->index],
415				igb_driver_version);
416			device_set_desc_copy(dev, adapter_name);
417			return (BUS_PROBE_DEFAULT);
418		}
419		ent++;
420	}
421
422	return (ENXIO);
423}
424
425/*********************************************************************
426 *  Device initialization routine
427 *
428 *  The attach entry point is called when the driver is being loaded.
429 *  This routine identifies the type of hardware, allocates all resources
430 *  and initializes the hardware.
431 *
432 *  return 0 on success, positive on failure
433 *********************************************************************/
434
435static int
436igb_attach(device_t dev)
437{
438	struct adapter	*adapter;
439	int		error = 0;
440	u16		eeprom_data;
441
442	INIT_DEBUGOUT("igb_attach: begin");
443
444	if (resource_disabled("igb", device_get_unit(dev))) {
445		device_printf(dev, "Disabled by device hint\n");
446		return (ENXIO);
447	}
448
449	adapter = device_get_softc(dev);
450	adapter->dev = adapter->osdep.dev = dev;
451	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
452
453	/* SYSCTL stuff */
454	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
455	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
456	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
457	    igb_sysctl_nvm_info, "I", "NVM Information");
458
459	igb_set_sysctl_value(adapter, "enable_aim",
460	    "Interrupt Moderation", &adapter->enable_aim,
461	    igb_enable_aim);
462
463	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
464	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
465	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
466	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
467
468	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
469
470	/* Determine hardware and mac info */
471	igb_identify_hardware(adapter);
472
473	/* Setup PCI resources */
474	if (igb_allocate_pci_resources(adapter)) {
475		device_printf(dev, "Allocation of PCI resources failed\n");
476		error = ENXIO;
477		goto err_pci;
478	}
479
480	/* Do Shared Code initialization */
481	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
482		device_printf(dev, "Setup of Shared code failed\n");
483		error = ENXIO;
484		goto err_pci;
485	}
486
487	e1000_get_bus_info(&adapter->hw);
488
489	/* Sysctl for limiting the amount of work done in the taskqueue */
490	igb_set_sysctl_value(adapter, "rx_processing_limit",
491	    "max number of rx packets to process",
492	    &adapter->rx_process_limit, igb_rx_process_limit);
493
494	/*
495	 * Validate the number of transmit and receive descriptors. The
496	 * count must not exceed the hardware maximum, and the resulting
497	 * ring size must be a multiple of IGB_DBA_ALIGN.
498	 */
499	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
500	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
501		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
502		    IGB_DEFAULT_TXD, igb_txd);
503		adapter->num_tx_desc = IGB_DEFAULT_TXD;
504	} else
505		adapter->num_tx_desc = igb_txd;
506	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
507	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
508		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
509		    IGB_DEFAULT_RXD, igb_rxd);
510		adapter->num_rx_desc = IGB_DEFAULT_RXD;
511	} else
512		adapter->num_rx_desc = igb_rxd;
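	/*
	 * Worked example (added for illustration): with the usual 16-byte
	 * descriptors and a 128-byte IGB_DBA_ALIGN, the ring sizes must be
	 * multiples of 8, so a request such as hw.igb.txd=1500 fails the
	 * alignment test above and falls back to IGB_DEFAULT_TXD.
	 */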
513
514	adapter->hw.mac.autoneg = DO_AUTO_NEG;
515	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
516	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
517
518	/* Copper options */
519	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
520		adapter->hw.phy.mdix = AUTO_ALL_MODES;
521		adapter->hw.phy.disable_polarity_correction = FALSE;
522		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
523	}
524
525	/*
526	 * Set the frame limits assuming
527	 * standard ethernet sized frames.
528	 */
529	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
530	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
531
532	/*
533	** Allocate and Setup Queues
534	*/
535	if (igb_allocate_queues(adapter)) {
536		error = ENOMEM;
537		goto err_pci;
538	}
539
540	/* Allocate the appropriate stats memory */
541	if (adapter->vf_ifp) {
542		adapter->stats =
543		    (struct e1000_vf_stats *)malloc(sizeof \
544		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
545		igb_vf_init_stats(adapter);
546	} else
547		adapter->stats =
548		    (struct e1000_hw_stats *)malloc(sizeof \
549		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
550	if (adapter->stats == NULL) {
551		device_printf(dev, "Can not allocate stats memory\n");
552		error = ENOMEM;
553		goto err_late;
554	}
555
556	/* Allocate multicast array memory. */
557	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
558	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
559	if (adapter->mta == NULL) {
560		device_printf(dev, "Can not allocate multicast setup array\n");
561		error = ENOMEM;
562		goto err_late;
563	}
564
565	/* Some adapter-specific advanced features */
566	if (adapter->hw.mac.type >= e1000_i350) {
567		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
568		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
569		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
570		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
571		igb_set_sysctl_value(adapter, "eee_disabled",
572		    "Disable Energy Efficient Ethernet",
573		    &adapter->hw.dev_spec._82575.eee_disable,
574		    TRUE);
575		e1000_set_eee_i350(&adapter->hw);
576	}
577
578	/*
579	** Start from a known state; this is
580	** important when reading the NVM and
581	** MAC address from it.
582	*/
583	e1000_reset_hw(&adapter->hw);
584
585	/* Make sure we have a good EEPROM before we read from it */
586	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
587		/*
588		** Some PCI-E parts fail the first check due to
589		** the link being in a sleep state; call it again,
590		** and if it fails a second time it is a real issue.
591		*/
592		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
593			device_printf(dev,
594			    "The EEPROM Checksum Is Not Valid\n");
595			error = EIO;
596			goto err_late;
597		}
598	}
599
600	/*
601	** Copy the permanent MAC address out of the EEPROM
602	*/
603	if (e1000_read_mac_addr(&adapter->hw) < 0) {
604		device_printf(dev, "EEPROM read error while reading MAC"
605		    " address\n");
606		error = EIO;
607		goto err_late;
608	}
609	/* Check its sanity */
610	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
611		device_printf(dev, "Invalid MAC address\n");
612		error = EIO;
613		goto err_late;
614	}
615
616	/* Setup OS specific network interface */
617	if (igb_setup_interface(dev, adapter) != 0)
618		goto err_late;
619
620	/* Now get a good starting state */
621	igb_reset(adapter);
622
623	/* Initialize statistics */
624	igb_update_stats_counters(adapter);
625
626	adapter->hw.mac.get_link_status = 1;
627	igb_update_link_status(adapter);
628
629	/* Indicate SOL/IDER usage */
630	if (e1000_check_reset_block(&adapter->hw))
631		device_printf(dev,
632		    "PHY reset is blocked due to SOL/IDER session.\n");
633
634	/* Determine if we have to control management hardware */
635	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
636
637	/*
638	 * Setup Wake-on-Lan
639	 */
640	/* APME bit in EEPROM is mapped to WUC.APME */
641	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
642	if (eeprom_data)
643		adapter->wol = E1000_WUFC_MAG;
644
645	/* Register for VLAN events */
646	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
647	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
648	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
649	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
650
651	igb_add_hw_stats(adapter);
652
653	/* Tell the stack that the interface is not active */
654	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
655	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
656
657	adapter->led_dev = led_create(igb_led_func, adapter,
658	    device_get_nameunit(dev));
659
660	/*
661	** Configure Interrupts
662	*/
663	if ((adapter->msix > 1) && (igb_enable_msix))
664		error = igb_allocate_msix(adapter);
665	else /* MSI or Legacy */
666		error = igb_allocate_legacy(adapter);
667	if (error)
668		goto err_late;
669
670#ifdef DEV_NETMAP
671	igb_netmap_attach(adapter);
672#endif /* DEV_NETMAP */
673	INIT_DEBUGOUT("igb_attach: end");
674
675	return (0);
676
677err_late:
678	igb_detach(dev);
679	igb_free_transmit_structures(adapter);
680	igb_free_receive_structures(adapter);
681	igb_release_hw_control(adapter);
682err_pci:
683	igb_free_pci_resources(adapter);
684	if (adapter->ifp != NULL)
685		if_free(adapter->ifp);
686	free(adapter->mta, M_DEVBUF);
687	IGB_CORE_LOCK_DESTROY(adapter);
688
689	return (error);
690}
691
692/*********************************************************************
693 *  Device removal routine
694 *
695 *  The detach entry point is called when the driver is being removed.
696 *  This routine stops the adapter and deallocates all the resources
697 *  that were allocated for driver operation.
698 *
699 *  return 0 on success, positive on failure
700 *********************************************************************/
701
702static int
703igb_detach(device_t dev)
704{
705	struct adapter	*adapter = device_get_softc(dev);
706	struct ifnet	*ifp = adapter->ifp;
707
708	INIT_DEBUGOUT("igb_detach: begin");
709
710	/* Make sure VLANs are not using the driver */
711	if (adapter->ifp->if_vlantrunk != NULL) {
712		device_printf(dev,"Vlan in use, detach first\n");
713		return (EBUSY);
714	}
715
716	if (adapter->led_dev != NULL)
717		led_destroy(adapter->led_dev);
718
719#ifdef DEVICE_POLLING
720	if (ifp->if_capenable & IFCAP_POLLING)
721		ether_poll_deregister(ifp);
722#endif
723
724	IGB_CORE_LOCK(adapter);
725	adapter->in_detach = 1;
726	igb_stop(adapter);
727	IGB_CORE_UNLOCK(adapter);
728
729	e1000_phy_hw_reset(&adapter->hw);
730
731	/* Give control back to firmware */
732	igb_release_manageability(adapter);
733	igb_release_hw_control(adapter);
734
735	if (adapter->wol) {
736		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
737		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
738		igb_enable_wakeup(dev);
739	}
740
741	/* Unregister VLAN events */
742	if (adapter->vlan_attach != NULL)
743		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
744	if (adapter->vlan_detach != NULL)
745		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
746
747	ether_ifdetach(adapter->ifp);
748
749	callout_drain(&adapter->timer);
750
751#ifdef DEV_NETMAP
752	netmap_detach(adapter->ifp);
753#endif /* DEV_NETMAP */
754	igb_free_pci_resources(adapter);
755	bus_generic_detach(dev);
756	if_free(ifp);
757
758	igb_free_transmit_structures(adapter);
759	igb_free_receive_structures(adapter);
760	if (adapter->mta != NULL)
761		free(adapter->mta, M_DEVBUF);
762
763	IGB_CORE_LOCK_DESTROY(adapter);
764
765	return (0);
766}
767
768/*********************************************************************
769 *
770 *  Shutdown entry point
771 *
772 **********************************************************************/
773
774static int
775igb_shutdown(device_t dev)
776{
777	return igb_suspend(dev);
778}
779
780/*
781 * Suspend/resume device methods.
782 */
783static int
784igb_suspend(device_t dev)
785{
786	struct adapter *adapter = device_get_softc(dev);
787
788	IGB_CORE_LOCK(adapter);
789
790	igb_stop(adapter);
791
792        igb_release_manageability(adapter);
793	igb_release_hw_control(adapter);
794
795        if (adapter->wol) {
796                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
797                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
798                igb_enable_wakeup(dev);
799        }
800
801	IGB_CORE_UNLOCK(adapter);
802
803	return bus_generic_suspend(dev);
804}
805
806static int
807igb_resume(device_t dev)
808{
809	struct adapter *adapter = device_get_softc(dev);
810	struct ifnet *ifp = adapter->ifp;
811
812	IGB_CORE_LOCK(adapter);
813	igb_init_locked(adapter);
814	igb_init_manageability(adapter);
815
816	if ((ifp->if_flags & IFF_UP) &&
817	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
818		igb_start(ifp);
819
820	IGB_CORE_UNLOCK(adapter);
821
822	return bus_generic_resume(dev);
823}
824
825
826/*********************************************************************
827 *  Transmit entry point
828 *
829 *  igb_start is called by the stack to initiate a transmit.
830 *  The driver will remain in this routine as long as there are
831 *  packets to transmit and transmit resources are available.
832 *  In case resources are not available, the stack is notified and
833 *  the packet is requeued.
834 **********************************************************************/
835
836static void
837igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
838{
839	struct adapter	*adapter = ifp->if_softc;
840	struct mbuf	*m_head;
841
842	IGB_TX_LOCK_ASSERT(txr);
843
844	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
845	    IFF_DRV_RUNNING)
846		return;
847	if (!adapter->link_active)
848		return;
849
850	/* Call cleanup if number of TX descriptors low */
851	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
852		igb_txeof(txr);
853
854	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
855		if (txr->tx_avail <= IGB_MAX_SCATTER) {
856			txr->queue_status |= IGB_QUEUE_DEPLETED;
857			break;
858		}
859		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
860		if (m_head == NULL)
861			break;
862		/*
863		 *  Encapsulation can modify our pointer, and/or make it
864		 *  NULL on failure.  In that event, we can't requeue.
865		 */
866		if (igb_xmit(txr, &m_head)) {
867			if (m_head != NULL)
868				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
869			if (txr->tx_avail <= IGB_MAX_SCATTER)
870				txr->queue_status |= IGB_QUEUE_DEPLETED;
871			break;
872		}
873
874		/* Send a copy of the frame to the BPF listener */
875		ETHER_BPF_MTAP(ifp, m_head);
876
877		/* Set watchdog on */
878		txr->watchdog_time = ticks;
879		txr->queue_status |= IGB_QUEUE_WORKING;
880	}
881}
882
883/*
884 * Legacy TX driver routine, called from the
885 * stack; it always uses tx[0] and spins for its lock.
886 * Should not be used with multiqueue TX.
887 */
888static void
889igb_start(struct ifnet *ifp)
890{
891	struct adapter	*adapter = ifp->if_softc;
892	struct tx_ring	*txr = adapter->tx_rings;
893
894	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
895		IGB_TX_LOCK(txr);
896		igb_start_locked(txr, ifp);
897		IGB_TX_UNLOCK(txr);
898	}
899	return;
900}
901
902#if __FreeBSD_version >= 800000
903/*
904** Multiqueue Transmit driver
905**
906*/
907static int
908igb_mq_start(struct ifnet *ifp, struct mbuf *m)
909{
910	struct adapter		*adapter = ifp->if_softc;
911	struct igb_queue	*que;
912	struct tx_ring		*txr;
913	int 			i, err = 0;
914	bool			moveable = TRUE;
915
916	/* Which queue to use */
917	if ((m->m_flags & M_FLOWID) != 0) {
918		i = m->m_pkthdr.flowid % adapter->num_queues;
919		moveable = FALSE;
920	} else
921		i = curcpu % adapter->num_queues;
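	/*
	 * Illustrative example (added comment): with 4 queues, a flow ID of
	 * 10 selects queue 10 % 4 = 2; without a flow ID the current CPU
	 * number is reduced modulo the queue count in the same way.
	 */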
922
923	txr = &adapter->tx_rings[i];
924	que = &adapter->queues[i];
925	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
926	    IGB_TX_TRYLOCK(txr)) {
927		err = igb_mq_start_locked(ifp, txr, m);
928		IGB_TX_UNLOCK(txr);
929	} else {
930		err = drbr_enqueue(ifp, txr->br, m);
931		taskqueue_enqueue(que->tq, &que->que_task);
932	}
933
934	return (err);
935}
936
937static int
938igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
939{
940	struct adapter  *adapter = txr->adapter;
941        struct mbuf     *next;
942        int             err = 0, enq;
943
944	IGB_TX_LOCK_ASSERT(txr);
945
946	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
947	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
948	    adapter->link_active == 0) {
949		if (m != NULL)
950			err = drbr_enqueue(ifp, txr->br, m);
951		return (err);
952	}
953
954	enq = 0;
955	if (m == NULL) {
956		next = drbr_dequeue(ifp, txr->br);
957	} else if (drbr_needs_enqueue(ifp, txr->br)) {
958		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
959			return (err);
960		next = drbr_dequeue(ifp, txr->br);
961	} else
962		next = m;
963
964	/* Process the queue */
965	while (next != NULL) {
966		if ((err = igb_xmit(txr, &next)) != 0) {
967			if (next != NULL)
968				err = drbr_enqueue(ifp, txr->br, next);
969			break;
970		}
971		enq++;
972		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
973		ETHER_BPF_MTAP(ifp, next);
974		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
975			break;
976		next = drbr_dequeue(ifp, txr->br);
977	}
978	if (enq > 0) {
979		/* Set the watchdog */
980		txr->queue_status |= IGB_QUEUE_WORKING;
981		txr->watchdog_time = ticks;
982	}
983	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
984		igb_txeof(txr);
985	if (txr->tx_avail <= IGB_MAX_SCATTER)
986		txr->queue_status |= IGB_QUEUE_DEPLETED;
987	return (err);
988}
989
990/*
991** Flush all ring buffers
992*/
993static void
994igb_qflush(struct ifnet *ifp)
995{
996	struct adapter	*adapter = ifp->if_softc;
997	struct tx_ring	*txr = adapter->tx_rings;
998	struct mbuf	*m;
999
1000	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1001		IGB_TX_LOCK(txr);
1002		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1003			m_freem(m);
1004		IGB_TX_UNLOCK(txr);
1005	}
1006	if_qflush(ifp);
1007}
1008#endif /* __FreeBSD_version >= 800000 */
1009
1010/*********************************************************************
1011 *  Ioctl entry point
1012 *
1013 *  igb_ioctl is called when the user wants to configure the
1014 *  interface.
1015 *
1016 *  return 0 on success, positive on failure
1017 **********************************************************************/
1018
1019static int
1020igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1021{
1022	struct adapter	*adapter = ifp->if_softc;
1023	struct ifreq	*ifr = (struct ifreq *)data;
1024#if defined(INET) || defined(INET6)
1025	struct ifaddr	*ifa = (struct ifaddr *)data;
1026#endif
1027	bool		avoid_reset = FALSE;
1028	int		error = 0;
1029
1030	if (adapter->in_detach)
1031		return (error);
1032
1033	switch (command) {
1034	case SIOCSIFADDR:
1035#ifdef INET
1036		if (ifa->ifa_addr->sa_family == AF_INET)
1037			avoid_reset = TRUE;
1038#endif
1039#ifdef INET6
1040		if (ifa->ifa_addr->sa_family == AF_INET6)
1041			avoid_reset = TRUE;
1042#endif
1043		/*
1044		** Calling init results in link renegotiation,
1045		** so we avoid doing it when possible.
1046		*/
1047		if (avoid_reset) {
1048			ifp->if_flags |= IFF_UP;
1049			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1050				igb_init(adapter);
1051#ifdef INET
1052			if (!(ifp->if_flags & IFF_NOARP))
1053				arp_ifinit(ifp, ifa);
1054#endif
1055		} else
1056			error = ether_ioctl(ifp, command, data);
1057		break;
1058	case SIOCSIFMTU:
1059	    {
1060		int max_frame_size;
1061
1062		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1063
1064		IGB_CORE_LOCK(adapter);
1065		max_frame_size = 9234;
1066		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1067		    ETHER_CRC_LEN) {
1068			IGB_CORE_UNLOCK(adapter);
1069			error = EINVAL;
1070			break;
1071		}
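		/*
		 * Illustrative arithmetic (added comment): with ETHER_HDR_LEN
		 * (14) and ETHER_CRC_LEN (4), the check above caps the MTU at
		 * 9234 - 18 = 9216 bytes.
		 */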
1072
1073		ifp->if_mtu = ifr->ifr_mtu;
1074		adapter->max_frame_size =
1075		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1076		igb_init_locked(adapter);
1077		IGB_CORE_UNLOCK(adapter);
1078		break;
1079	    }
1080	case SIOCSIFFLAGS:
1081		IOCTL_DEBUGOUT("ioctl rcv'd:\
1082		    SIOCSIFFLAGS (Set Interface Flags)");
1083		IGB_CORE_LOCK(adapter);
1084		if (ifp->if_flags & IFF_UP) {
1085			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1086				if ((ifp->if_flags ^ adapter->if_flags) &
1087				    (IFF_PROMISC | IFF_ALLMULTI)) {
1088					igb_disable_promisc(adapter);
1089					igb_set_promisc(adapter);
1090				}
1091			} else
1092				igb_init_locked(adapter);
1093		} else
1094			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1095				igb_stop(adapter);
1096		adapter->if_flags = ifp->if_flags;
1097		IGB_CORE_UNLOCK(adapter);
1098		break;
1099	case SIOCADDMULTI:
1100	case SIOCDELMULTI:
1101		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1102		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1103			IGB_CORE_LOCK(adapter);
1104			igb_disable_intr(adapter);
1105			igb_set_multi(adapter);
1106#ifdef DEVICE_POLLING
1107			if (!(ifp->if_capenable & IFCAP_POLLING))
1108#endif
1109				igb_enable_intr(adapter);
1110			IGB_CORE_UNLOCK(adapter);
1111		}
1112		break;
1113	case SIOCSIFMEDIA:
1114		/* Check SOL/IDER usage */
1115		IGB_CORE_LOCK(adapter);
1116		if (e1000_check_reset_block(&adapter->hw)) {
1117			IGB_CORE_UNLOCK(adapter);
1118			device_printf(adapter->dev, "Media change is"
1119			    " blocked due to SOL/IDER session.\n");
1120			break;
1121		}
1122		IGB_CORE_UNLOCK(adapter);
1123	case SIOCGIFMEDIA:
1124		IOCTL_DEBUGOUT("ioctl rcv'd: \
1125		    SIOCxIFMEDIA (Get/Set Interface Media)");
1126		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1127		break;
1128	case SIOCSIFCAP:
1129	    {
1130		int mask, reinit;
1131
1132		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1133		reinit = 0;
1134		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1135#ifdef DEVICE_POLLING
1136		if (mask & IFCAP_POLLING) {
1137			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1138				error = ether_poll_register(igb_poll, ifp);
1139				if (error)
1140					return (error);
1141				IGB_CORE_LOCK(adapter);
1142				igb_disable_intr(adapter);
1143				ifp->if_capenable |= IFCAP_POLLING;
1144				IGB_CORE_UNLOCK(adapter);
1145			} else {
1146				error = ether_poll_deregister(ifp);
1147				/* Enable interrupt even in error case */
1148				IGB_CORE_LOCK(adapter);
1149				igb_enable_intr(adapter);
1150				ifp->if_capenable &= ~IFCAP_POLLING;
1151				IGB_CORE_UNLOCK(adapter);
1152			}
1153		}
1154#endif
1155		if (mask & IFCAP_HWCSUM) {
1156			ifp->if_capenable ^= IFCAP_HWCSUM;
1157			reinit = 1;
1158		}
1159		if (mask & IFCAP_TSO4) {
1160			ifp->if_capenable ^= IFCAP_TSO4;
1161			reinit = 1;
1162		}
1163		if (mask & IFCAP_VLAN_HWTAGGING) {
1164			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1165			reinit = 1;
1166		}
1167		if (mask & IFCAP_VLAN_HWFILTER) {
1168			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1169			reinit = 1;
1170		}
1171		if (mask & IFCAP_VLAN_HWTSO) {
1172			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1173			reinit = 1;
1174		}
1175		if (mask & IFCAP_LRO) {
1176			ifp->if_capenable ^= IFCAP_LRO;
1177			reinit = 1;
1178		}
1179		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1180			igb_init(adapter);
1181		VLAN_CAPABILITIES(ifp);
1182		break;
1183	    }
1184
1185	default:
1186		error = ether_ioctl(ifp, command, data);
1187		break;
1188	}
1189
1190	return (error);
1191}
1192
1193
1194/*********************************************************************
1195 *  Init entry point
1196 *
1197 *  This routine is used in two ways. It is used by the stack as
1198 *  the init entry point in the network interface structure. It is also used
1199 *  by the driver as a hw/sw initialization routine to get to a
1200 *  consistent state.
1201 *
1202 *  return 0 on success, positive on failure
1203 **********************************************************************/
1204
1205static void
1206igb_init_locked(struct adapter *adapter)
1207{
1208	struct ifnet	*ifp = adapter->ifp;
1209	device_t	dev = adapter->dev;
1210
1211	INIT_DEBUGOUT("igb_init: begin");
1212
1213	IGB_CORE_LOCK_ASSERT(adapter);
1214
1215	igb_disable_intr(adapter);
1216	callout_stop(&adapter->timer);
1217
1218	/* Get the latest MAC address; the user can use a LAA */
1219        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1220              ETHER_ADDR_LEN);
1221
1222	/* Put the address into the Receive Address Array */
1223	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1224
1225	igb_reset(adapter);
1226	igb_update_link_status(adapter);
1227
1228	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1229
1230	/* Set hardware offload abilities */
1231	ifp->if_hwassist = 0;
1232	if (ifp->if_capenable & IFCAP_TXCSUM) {
1233		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1234#if __FreeBSD_version >= 800000
1235		if (adapter->hw.mac.type == e1000_82576)
1236			ifp->if_hwassist |= CSUM_SCTP;
1237#endif
1238	}
1239
1240	if (ifp->if_capenable & IFCAP_TSO4)
1241		ifp->if_hwassist |= CSUM_TSO;
1242
1243	/* Configure for OS presence */
1244	igb_init_manageability(adapter);
1245
1246	/* Prepare transmit descriptors and buffers */
1247	igb_setup_transmit_structures(adapter);
1248	igb_initialize_transmit_units(adapter);
1249
1250	/* Setup Multicast table */
1251	igb_set_multi(adapter);
1252
1253	/*
1254	** Figure out the desired mbuf pool
1255	** for doing jumbo/packetsplit
1256	*/
1257	if (adapter->max_frame_size <= 2048)
1258		adapter->rx_mbuf_sz = MCLBYTES;
1259	else if (adapter->max_frame_size <= 4096)
1260		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1261	else
1262		adapter->rx_mbuf_sz = MJUM9BYTES;
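	/*
	 * Illustrative mapping (added comment, assuming the standard FreeBSD
	 * cluster sizes): a 1500-byte MTU gives a 1518-byte max frame and
	 * plain 2K clusters (MCLBYTES); a 9000-byte MTU gives 9018 bytes and
	 * 9K clusters (MJUM9BYTES).
	 */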
1263
1264	/* Prepare receive descriptors and buffers */
1265	if (igb_setup_receive_structures(adapter)) {
1266		device_printf(dev, "Could not setup receive structures\n");
1267		return;
1268	}
1269	igb_initialize_receive_units(adapter);
1270
1271        /* Enable VLAN support */
1272	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1273		igb_setup_vlan_hw_support(adapter);
1274
1275	/* Don't lose promiscuous settings */
1276	igb_set_promisc(adapter);
1277
1278	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1279	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1280
1281	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1282	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1283
1284	if (adapter->msix > 1) /* Set up queue routing */
1285		igb_configure_queues(adapter);
1286
1287	/* this clears any pending interrupts */
1288	E1000_READ_REG(&adapter->hw, E1000_ICR);
1289#ifdef DEVICE_POLLING
1290	/*
1291	 * Only enable interrupts if we are not polling; make sure
1292	 * they are off otherwise.
1293	 */
1294	if (ifp->if_capenable & IFCAP_POLLING)
1295		igb_disable_intr(adapter);
1296	else
1297#endif /* DEVICE_POLLING */
1298	{
1299		igb_enable_intr(adapter);
1300		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1301	}
1302
1303	/* Set Energy Efficient Ethernet */
1304
1305	e1000_set_eee_i350(&adapter->hw);
1306}
1307
1308static void
1309igb_init(void *arg)
1310{
1311	struct adapter *adapter = arg;
1312
1313	IGB_CORE_LOCK(adapter);
1314	igb_init_locked(adapter);
1315	IGB_CORE_UNLOCK(adapter);
1316}
1317
1318
1319static void
1320igb_handle_que(void *context, int pending)
1321{
1322	struct igb_queue *que = context;
1323	struct adapter *adapter = que->adapter;
1324	struct tx_ring *txr = que->txr;
1325	struct ifnet	*ifp = adapter->ifp;
1326
1327	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1328		bool	more;
1329
1330		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1331
1332		IGB_TX_LOCK(txr);
1333		if (igb_txeof(txr))
1334			more = TRUE;
1335#if __FreeBSD_version >= 800000
1336		/* Process the stack queue only if not depleted */
1337		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1338		    !drbr_empty(ifp, txr->br))
1339			igb_mq_start_locked(ifp, txr, NULL);
1340#else
1341		igb_start_locked(txr, ifp);
1342#endif
1343		IGB_TX_UNLOCK(txr);
1344		/* Do we need another? */
1345		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1346			taskqueue_enqueue(que->tq, &que->que_task);
1347			return;
1348		}
1349	}
1350
1351#ifdef DEVICE_POLLING
1352	if (ifp->if_capenable & IFCAP_POLLING)
1353		return;
1354#endif
1355	/* Reenable this interrupt */
1356	if (que->eims)
1357		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1358	else
1359		igb_enable_intr(adapter);
1360}
1361
1362/* Deal with link in a sleepable context */
1363static void
1364igb_handle_link(void *context, int pending)
1365{
1366	struct adapter *adapter = context;
1367
1368	adapter->hw.mac.get_link_status = 1;
1369	igb_update_link_status(adapter);
1370}
1371
1372/*********************************************************************
1373 *
1374 *  MSI/Legacy Deferred
1375 *  Interrupt Service routine
1376 *
1377 *********************************************************************/
1378static int
1379igb_irq_fast(void *arg)
1380{
1381	struct adapter		*adapter = arg;
1382	struct igb_queue	*que = adapter->queues;
1383	u32			reg_icr;
1384
1385
1386	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1387
1388	/* Hot eject?  */
1389	if (reg_icr == 0xffffffff)
1390		return FILTER_STRAY;
1391
1392	/* Definitely not our interrupt.  */
1393	if (reg_icr == 0x0)
1394		return FILTER_STRAY;
1395
1396	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1397		return FILTER_STRAY;
1398
1399	/*
1400	 * Mask interrupts until the taskqueue is finished running.  This is
1401	 * cheap, just assume that it is needed.  This also works around the
1402	 * MSI message reordering errata on certain systems.
1403	 */
1404	igb_disable_intr(adapter);
1405	taskqueue_enqueue(que->tq, &que->que_task);
1406
1407	/* Link status change */
1408	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1409		taskqueue_enqueue(que->tq, &adapter->link_task);
1410
1411	if (reg_icr & E1000_ICR_RXO)
1412		adapter->rx_overruns++;
1413	return FILTER_HANDLED;
1414}
1415
1416#ifdef DEVICE_POLLING
1417/*********************************************************************
1418 *
1419 *  Legacy polling routine: if using this code you MUST be sure that
1420 *  multiqueue is not in use, i.e., set igb_num_queues to 1.
1421 *
1422 *********************************************************************/
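/*
 * Illustrative setup (not part of the original source, standard
 * DEVICE_POLLING usage assumed): build the kernel with
 * "options DEVICE_POLLING", set hw.igb.num_queues="1" in
 * /boot/loader.conf, and enable polling per interface with
 * "ifconfig igb0 polling".
 */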
1423#if __FreeBSD_version >= 800000
1424#define POLL_RETURN_COUNT(a) (a)
1425static int
1426#else
1427#define POLL_RETURN_COUNT(a)
1428static void
1429#endif
1430igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1431{
1432	struct adapter		*adapter = ifp->if_softc;
1433	struct igb_queue	*que = adapter->queues;
1434	struct tx_ring		*txr = adapter->tx_rings;
1435	u32			reg_icr, rx_done = 0;
1436	u32			loop = IGB_MAX_LOOP;
1437	bool			more;
1438
1439	IGB_CORE_LOCK(adapter);
1440	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1441		IGB_CORE_UNLOCK(adapter);
1442		return POLL_RETURN_COUNT(rx_done);
1443	}
1444
1445	if (cmd == POLL_AND_CHECK_STATUS) {
1446		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1447		/* Link status change */
1448		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1449			igb_handle_link(adapter, 0);
1450
1451		if (reg_icr & E1000_ICR_RXO)
1452			adapter->rx_overruns++;
1453	}
1454	IGB_CORE_UNLOCK(adapter);
1455
1456	igb_rxeof(que, count, &rx_done);
1457
1458	IGB_TX_LOCK(txr);
1459	do {
1460		more = igb_txeof(txr);
1461	} while (loop-- && more);
1462#if __FreeBSD_version >= 800000
1463	if (!drbr_empty(ifp, txr->br))
1464		igb_mq_start_locked(ifp, txr, NULL);
1465#else
1466	igb_start_locked(txr, ifp);
1467#endif
1468	IGB_TX_UNLOCK(txr);
1469	return POLL_RETURN_COUNT(rx_done);
1470}
1471#endif /* DEVICE_POLLING */
1472
1473/*********************************************************************
1474 *
1475 *  MSIX Que Interrupt Service routine
1476 *
1477 **********************************************************************/
1478static void
1479igb_msix_que(void *arg)
1480{
1481	struct igb_queue *que = arg;
1482	struct adapter *adapter = que->adapter;
1483	struct tx_ring *txr = que->txr;
1484	struct rx_ring *rxr = que->rxr;
1485	u32		newitr = 0;
1486	bool		more_tx, more_rx;
1487
1488	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1489	++que->irqs;
1490
1491	IGB_TX_LOCK(txr);
1492	more_tx = igb_txeof(txr);
1493	IGB_TX_UNLOCK(txr);
1494
1495	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1496
1497	if (adapter->enable_aim == FALSE)
1498		goto no_calc;
1499	/*
1500	** Do Adaptive Interrupt Moderation:
1501        **  - Write out last calculated setting
1502	**  - Calculate based on average size over
1503	**    the last interval.
1504	*/
1505        if (que->eitr_setting)
1506                E1000_WRITE_REG(&adapter->hw,
1507                    E1000_EITR(que->msix), que->eitr_setting);
1508
1509        que->eitr_setting = 0;
1510
1511        /* Idle, do nothing */
1512        if ((txr->bytes == 0) && (rxr->bytes == 0))
1513                goto no_calc;
1514
1515        /* Use half the default if sub-gig */
1516        if (adapter->link_speed != 1000)
1517                newitr = IGB_DEFAULT_ITR / 2;
1518        else {
1519		if ((txr->bytes) && (txr->packets))
1520                	newitr = txr->bytes/txr->packets;
1521		if ((rxr->bytes) && (rxr->packets))
1522			newitr = max(newitr,
1523			    (rxr->bytes / rxr->packets));
1524                newitr += 24; /* account for hardware frame, crc */
1525		/* set an upper boundary */
1526		newitr = min(newitr, 3000);
1527		/* Be nice to the mid range */
1528                if ((newitr > 300) && (newitr < 1200))
1529                        newitr = (newitr / 3);
1530                else
1531                        newitr = (newitr / 2);
1532        }
1533        newitr &= 0x7FFC;  /* Mask invalid bits */
1534        if (adapter->hw.mac.type == e1000_82575)
1535                newitr |= newitr << 16;
1536        else
1537                newitr |= E1000_EITR_CNT_IGNR;
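        /*
         * Illustrative example (added comment): at gigabit link speed an
         * average frame of 1000 bytes gives newitr = 1000 + 24 = 1024,
         * which lands in the (300, 1200) mid range and becomes
         * 1024 / 3 = 341, masked to 340 before it is written to EITR on
         * the next interrupt.
         */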
1538
1539        /* save for next interrupt */
1540        que->eitr_setting = newitr;
1541
1542        /* Reset state */
1543        txr->bytes = 0;
1544        txr->packets = 0;
1545        rxr->bytes = 0;
1546        rxr->packets = 0;
1547
1548no_calc:
1549	/* Schedule a clean task if needed */
1550	if (more_tx || more_rx)
1551		taskqueue_enqueue(que->tq, &que->que_task);
1552	else
1553		/* Reenable this interrupt */
1554		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1555	return;
1556}
1557
1558
1559/*********************************************************************
1560 *
1561 *  MSIX Link Interrupt Service routine
1562 *
1563 **********************************************************************/
1564
1565static void
1566igb_msix_link(void *arg)
1567{
1568	struct adapter	*adapter = arg;
1569	u32       	icr;
1570
1571	++adapter->link_irq;
1572	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1573	if (!(icr & E1000_ICR_LSC))
1574		goto spurious;
1575	igb_handle_link(adapter, 0);
1576
1577spurious:
1578	/* Rearm */
1579	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1580	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1581	return;
1582}
1583
1584
1585/*********************************************************************
1586 *
1587 *  Media Ioctl callback
1588 *
1589 *  This routine is called whenever the user queries the status of
1590 *  the interface using ifconfig.
1591 *
1592 **********************************************************************/
1593static void
1594igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1595{
1596	struct adapter *adapter = ifp->if_softc;
1597	u_char fiber_type = IFM_1000_SX;
1598
1599	INIT_DEBUGOUT("igb_media_status: begin");
1600
1601	IGB_CORE_LOCK(adapter);
1602	igb_update_link_status(adapter);
1603
1604	ifmr->ifm_status = IFM_AVALID;
1605	ifmr->ifm_active = IFM_ETHER;
1606
1607	if (!adapter->link_active) {
1608		IGB_CORE_UNLOCK(adapter);
1609		return;
1610	}
1611
1612	ifmr->ifm_status |= IFM_ACTIVE;
1613
1614	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1615	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1616		ifmr->ifm_active |= fiber_type | IFM_FDX;
1617	else {
1618		switch (adapter->link_speed) {
1619		case 10:
1620			ifmr->ifm_active |= IFM_10_T;
1621			break;
1622		case 100:
1623			ifmr->ifm_active |= IFM_100_TX;
1624			break;
1625		case 1000:
1626			ifmr->ifm_active |= IFM_1000_T;
1627			break;
1628		}
1629		if (adapter->link_duplex == FULL_DUPLEX)
1630			ifmr->ifm_active |= IFM_FDX;
1631		else
1632			ifmr->ifm_active |= IFM_HDX;
1633	}
1634	IGB_CORE_UNLOCK(adapter);
1635}
1636
1637/*********************************************************************
1638 *
1639 *  Media Ioctl callback
1640 *
1641 *  This routine is called when the user changes speed/duplex using
1642 *  media/mediaopt option with ifconfig.
1643 *
1644 **********************************************************************/
1645static int
1646igb_media_change(struct ifnet *ifp)
1647{
1648	struct adapter *adapter = ifp->if_softc;
1649	struct ifmedia  *ifm = &adapter->media;
1650
1651	INIT_DEBUGOUT("igb_media_change: begin");
1652
1653	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1654		return (EINVAL);
1655
1656	IGB_CORE_LOCK(adapter);
1657	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1658	case IFM_AUTO:
1659		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1660		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1661		break;
1662	case IFM_1000_LX:
1663	case IFM_1000_SX:
1664	case IFM_1000_T:
1665		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1666		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1667		break;
1668	case IFM_100_TX:
1669		adapter->hw.mac.autoneg = FALSE;
1670		adapter->hw.phy.autoneg_advertised = 0;
1671		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1672			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1673		else
1674			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1675		break;
1676	case IFM_10_T:
1677		adapter->hw.mac.autoneg = FALSE;
1678		adapter->hw.phy.autoneg_advertised = 0;
1679		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1680			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1681		else
1682			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1683		break;
1684	default:
1685		device_printf(adapter->dev, "Unsupported media type\n");
1686	}
1687
1688	igb_init_locked(adapter);
1689	IGB_CORE_UNLOCK(adapter);
1690
1691	return (0);
1692}
1693
1694
1695/*********************************************************************
1696 *
1697 *  This routine maps the mbufs to Advanced TX descriptors.
1698 *
1699 **********************************************************************/
1700static int
1701igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1702{
1703	struct adapter		*adapter = txr->adapter;
1704	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1705	bus_dmamap_t		map;
1706	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1707	union e1000_adv_tx_desc	*txd = NULL;
1708	struct mbuf		*m_head = *m_headp;
1709	struct ether_vlan_header *eh = NULL;
1710	struct ip		*ip = NULL;
1711	struct tcphdr		*th = NULL;
1712	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1713	int			ehdrlen, poff;
1714	int			nsegs, i, first, last = 0;
1715	int			error, do_tso, remap = 1;
1716
1717	/* Set basic descriptor constants */
1718	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1719	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1720	if (m_head->m_flags & M_VLANTAG)
1721		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1722
1723retry:
1724	m_head = *m_headp;
1725	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1726	hdrlen = ehdrlen = poff = 0;
1727
1728	/*
1729	 * Intel recommends entire IP/TCP header length reside in a single
1730	 * buffer. If multiple descriptors are used to describe the IP and
1731	 * TCP header, each descriptor should describe one or more
1732	 * complete headers; descriptors referencing only parts of headers
1733	 * are not supported. If all layer headers are not coalesced into
1734	 * a single buffer, each buffer should not cross a 4KB boundary,
1735	 * or be larger than the maximum read request size.
1736	 * The controller also requires modifying the IP/TCP header to make
1737	 * TSO work, so we first get a writable mbuf chain and then coalesce
1738	 * the Ethernet/IP/TCP headers into a single buffer to meet the
1739	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1740	 * offloading, which has similar restrictions.
1741	 */
1742	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1743		if (do_tso || (m_head->m_next != NULL &&
1744		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1745			if (M_WRITABLE(*m_headp) == 0) {
1746				m_head = m_dup(*m_headp, M_DONTWAIT);
1747				m_freem(*m_headp);
1748				if (m_head == NULL) {
1749					*m_headp = NULL;
1750					return (ENOBUFS);
1751				}
1752				*m_headp = m_head;
1753			}
1754		}
1755		/*
1756		 * Assume IPv4; we don't have TSO/checksum offload support
1757		 * for IPv6 yet.
1758		 */
1759		ehdrlen = sizeof(struct ether_header);
1760		m_head = m_pullup(m_head, ehdrlen);
1761		if (m_head == NULL) {
1762			*m_headp = NULL;
1763			return (ENOBUFS);
1764		}
1765		eh = mtod(m_head, struct ether_vlan_header *);
1766		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1767			ehdrlen = sizeof(struct ether_vlan_header);
1768			m_head = m_pullup(m_head, ehdrlen);
1769			if (m_head == NULL) {
1770				*m_headp = NULL;
1771				return (ENOBUFS);
1772			}
1773		}
1774		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1775		if (m_head == NULL) {
1776			*m_headp = NULL;
1777			return (ENOBUFS);
1778		}
1779		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1780		poff = ehdrlen + (ip->ip_hl << 2);
1781		if (do_tso) {
1782			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1783			if (m_head == NULL) {
1784				*m_headp = NULL;
1785				return (ENOBUFS);
1786			}
1787			/*
1788			 * The pseudo TCP checksum does not include the TCP
1789			 * payload length, so the driver must recompute it here
1790			 * to match what the hardware expects. This follows
1791			 * Microsoft's Large Send Offload specification.
1792			 */
1793			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1794			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1795			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1796			/* Keep track of the full header length */
1797			hdrlen = poff + (th->th_off << 2);
1798		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1799			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1800			if (m_head == NULL) {
1801				*m_headp = NULL;
1802				return (ENOBUFS);
1803			}
1804			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1805			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1806			if (m_head == NULL) {
1807				*m_headp = NULL;
1808				return (ENOBUFS);
1809			}
1810			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1811			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1812		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1813			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1814			if (m_head == NULL) {
1815				*m_headp = NULL;
1816				return (ENOBUFS);
1817			}
1818			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1819		}
1820		*m_headp = m_head;
1821	}
1822
1823	/*
1824	 * Map the packet for DMA
1825	 *
1826	 * Capture the first descriptor index;
1827	 * this descriptor will have the index
1828	 * of the EOP, which is the only one that
1829	 * now gets a DONE bit writeback.
1830	 */
1831	first = txr->next_avail_desc;
1832	tx_buffer = &txr->tx_buffers[first];
1833	tx_buffer_mapped = tx_buffer;
1834	map = tx_buffer->map;
1835
1836	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1837	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1838
1839	/*
1840	 * There are two types of errors we can (try) to handle:
1841	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1842	 *   out of segments.  Defragment the mbuf chain and try again.
1843	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1844	 *   at this point in time.  Defer sending and try again later.
1845	 * All other errors, in particular EINVAL, are fatal and prevent the
1846	 * mbuf chain from ever going through.  Drop it and report error.
1847	 */
1848	if (error == EFBIG && remap) {
1849		struct mbuf *m;
1850
1851		m = m_defrag(*m_headp, M_DONTWAIT);
1852		if (m == NULL) {
1853			adapter->mbuf_defrag_failed++;
1854			m_freem(*m_headp);
1855			*m_headp = NULL;
1856			return (ENOBUFS);
1857		}
1858		*m_headp = m;
1859
1860		/* Try it again, but only once */
1861		remap = 0;
1862		goto retry;
1863	} else if (error == ENOMEM) {
1864		adapter->no_tx_dma_setup++;
1865		return (error);
1866	} else if (error != 0) {
1867		adapter->no_tx_dma_setup++;
1868		m_freem(*m_headp);
1869		*m_headp = NULL;
1870		return (error);
1871	}
1872
1873	/*
1874	** Make sure we don't overrun the ring:
1875	** we need nsegs descriptors plus one for
1876	** the context descriptor used for the
1877	** offloads.
1878	*/
1879	if ((nsegs + 1) > (txr->tx_avail - 2)) {
1880		txr->no_desc_avail++;
1881		bus_dmamap_unload(txr->txtag, map);
1882		return (ENOBUFS);
1883	}
1884	m_head = *m_headp;
1885
1886	/* Do hardware assists:
1887	 * Set up the context descriptor, used
1888	 * when any hardware offload is done.
1889	 * This includes CSUM, VLAN, and TSO.
1890	 * It will use the first descriptor.
1891	 */
1892
1893	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1894		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1895			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1896			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1897			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1898		} else
1899			return (ENXIO);
1900	} else if (igb_tx_ctx_setup(txr, m_head))
1901		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1902
1903	/* Calculate payload length */
1904	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1905	    << E1000_ADVTXD_PAYLEN_SHIFT);
1906
1907	/* 82575 needs the queue index added */
1908	if (adapter->hw.mac.type == e1000_82575)
1909		olinfo_status |= txr->me << 4;
1910
1911	/* Set up our transmit descriptors */
1912	i = txr->next_avail_desc;
1913	for (int j = 0; j < nsegs; j++) {
1914		bus_size_t seg_len;
1915		bus_addr_t seg_addr;
1916
1917		tx_buffer = &txr->tx_buffers[i];
1918		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1919		seg_addr = segs[j].ds_addr;
1920		seg_len  = segs[j].ds_len;
1921
1922		txd->read.buffer_addr = htole64(seg_addr);
1923		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1924		txd->read.olinfo_status = htole32(olinfo_status);
1925		last = i;
1926		if (++i == adapter->num_tx_desc)
1927			i = 0;
1928		tx_buffer->m_head = NULL;
1929		tx_buffer->next_eop = -1;
1930	}
1931
1932	txr->next_avail_desc = i;
1933	txr->tx_avail -= nsegs;
1934	tx_buffer->m_head = m_head;
1935
1936	/*
1937	** Here we swap the map so the last descriptor,
1938	** which gets the completion interrupt, has the
1939	** real map, and the first descriptor gets the
1940	** unused map from this descriptor.
1941	*/
1942	tx_buffer_mapped->map = tx_buffer->map;
1943	tx_buffer->map = map;
1944	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1945
1946	/*
1947	 * Last Descriptor of Packet
1948	 * needs End Of Packet (EOP)
1949	 * and Report Status (RS)
1950	 */
1951	txd->read.cmd_type_len |=
1952	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1953	/*
1954	 * Keep track in the first buffer which
1955	 * descriptor will be written back
1956	 */
1957	tx_buffer = &txr->tx_buffers[first];
1958	tx_buffer->next_eop = last;
1959	/* Update the watchdog time early and often */
1960	txr->watchdog_time = ticks;
1961
1962	/*
1963	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1964	 * that this frame is available to transmit.
1965	 */
1966	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1967	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1968	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1969	++txr->tx_packets;
1970
1971	return (0);
1972}
1973static void
1974igb_set_promisc(struct adapter *adapter)
1975{
1976	struct ifnet	*ifp = adapter->ifp;
1977	struct e1000_hw *hw = &adapter->hw;
1978	u32		reg;
1979
1980	if (adapter->vf_ifp) {
1981		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1982		return;
1983	}
1984
1985	reg = E1000_READ_REG(hw, E1000_RCTL);
1986	if (ifp->if_flags & IFF_PROMISC) {
1987		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1988		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1989	} else if (ifp->if_flags & IFF_ALLMULTI) {
1990		reg |= E1000_RCTL_MPE;
1991		reg &= ~E1000_RCTL_UPE;
1992		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1993	}
1994}
1995
1996static void
1997igb_disable_promisc(struct adapter *adapter)
1998{
1999	struct e1000_hw *hw = &adapter->hw;
2000	u32		reg;
2001
2002	if (adapter->vf_ifp) {
2003		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2004		return;
2005	}
2006	reg = E1000_READ_REG(hw, E1000_RCTL);
2007	reg &=  (~E1000_RCTL_UPE);
2008	reg &=  (~E1000_RCTL_MPE);
2009	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2010}
2011
2012
2013/*********************************************************************
2014 *  Multicast Update
2015 *
2016 *  This routine is called whenever multicast address list is updated.
2017 *
2018 **********************************************************************/
2019
2020static void
2021igb_set_multi(struct adapter *adapter)
2022{
2023	struct ifnet	*ifp = adapter->ifp;
2024	struct ifmultiaddr *ifma;
2025	u32 reg_rctl = 0;
2026	u8  *mta;
2027
2028	int mcnt = 0;
2029
2030	IOCTL_DEBUGOUT("igb_set_multi: begin");
2031
2032	mta = adapter->mta;
2033	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2034	    MAX_NUM_MULTICAST_ADDRESSES);
2035
2036#if __FreeBSD_version < 800000
2037	IF_ADDR_LOCK(ifp);
2038#else
2039	if_maddr_rlock(ifp);
2040#endif
2041	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2042		if (ifma->ifma_addr->sa_family != AF_LINK)
2043			continue;
2044
2045		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2046			break;
2047
2048		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2049		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2050		mcnt++;
2051	}
2052#if __FreeBSD_version < 800000
2053	IF_ADDR_UNLOCK(ifp);
2054#else
2055	if_maddr_runlock(ifp);
2056#endif
2057
2058	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2059		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2060		reg_rctl |= E1000_RCTL_MPE;
2061		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2062	} else
2063		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2064}
2065
2066
2067/*********************************************************************
2068 *  Timer routine:
2069 *  	This routine checks for link status,
2070 *	updates statistics, and does the watchdog.
2071 *
2072 **********************************************************************/
2073
2074static void
2075igb_local_timer(void *arg)
2076{
2077	struct adapter		*adapter = arg;
2078	device_t		dev = adapter->dev;
2079	struct ifnet		*ifp = adapter->ifp;
2080	struct tx_ring		*txr = adapter->tx_rings;
2081	struct igb_queue	*que = adapter->queues;
2082	int			hung = 0, busy = 0;
2083
2084
2085	IGB_CORE_LOCK_ASSERT(adapter);
2086
2087	igb_update_link_status(adapter);
2088	igb_update_stats_counters(adapter);
2089
2090        /*
2091        ** Check the TX queues status
2092	**	- central locked handling of OACTIVE
2093	**	- watchdog only if all queues show hung
2094        */
2095	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2096		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2097		    (adapter->pause_frames == 0))
2098			++hung;
2099		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2100			++busy;
2101		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2102			taskqueue_enqueue(que->tq, &que->que_task);
2103	}
2104	if (hung == adapter->num_queues)
2105		goto timeout;
2106	if (busy == adapter->num_queues)
2107		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2108	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2109	    (busy < adapter->num_queues))
2110		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2111
2112	adapter->pause_frames = 0;
2113	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2114#ifndef DEVICE_POLLING
2115	/* Schedule all queue interrupts - deadlock protection */
2116	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2117#endif
2118	return;
2119
2120timeout:
2121	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2122	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2123	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2124	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2125	device_printf(dev, "TX(%d) desc avail = %d, "
2126	    "Next TX to Clean = %d\n",
2127	    txr->me, txr->tx_avail, txr->next_to_clean);
2128	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2129	adapter->watchdog_events++;
2130	igb_init_locked(adapter);
2131}
2132
2133static void
2134igb_update_link_status(struct adapter *adapter)
2135{
2136	struct e1000_hw *hw = &adapter->hw;
2137	struct ifnet *ifp = adapter->ifp;
2138	device_t dev = adapter->dev;
2139	struct tx_ring *txr = adapter->tx_rings;
2140	u32 link_check, thstat, ctrl;
2141
2142	link_check = thstat = ctrl = 0;
2143
2144	/* Get the cached link value or read for real */
2145        switch (hw->phy.media_type) {
2146        case e1000_media_type_copper:
2147                if (hw->mac.get_link_status) {
2148			/* Do the work to read phy */
2149                        e1000_check_for_link(hw);
2150                        link_check = !hw->mac.get_link_status;
2151                } else
2152                        link_check = TRUE;
2153                break;
2154        case e1000_media_type_fiber:
2155                e1000_check_for_link(hw);
2156                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2157                                 E1000_STATUS_LU);
2158                break;
2159        case e1000_media_type_internal_serdes:
2160                e1000_check_for_link(hw);
2161                link_check = adapter->hw.mac.serdes_has_link;
2162                break;
2163	/* VF device is type_unknown */
2164        case e1000_media_type_unknown:
2165                e1000_check_for_link(hw);
2166		link_check = !hw->mac.get_link_status;
2167		/* Fall thru */
2168        default:
2169                break;
2170        }
2171
2172	/* Check for thermal downshift or shutdown */
2173	if (hw->mac.type == e1000_i350) {
2174		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2175		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2176	}
2177
2178	/* Now we check if a transition has happened */
2179	if (link_check && (adapter->link_active == 0)) {
2180		e1000_get_speed_and_duplex(&adapter->hw,
2181		    &adapter->link_speed, &adapter->link_duplex);
2182		if (bootverbose)
2183			device_printf(dev, "Link is up %d Mbps %s\n",
2184			    adapter->link_speed,
2185			    ((adapter->link_duplex == FULL_DUPLEX) ?
2186			    "Full Duplex" : "Half Duplex"));
2187		adapter->link_active = 1;
2188		ifp->if_baudrate = adapter->link_speed * 1000000;
2189		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2190		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2191			device_printf(dev, "Link: thermal downshift\n");
2192		/* This can sleep */
2193		if_link_state_change(ifp, LINK_STATE_UP);
2194	} else if (!link_check && (adapter->link_active == 1)) {
2195		ifp->if_baudrate = adapter->link_speed = 0;
2196		adapter->link_duplex = 0;
2197		if (bootverbose)
2198			device_printf(dev, "Link is Down\n");
2199		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2200		    (thstat & E1000_THSTAT_PWR_DOWN))
2201			device_printf(dev, "Link: thermal shutdown\n");
2202		adapter->link_active = 0;
2203		/* This can sleep */
2204		if_link_state_change(ifp, LINK_STATE_DOWN);
2205		/* Reset queue state */
2206		for (int i = 0; i < adapter->num_queues; i++, txr++)
2207			txr->queue_status = IGB_QUEUE_IDLE;
2208	}
2209}
2210
2211/*********************************************************************
2212 *
2213 *  This routine disables all traffic on the adapter by issuing a
2214 *  global reset on the MAC and deallocates TX/RX buffers.
2215 *
2216 **********************************************************************/
2217
2218static void
2219igb_stop(void *arg)
2220{
2221	struct adapter	*adapter = arg;
2222	struct ifnet	*ifp = adapter->ifp;
2223	struct tx_ring *txr = adapter->tx_rings;
2224
2225	IGB_CORE_LOCK_ASSERT(adapter);
2226
2227	INIT_DEBUGOUT("igb_stop: begin");
2228
2229	igb_disable_intr(adapter);
2230
2231	callout_stop(&adapter->timer);
2232
2233	/* Tell the stack that the interface is no longer active */
2234	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2235	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2236
2237	/* Disarm watchdog timer. */
2238	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2239		IGB_TX_LOCK(txr);
2240		txr->queue_status = IGB_QUEUE_IDLE;
2241		IGB_TX_UNLOCK(txr);
2242	}
2243
2244	e1000_reset_hw(&adapter->hw);
2245	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2246
2247	e1000_led_off(&adapter->hw);
2248	e1000_cleanup_led(&adapter->hw);
2249}
2250
2251
2252/*********************************************************************
2253 *
2254 *  Determine hardware revision.
2255 *
2256 **********************************************************************/
2257static void
2258igb_identify_hardware(struct adapter *adapter)
2259{
2260	device_t dev = adapter->dev;
2261
2262	/* Make sure our PCI config space has the necessary stuff set */
2263	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2264	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2265	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2266		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2267		    "bits were not set!\n");
2268		adapter->hw.bus.pci_cmd_word |=
2269		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2270		pci_write_config(dev, PCIR_COMMAND,
2271		    adapter->hw.bus.pci_cmd_word, 2);
2272	}
2273
2274	/* Save off the information about this board */
2275	adapter->hw.vendor_id = pci_get_vendor(dev);
2276	adapter->hw.device_id = pci_get_device(dev);
2277	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2278	adapter->hw.subsystem_vendor_id =
2279	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2280	adapter->hw.subsystem_device_id =
2281	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2282
2283	/* Set MAC type early for PCI setup */
2284	e1000_set_mac_type(&adapter->hw);
2285
2286	/* Are we a VF device? */
2287	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2288	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2289		adapter->vf_ifp = 1;
2290	else
2291		adapter->vf_ifp = 0;
2292}
2293
2294static int
2295igb_allocate_pci_resources(struct adapter *adapter)
2296{
2297	device_t	dev = adapter->dev;
2298	int		rid;
2299
2300	rid = PCIR_BAR(0);
2301	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2302	    &rid, RF_ACTIVE);
2303	if (adapter->pci_mem == NULL) {
2304		device_printf(dev, "Unable to allocate bus resource: memory\n");
2305		return (ENXIO);
2306	}
2307	adapter->osdep.mem_bus_space_tag =
2308	    rman_get_bustag(adapter->pci_mem);
2309	adapter->osdep.mem_bus_space_handle =
2310	    rman_get_bushandle(adapter->pci_mem);
2311	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2312
2313	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2314
2315	/* This will setup either MSI/X or MSI */
2316	adapter->msix = igb_setup_msix(adapter);
2317	adapter->hw.back = &adapter->osdep;
2318
2319	return (0);
2320}
2321
2322/*********************************************************************
2323 *
2324 *  Setup the Legacy or MSI Interrupt handler
2325 *
2326 **********************************************************************/
2327static int
2328igb_allocate_legacy(struct adapter *adapter)
2329{
2330	device_t		dev = adapter->dev;
2331	struct igb_queue	*que = adapter->queues;
2332	int			error, rid = 0;
2333
2334	/* Turn off all interrupts */
2335	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2336
2337	/* MSI RID is 1 */
2338	if (adapter->msix == 1)
2339		rid = 1;
2340
2341	/* We allocate a single interrupt resource */
2342	adapter->res = bus_alloc_resource_any(dev,
2343	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2344	if (adapter->res == NULL) {
2345		device_printf(dev, "Unable to allocate bus resource: "
2346		    "interrupt\n");
2347		return (ENXIO);
2348	}
2349
2350	/*
2351	 * Try allocating a fast interrupt and the associated deferred
2352	 * processing contexts.
2353	 */
2354	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2355	/* Make tasklet for deferred link handling */
2356	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2357	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2358	    taskqueue_thread_enqueue, &que->tq);
2359	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2360	    device_get_nameunit(adapter->dev));
2361	if ((error = bus_setup_intr(dev, adapter->res,
2362	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2363	    adapter, &adapter->tag)) != 0) {
2364		device_printf(dev, "Failed to register fast interrupt "
2365			    "handler: %d\n", error);
2366		taskqueue_free(que->tq);
2367		que->tq = NULL;
2368		return (error);
2369	}
2370
2371	return (0);
2372}
2373
2374
2375/*********************************************************************
2376 *
2377 *  Setup the MSIX Queue Interrupt handlers:
2378 *
2379 **********************************************************************/
2380static int
2381igb_allocate_msix(struct adapter *adapter)
2382{
2383	device_t		dev = adapter->dev;
2384	struct igb_queue	*que = adapter->queues;
2385	int			error, rid, vector = 0;
2386
2387	/* Be sure to start with all interrupts disabled */
2388	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2389	E1000_WRITE_FLUSH(&adapter->hw);
2390
2391	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2392		rid = vector + 1;
2393		que->res = bus_alloc_resource_any(dev,
2394		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2395		if (que->res == NULL) {
2396			device_printf(dev,
2397			    "Unable to allocate bus resource: "
2398			    "MSIX Queue Interrupt\n");
2399			return (ENXIO);
2400		}
2401		error = bus_setup_intr(dev, que->res,
2402	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2403		    igb_msix_que, que, &que->tag);
2404		if (error) {
2405			que->res = NULL;
2406			device_printf(dev, "Failed to register Queue handler");
2407			return (error);
2408		}
2409#if __FreeBSD_version >= 800504
2410		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2411#endif
2412		que->msix = vector;
2413		if (adapter->hw.mac.type == e1000_82575)
2414			que->eims = E1000_EICR_TX_QUEUE0 << i;
2415		else
2416			que->eims = 1 << vector;
2417		/*
2418		** Bind the msix vector, and thus the
2419		** rings to the corresponding cpu.
2420		*/
2421		if (adapter->num_queues > 1)
2422			bus_bind_intr(dev, que->res, i);
2423		/* Make tasklet for deferred handling */
2424		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2425		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2426		    taskqueue_thread_enqueue, &que->tq);
2427		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2428		    device_get_nameunit(adapter->dev));
2429	}
2430
2431	/* And Link */
2432	rid = vector + 1;
2433	adapter->res = bus_alloc_resource_any(dev,
2434	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2435	if (adapter->res == NULL) {
2436		device_printf(dev,
2437		    "Unable to allocate bus resource: "
2438		    "MSIX Link Interrupt\n");
2439		return (ENXIO);
2440	}
2441	if ((error = bus_setup_intr(dev, adapter->res,
2442	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2443	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2444		device_printf(dev, "Failed to register Link handler");
2445		return (error);
2446	}
2447#if __FreeBSD_version >= 800504
2448	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2449#endif
2450	adapter->linkvec = vector;
2451
2452	return (0);
2453}
2454
2455
2456static void
2457igb_configure_queues(struct adapter *adapter)
2458{
2459	struct	e1000_hw	*hw = &adapter->hw;
2460	struct	igb_queue	*que;
2461	u32			tmp, ivar = 0, newitr = 0;
2462
2463	/* First turn on RSS capability */
2464	if (adapter->hw.mac.type != e1000_82575)
2465		E1000_WRITE_REG(hw, E1000_GPIE,
2466		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2467		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2468
2469	/* Turn on MSIX */
2470	switch (adapter->hw.mac.type) {
2471	case e1000_82580:
2472	case e1000_i350:
2473	case e1000_vfadapt:
2474	case e1000_vfadapt_i350:
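		/*
		** The code below assumes the following IVAR layout on
		** these MACs: each 32-bit IVAR register covers a pair of
		** queues, with byte 0 = RX of the even queue, byte 1 =
		** TX of the even queue, byte 2 = RX of the odd queue and
		** byte 3 = TX of the odd queue; hence index = i >> 1.
		*/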
2475		/* RX entries */
2476		for (int i = 0; i < adapter->num_queues; i++) {
2477			u32 index = i >> 1;
2478			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2479			que = &adapter->queues[i];
2480			if (i & 1) {
2481				ivar &= 0xFF00FFFF;
2482				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2483			} else {
2484				ivar &= 0xFFFFFF00;
2485				ivar |= que->msix | E1000_IVAR_VALID;
2486			}
2487			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2488		}
2489		/* TX entries */
2490		for (int i = 0; i < adapter->num_queues; i++) {
2491			u32 index = i >> 1;
2492			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2493			que = &adapter->queues[i];
2494			if (i & 1) {
2495				ivar &= 0x00FFFFFF;
2496				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2497			} else {
2498				ivar &= 0xFFFF00FF;
2499				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2500			}
2501			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2502			adapter->que_mask |= que->eims;
2503		}
2504
2505		/* And for the link interrupt */
2506		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2507		adapter->link_mask = 1 << adapter->linkvec;
2508		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2509		break;
2510	case e1000_82576:
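		/*
		** The code below assumes the 82576 wraps queue numbers
		** over eight IVAR registers: IVAR[i & 7] carries queue i
		** in its low half (RX in byte 0, TX in byte 1) and queue
		** i + 8 in its high half (RX in byte 2, TX in byte 3).
		*/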
2511		/* RX entries */
2512		for (int i = 0; i < adapter->num_queues; i++) {
2513			u32 index = i & 0x7; /* Each IVAR has two entries */
2514			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2515			que = &adapter->queues[i];
2516			if (i < 8) {
2517				ivar &= 0xFFFFFF00;
2518				ivar |= que->msix | E1000_IVAR_VALID;
2519			} else {
2520				ivar &= 0xFF00FFFF;
2521				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2522			}
2523			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2524			adapter->que_mask |= que->eims;
2525		}
2526		/* TX entries */
2527		for (int i = 0; i < adapter->num_queues; i++) {
2528			u32 index = i & 0x7; /* Each IVAR has two entries */
2529			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2530			que = &adapter->queues[i];
2531			if (i < 8) {
2532				ivar &= 0xFFFF00FF;
2533				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2534			} else {
2535				ivar &= 0x00FFFFFF;
2536				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2537			}
2538			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2539			adapter->que_mask |= que->eims;
2540		}
2541
2542		/* And for the link interrupt */
2543		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2544		adapter->link_mask = 1 << adapter->linkvec;
2545		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2546		break;
2547
2548	case e1000_82575:
2549		/* Enable MSI-X support */
2550		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2551		tmp |= E1000_CTRL_EXT_PBA_CLR;
2552		/* Auto-Mask interrupts upon ICR read. */
2553		tmp |= E1000_CTRL_EXT_EIAME;
2554		tmp |= E1000_CTRL_EXT_IRCA;
2555		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2556
2557		/* Queues */
2558		for (int i = 0; i < adapter->num_queues; i++) {
2559			que = &adapter->queues[i];
2560			tmp = E1000_EICR_RX_QUEUE0 << i;
2561			tmp |= E1000_EICR_TX_QUEUE0 << i;
2562			que->eims = tmp;
2563			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2564			    i, que->eims);
2565			adapter->que_mask |= que->eims;
2566		}
2567
2568		/* Link */
2569		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2570		    E1000_EIMS_OTHER);
2571		adapter->link_mask |= E1000_EIMS_OTHER;
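		/* FALLTHROUGH */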
2572	default:
2573		break;
2574	}
2575
2576	/* Set the starting interrupt rate */
2577	if (igb_max_interrupt_rate > 0)
2578		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2579
2580	if (hw->mac.type == e1000_82575)
2581		newitr |= newitr << 16;
2582	else
2583		newitr |= E1000_EITR_CNT_IGNR;
2584
2585	for (int i = 0; i < adapter->num_queues; i++) {
2586		que = &adapter->queues[i];
2587		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2588	}
2589
2590	return;
2591}
2592
2593
2594static void
2595igb_free_pci_resources(struct adapter *adapter)
2596{
2597	struct		igb_queue *que = adapter->queues;
2598	device_t	dev = adapter->dev;
2599	int		rid;
2600
2601	/*
2602	** There is a slight possibility of a failure mode
2603	** in attach that will result in entering this function
2604	** before interrupt resources have been initialized, and
2605	** in that case we do not want to execute the loops below.
2606	** We can detect this reliably by the state of the adapter's
2607	** res pointer.
2608	*/
2609	if (adapter->res == NULL)
2610		goto mem;
2611
2612	/*
2613	 * First release all the interrupt resources:
2614	 */
2615	for (int i = 0; i < adapter->num_queues; i++, que++) {
2616		rid = que->msix + 1;
2617		if (que->tag != NULL) {
2618			bus_teardown_intr(dev, que->res, que->tag);
2619			que->tag = NULL;
2620		}
2621		if (que->res != NULL)
2622			bus_release_resource(dev,
2623			    SYS_RES_IRQ, rid, que->res);
2624	}
2625
2626	/* Clean the Legacy or Link interrupt last */
2627	if (adapter->linkvec) /* we are doing MSIX */
2628		rid = adapter->linkvec + 1;
2629	else
2630		rid = (adapter->msix != 0) ? 1 : 0;
2631
2632	if (adapter->tag != NULL) {
2633		bus_teardown_intr(dev, adapter->res, adapter->tag);
2634		adapter->tag = NULL;
2635	}
2636	if (adapter->res != NULL)
2637		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2638
2639mem:
2640	if (adapter->msix)
2641		pci_release_msi(dev);
2642
2643	if (adapter->msix_mem != NULL)
2644		bus_release_resource(dev, SYS_RES_MEMORY,
2645		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2646
2647	if (adapter->pci_mem != NULL)
2648		bus_release_resource(dev, SYS_RES_MEMORY,
2649		    PCIR_BAR(0), adapter->pci_mem);
2650
2651}
2652
2653/*
2654 * Setup Either MSI/X or MSI
2655 */
2656static int
2657igb_setup_msix(struct adapter *adapter)
2658{
2659	device_t dev = adapter->dev;
2660	int rid, want, queues, msgs;
2661
2662	/* tuneable override */
2663	if (igb_enable_msix == 0)
2664		goto msi;
2665
2666	/* First try MSI/X */
2667	rid = PCIR_BAR(IGB_MSIX_BAR);
2668	adapter->msix_mem = bus_alloc_resource_any(dev,
2669	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2670	if (!adapter->msix_mem) {
2671		/* May not be enabled */
2672		device_printf(adapter->dev,
2673		    "Unable to map MSIX table\n");
2674		goto msi;
2675	}
2676
2677	msgs = pci_msix_count(dev);
2678	if (msgs == 0) { /* system has msix disabled */
2679		bus_release_resource(dev, SYS_RES_MEMORY,
2680		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2681		adapter->msix_mem = NULL;
2682		goto msi;
2683	}
2684
2685	/* Figure out a reasonable auto config value */
2686	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2687
2688	/* Manual override */
2689	if (igb_num_queues != 0)
2690		queues = igb_num_queues;
2691	if (queues > 8)  /* max queues */
2692		queues = 8;
2693
2694	/* Can have max of 4 queues on 82575 */
2695	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2696		queues = 4;
2697
2698	/* Limit the VF devices to one queue */
2699	if (adapter->vf_ifp)
2700		queues = 1;
2701
2702	/*
2703	** One vector (RX/TX pair) per queue
2704	** plus an additional one for the link interrupt
2705	*/
2706	want = queues + 1;
2707	if (msgs >= want)
2708		msgs = want;
2709	else {
2710               	device_printf(adapter->dev,
2711		    "MSIX Configuration Problem, "
2712		    "%d vectors configured, but %d queues wanted!\n",
2713		    msgs, want);
2714		return (ENXIO);
2715	}
2716	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2717               	device_printf(adapter->dev,
2718		    "Using MSIX interrupts with %d vectors\n", msgs);
2719		adapter->num_queues = queues;
2720		return (msgs);
2721	}
2722msi:
2723       	msgs = pci_msi_count(dev);
2724       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2725               	device_printf(adapter->dev,"Using MSI interrupt\n");
2726	return (msgs);
2727}
2728
2729/*********************************************************************
2730 *
2731 *  Set up a fresh starting state
2732 *
2733 **********************************************************************/
2734static void
2735igb_reset(struct adapter *adapter)
2736{
2737	device_t	dev = adapter->dev;
2738	struct e1000_hw *hw = &adapter->hw;
2739	struct e1000_fc_info *fc = &hw->fc;
2740	struct ifnet	*ifp = adapter->ifp;
2741	u32		pba = 0;
2742	u16		hwm;
2743
2744	INIT_DEBUGOUT("igb_reset: begin");
2745
2746	/* Let the firmware know the OS is in control */
2747	igb_get_hw_control(adapter);
2748
2749	/*
2750	 * Packet Buffer Allocation (PBA)
2751	 * Writing PBA sets the receive portion of the buffer;
2752	 * the remainder is used for the transmit buffer.
2753	 */
2754	switch (hw->mac.type) {
2755	case e1000_82575:
2756		pba = E1000_PBA_32K;
2757		break;
2758	case e1000_82576:
2759	case e1000_vfadapt:
2760		pba = E1000_READ_REG(hw, E1000_RXPBS);
2761		pba &= E1000_RXPBS_SIZE_MASK_82576;
2762		break;
2763	case e1000_82580:
2764	case e1000_i350:
2765	case e1000_vfadapt_i350:
2766		pba = E1000_READ_REG(hw, E1000_RXPBS);
2767		pba = e1000_rxpbs_adjust_82580(pba);
2768		break;
2769	default:
2770		break;
2771	}
2772
2773	/* Special needs in case of Jumbo frames */
2774	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2775		u32 tx_space, min_tx, min_rx;
2776		pba = E1000_READ_REG(hw, E1000_PBA);
2777		tx_space = pba >> 16;
2778		pba &= 0xffff;
2779		min_tx = (adapter->max_frame_size +
2780		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2781		min_tx = roundup2(min_tx, 1024);
2782		min_tx >>= 10;
2783		min_rx = adapter->max_frame_size;
2784		min_rx = roundup2(min_rx, 1024);
2785		min_rx >>= 10;
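		/*
		 * Example (assuming a 9000-byte MTU, hence a max frame of
		 * about 9018 bytes): min_tx = (9018 + 16 - 4) * 2 = 18060,
		 * which rounds up to 18 KB; min_rx rounds up to 9 KB.
		 */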
2786		if (tx_space < min_tx &&
2787		    ((min_tx - tx_space) < pba)) {
2788			pba = pba - (min_tx - tx_space);
2789			/*
2790			 * If short on rx space, rx wins
2791			 * and must trump tx adjustment.
2792			 */
2793			if (pba < min_rx)
2794				pba = min_rx;
2795		}
2796		E1000_WRITE_REG(hw, E1000_PBA, pba);
2797	}
2798
2799	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2800
2801	/*
2802	 * These parameters control the automatic generation (Tx) and
2803	 * response (Rx) to Ethernet PAUSE frames.
2804	 * - High water mark should allow for at least two frames to be
2805	 *   received after sending an XOFF.
2806	 * - Low water mark works best when it is very near the high water mark.
2807	 *   This allows the receiver to restart by sending XON when it has
2808	 *   drained a bit.
2809	 */
2810	hwm = min(((pba << 10) * 9 / 10),
2811	    ((pba << 10) - 2 * adapter->max_frame_size));
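	/*
	 * For example, with a 32 KB PBA and a 1518-byte max frame this
	 * picks min(29491, 29732) = 29491 bytes, i.e. 90% of the receive
	 * packet buffer.
	 */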
2812
2813	if (hw->mac.type < e1000_82576) {
2814		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2815		fc->low_water = fc->high_water - 8;
2816	} else {
2817		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2818		fc->low_water = fc->high_water - 16;
2819	}
2820
2821	fc->pause_time = IGB_FC_PAUSE_TIME;
2822	fc->send_xon = TRUE;
2823	if (adapter->fc)
2824		fc->requested_mode = adapter->fc;
2825	else
2826		fc->requested_mode = e1000_fc_default;
2827
2828	/* Issue a global reset */
2829	e1000_reset_hw(hw);
2830	E1000_WRITE_REG(hw, E1000_WUC, 0);
2831
2832	if (e1000_init_hw(hw) < 0)
2833		device_printf(dev, "Hardware Initialization Failed\n");
2834
2835	/* Setup DMA Coalescing */
2836	if (hw->mac.type == e1000_i350) {
2837		u32 reg = ~E1000_DMACR_DMAC_EN;
2838
2839		if (adapter->dmac == 0) { /* Disabling it */
2840			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2841			goto reset_out;
2842		}
2843
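		/*
		** Note: the thresholds below are derived from the packet
		** buffer size (in KB): the watermark is set 4 KB below
		** the top of the PBA and the DMA coalescing threshold
		** 6 KB below it.
		*/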
2844		hwm = (pba - 4) << 10;
2845		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2846		    & E1000_DMACR_DMACTHR_MASK);
2847
2848		/* Transition to L0s or L1 if available */
2849		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2850
2851		/* timer = value in adapter->dmac in 32usec intervals */
2852		reg |= (adapter->dmac >> 5);
2853		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2854
2855		/* No lower threshold */
2856		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2857
2858		/* Set hwm to roughly PBA - 2 * max frame size */
2859		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2860
2861		/* Set the interval before transition */
2862		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2863		reg |= 0x800000FF; /* 255 usec */
2864		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2865
2866		/* free space in tx packet buffer to wake from DMA coal */
2867		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2868		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2869
2870		/* make low power state decision controlled by DMA coal */
2871		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2872		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2873		    reg | E1000_PCIEMISC_LX_DECISION);
2874		device_printf(dev, "DMA Coalescing enabled\n");
2875	}
2876
2877reset_out:
2878	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2879	e1000_get_phy_info(hw);
2880	e1000_check_for_link(hw);
2881	return;
2882}
2883
2884/*********************************************************************
2885 *
2886 *  Setup networking device structure and register an interface.
2887 *
2888 **********************************************************************/
2889static int
2890igb_setup_interface(device_t dev, struct adapter *adapter)
2891{
2892	struct ifnet   *ifp;
2893
2894	INIT_DEBUGOUT("igb_setup_interface: begin");
2895
2896	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2897	if (ifp == NULL) {
2898		device_printf(dev, "can not allocate ifnet structure\n");
2899		return (-1);
2900	}
2901	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2902	ifp->if_init =  igb_init;
2903	ifp->if_softc = adapter;
2904	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2905	ifp->if_ioctl = igb_ioctl;
2906	ifp->if_start = igb_start;
2907#if __FreeBSD_version >= 800000
2908	ifp->if_transmit = igb_mq_start;
2909	ifp->if_qflush = igb_qflush;
2910#endif
2911	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2912	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2913	IFQ_SET_READY(&ifp->if_snd);
2914
2915	ether_ifattach(ifp, adapter->hw.mac.addr);
2916
2917	ifp->if_capabilities = ifp->if_capenable = 0;
2918
2919	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2920	ifp->if_capabilities |= IFCAP_TSO4;
2921	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2922	ifp->if_capenable = ifp->if_capabilities;
2923
2924	/* Don't enable LRO by default */
2925	ifp->if_capabilities |= IFCAP_LRO;
2926
2927#ifdef DEVICE_POLLING
2928	ifp->if_capabilities |= IFCAP_POLLING;
2929#endif
2930
2931	/*
2932	 * Tell the upper layer(s) we
2933	 * support full VLAN capability.
2934	 */
2935	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2936	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2937			     |  IFCAP_VLAN_HWTSO
2938			     |  IFCAP_VLAN_MTU;
2939	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2940			  |  IFCAP_VLAN_HWTSO
2941			  |  IFCAP_VLAN_MTU;
2942
2943	/*
2944	** Don't turn this on by default: if vlans are
2945	** created on another pseudo device (e.g. lagg),
2946	** vlan events are not passed through, which breaks
2947	** operation, but with HW FILTER off it works. If
2948	** you use vlans directly on the igb driver you can
2949	** enable this and get full hardware tag filtering.
2950	*/
2951	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2952
2953	/*
2954	 * Specify the media types supported by this adapter and register
2955	 * callbacks to update media and link information
2956	 */
2957	ifmedia_init(&adapter->media, IFM_IMASK,
2958	    igb_media_change, igb_media_status);
2959	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2960	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2961		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2962			    0, NULL);
2963		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2964	} else {
2965		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2966		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2967			    0, NULL);
2968		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2969			    0, NULL);
2970		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2971			    0, NULL);
2972		if (adapter->hw.phy.type != e1000_phy_ife) {
2973			ifmedia_add(&adapter->media,
2974				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2975			ifmedia_add(&adapter->media,
2976				IFM_ETHER | IFM_1000_T, 0, NULL);
2977		}
2978	}
2979	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2980	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2981	return (0);
2982}
2983
2984
2985/*
2986 * Manage DMA'able memory.
2987 */
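/*
 * Callback for bus_dmamap_load(): records the physical address of
 * the single segment backing a descriptor ring allocation.
 */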
2988static void
2989igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2990{
2991	if (error)
2992		return;
2993	*(bus_addr_t *) arg = segs[0].ds_addr;
2994}
2995
2996static int
2997igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2998        struct igb_dma_alloc *dma, int mapflags)
2999{
3000	int error;
3001
3002	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3003				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3004				BUS_SPACE_MAXADDR,	/* lowaddr */
3005				BUS_SPACE_MAXADDR,	/* highaddr */
3006				NULL, NULL,		/* filter, filterarg */
3007				size,			/* maxsize */
3008				1,			/* nsegments */
3009				size,			/* maxsegsize */
3010				0,			/* flags */
3011				NULL,			/* lockfunc */
3012				NULL,			/* lockarg */
3013				&dma->dma_tag);
3014	if (error) {
3015		device_printf(adapter->dev,
3016		    "%s: bus_dma_tag_create failed: %d\n",
3017		    __func__, error);
3018		goto fail_0;
3019	}
3020
3021	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3022	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3023	if (error) {
3024		device_printf(adapter->dev,
3025		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3026		    __func__, (uintmax_t)size, error);
3027		goto fail_2;
3028	}
3029
3030	dma->dma_paddr = 0;
3031	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3032	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3033	if (error || dma->dma_paddr == 0) {
3034		device_printf(adapter->dev,
3035		    "%s: bus_dmamap_load failed: %d\n",
3036		    __func__, error);
3037		goto fail_3;
3038	}
3039
3040	return (0);
3041
3042fail_3:
3043	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3044fail_2:
3045	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3046	bus_dma_tag_destroy(dma->dma_tag);
3047fail_0:
3048	dma->dma_map = NULL;
3049	dma->dma_tag = NULL;
3050
3051	return (error);
3052}
3053
3054static void
3055igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3056{
3057	if (dma->dma_tag == NULL)
3058		return;
3059	if (dma->dma_map != NULL) {
3060		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3061		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3062		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3063		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3064		dma->dma_map = NULL;
3065	}
3066	bus_dma_tag_destroy(dma->dma_tag);
3067	dma->dma_tag = NULL;
3068}
3069
3070
3071/*********************************************************************
3072 *
3073 *  Allocate memory for the transmit and receive rings, and then
3074 *  the descriptors associated with each, called only once at attach.
3075 *
3076 **********************************************************************/
3077static int
3078igb_allocate_queues(struct adapter *adapter)
3079{
3080	device_t dev = adapter->dev;
3081	struct igb_queue	*que = NULL;
3082	struct tx_ring		*txr = NULL;
3083	struct rx_ring		*rxr = NULL;
3084	int rsize, tsize, error = E1000_SUCCESS;
3085	int txconf = 0, rxconf = 0;
3086
3087	/* First allocate the top level queue structs */
3088	if (!(adapter->queues =
3089	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3090	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3091		device_printf(dev, "Unable to allocate queue memory\n");
3092		error = ENOMEM;
3093		goto fail;
3094	}
3095
3096	/* Next allocate the TX ring struct memory */
3097	if (!(adapter->tx_rings =
3098	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3099	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3100		device_printf(dev, "Unable to allocate TX ring memory\n");
3101		error = ENOMEM;
3102		goto tx_fail;
3103	}
3104
3105	/* Now allocate the RX */
3106	if (!(adapter->rx_rings =
3107	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3108	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3109		device_printf(dev, "Unable to allocate RX ring memory\n");
3110		error = ENOMEM;
3111		goto rx_fail;
3112	}
3113
3114	tsize = roundup2(adapter->num_tx_desc *
3115	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3116	/*
3117	 * Now set up the TX queues, txconf is needed to handle the
3118	 * possibility that things fail midcourse and we need to
3119	 * undo memory gracefully
3120	 */
3121	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3122		/* Set up some basics */
3123		txr = &adapter->tx_rings[i];
3124		txr->adapter = adapter;
3125		txr->me = i;
3126
3127		/* Initialize the TX lock */
3128		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3129		    device_get_nameunit(dev), txr->me);
3130		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3131
3132		if (igb_dma_malloc(adapter, tsize,
3133			&txr->txdma, BUS_DMA_NOWAIT)) {
3134			device_printf(dev,
3135			    "Unable to allocate TX Descriptor memory\n");
3136			error = ENOMEM;
3137			goto err_tx_desc;
3138		}
3139		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3140		bzero((void *)txr->tx_base, tsize);
3141
3142        	/* Now allocate transmit buffers for the ring */
3143        	if (igb_allocate_transmit_buffers(txr)) {
3144			device_printf(dev,
3145			    "Critical Failure setting up transmit buffers\n");
3146			error = ENOMEM;
3147			goto err_tx_desc;
3148        	}
3149#if __FreeBSD_version >= 800000
3150		/* Allocate a buf ring */
3151		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3152		    M_WAITOK, &txr->tx_mtx);
3153#endif
3154	}
3155
3156	/*
3157	 * Next the RX queues...
3158	 */
3159	rsize = roundup2(adapter->num_rx_desc *
3160	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3161	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3162		rxr = &adapter->rx_rings[i];
3163		rxr->adapter = adapter;
3164		rxr->me = i;
3165
3166		/* Initialize the RX lock */
3167		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3168		    device_get_nameunit(dev), rxr->me);
3169		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3170
3171		if (igb_dma_malloc(adapter, rsize,
3172			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3173			device_printf(dev,
3174			    "Unable to allocate RxDescriptor memory\n");
3175			error = ENOMEM;
3176			goto err_rx_desc;
3177		}
3178		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3179		bzero((void *)rxr->rx_base, rsize);
3180
3181        	/* Allocate receive buffers for the ring*/
3182		if (igb_allocate_receive_buffers(rxr)) {
3183			device_printf(dev,
3184			    "Critical Failure setting up receive buffers\n");
3185			error = ENOMEM;
3186			goto err_rx_desc;
3187		}
3188	}
3189
3190	/*
3191	** Finally set up the queue holding structs
3192	*/
3193	for (int i = 0; i < adapter->num_queues; i++) {
3194		que = &adapter->queues[i];
3195		que->adapter = adapter;
3196		que->txr = &adapter->tx_rings[i];
3197		que->rxr = &adapter->rx_rings[i];
3198	}
3199
3200	return (0);
3201
3202err_rx_desc:
3203	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3204		igb_dma_free(adapter, &rxr->rxdma);
3205err_tx_desc:
3206	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3207		igb_dma_free(adapter, &txr->txdma);
3208	free(adapter->rx_rings, M_DEVBUF);
3209rx_fail:
3210#if __FreeBSD_version >= 800000
3211	buf_ring_free(txr->br, M_DEVBUF);
3212#endif
3213	free(adapter->tx_rings, M_DEVBUF);
3214tx_fail:
3215	free(adapter->queues, M_DEVBUF);
3216fail:
3217	return (error);
3218}
3219
3220/*********************************************************************
3221 *
3222 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3223 *  the information needed to transmit a packet on the wire. This is
3224 *  called only once at attach; setup is done every reset.
3225 *
3226 **********************************************************************/
3227static int
3228igb_allocate_transmit_buffers(struct tx_ring *txr)
3229{
3230	struct adapter *adapter = txr->adapter;
3231	device_t dev = adapter->dev;
3232	struct igb_tx_buffer *txbuf;
3233	int error, i;
3234
3235	/*
3236	 * Setup DMA descriptor areas.
3237	 */
3238	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3239			       1, 0,			/* alignment, bounds */
3240			       BUS_SPACE_MAXADDR,	/* lowaddr */
3241			       BUS_SPACE_MAXADDR,	/* highaddr */
3242			       NULL, NULL,		/* filter, filterarg */
3243			       IGB_TSO_SIZE,		/* maxsize */
3244			       IGB_MAX_SCATTER,		/* nsegments */
3245			       PAGE_SIZE,		/* maxsegsize */
3246			       0,			/* flags */
3247			       NULL,			/* lockfunc */
3248			       NULL,			/* lockfuncarg */
3249			       &txr->txtag))) {
3250		device_printf(dev,"Unable to allocate TX DMA tag\n");
3251		goto fail;
3252	}
3253
3254	if (!(txr->tx_buffers =
3255	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3256	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3257		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3258		error = ENOMEM;
3259		goto fail;
3260	}
3261
3262        /* Create the descriptor buffer dma maps */
3263	txbuf = txr->tx_buffers;
3264	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3265		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3266		if (error != 0) {
3267			device_printf(dev, "Unable to create TX DMA map\n");
3268			goto fail;
3269		}
3270	}
3271
3272	return 0;
3273fail:
3274	/* We free everything; this handles the case where we fail midway */
3275	igb_free_transmit_structures(adapter);
3276	return (error);
3277}
3278
3279/*********************************************************************
3280 *
3281 *  Initialize a transmit ring.
3282 *
3283 **********************************************************************/
3284static void
3285igb_setup_transmit_ring(struct tx_ring *txr)
3286{
3287	struct adapter *adapter = txr->adapter;
3288	struct igb_tx_buffer *txbuf;
3289	int i;
3290#ifdef DEV_NETMAP
3291	struct netmap_adapter *na = NA(adapter->ifp);
3292	struct netmap_slot *slot;
3293#endif /* DEV_NETMAP */
3294
3295	/* Clear the old descriptor contents */
3296	IGB_TX_LOCK(txr);
3297#ifdef DEV_NETMAP
3298	slot = netmap_reset(na, NR_TX, txr->me, 0);
3299#endif /* DEV_NETMAP */
3300	bzero((void *)txr->tx_base,
3301	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3302	/* Reset indices */
3303	txr->next_avail_desc = 0;
3304	txr->next_to_clean = 0;
3305
3306	/* Free any existing tx buffers. */
3307        txbuf = txr->tx_buffers;
3308	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3309		if (txbuf->m_head != NULL) {
3310			bus_dmamap_sync(txr->txtag, txbuf->map,
3311			    BUS_DMASYNC_POSTWRITE);
3312			bus_dmamap_unload(txr->txtag, txbuf->map);
3313			m_freem(txbuf->m_head);
3314			txbuf->m_head = NULL;
3315		}
3316#ifdef DEV_NETMAP
3317		if (slot) {
3318			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3319			/* no need to set the address */
3320			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3321		}
3322#endif /* DEV_NETMAP */
3323		/* clear the watch index */
3324		txbuf->next_eop = -1;
3325        }
3326
3327	/* Set number of descriptors available */
3328	txr->tx_avail = adapter->num_tx_desc;
3329
3330	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3331	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3332	IGB_TX_UNLOCK(txr);
3333}
3334
3335/*********************************************************************
3336 *
3337 *  Initialize all transmit rings.
3338 *
3339 **********************************************************************/
3340static void
3341igb_setup_transmit_structures(struct adapter *adapter)
3342{
3343	struct tx_ring *txr = adapter->tx_rings;
3344
3345	for (int i = 0; i < adapter->num_queues; i++, txr++)
3346		igb_setup_transmit_ring(txr);
3347
3348	return;
3349}
3350
3351/*********************************************************************
3352 *
3353 *  Enable transmit unit.
3354 *
3355 **********************************************************************/
3356static void
3357igb_initialize_transmit_units(struct adapter *adapter)
3358{
3359	struct tx_ring	*txr = adapter->tx_rings;
3360	struct e1000_hw *hw = &adapter->hw;
3361	u32		tctl, txdctl;
3362
3363	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3364	tctl = txdctl = 0;
3365
3366	/* Setup the Tx Descriptor Rings */
3367	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3368		u64 bus_addr = txr->txdma.dma_paddr;
3369
3370		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3371		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3372		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3373		    (uint32_t)(bus_addr >> 32));
3374		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3375		    (uint32_t)bus_addr);
3376
3377		/* Setup the HW Tx Head and Tail descriptor pointers */
3378		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3379		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3380
3381		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3382		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3383		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3384
3385		txr->queue_status = IGB_QUEUE_IDLE;
3386
3387		txdctl |= IGB_TX_PTHRESH;
3388		txdctl |= IGB_TX_HTHRESH << 8;
3389		txdctl |= IGB_TX_WTHRESH << 16;
3390		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3391		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3392	}
3393
3394	if (adapter->vf_ifp)
3395		return;
3396
3397	e1000_config_collision_dist(hw);
3398
3399	/* Program the Transmit Control Register */
3400	tctl = E1000_READ_REG(hw, E1000_TCTL);
3401	tctl &= ~E1000_TCTL_CT;
3402	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3403		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3404
3405	/* This write will effectively turn on the transmit unit. */
3406	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3407}
3408
3409/*********************************************************************
3410 *
3411 *  Free all transmit rings.
3412 *
3413 **********************************************************************/
3414static void
3415igb_free_transmit_structures(struct adapter *adapter)
3416{
3417	struct tx_ring *txr = adapter->tx_rings;
3418
3419	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3420		IGB_TX_LOCK(txr);
3421		igb_free_transmit_buffers(txr);
3422		igb_dma_free(adapter, &txr->txdma);
3423		IGB_TX_UNLOCK(txr);
3424		IGB_TX_LOCK_DESTROY(txr);
3425	}
3426	free(adapter->tx_rings, M_DEVBUF);
3427}
3428
3429/*********************************************************************
3430 *
3431 *  Free transmit ring related data structures.
3432 *
3433 **********************************************************************/
3434static void
3435igb_free_transmit_buffers(struct tx_ring *txr)
3436{
3437	struct adapter *adapter = txr->adapter;
3438	struct igb_tx_buffer *tx_buffer;
3439	int             i;
3440
3441	INIT_DEBUGOUT("free_transmit_ring: begin");
3442
3443	if (txr->tx_buffers == NULL)
3444		return;
3445
3446	tx_buffer = txr->tx_buffers;
3447	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3448		if (tx_buffer->m_head != NULL) {
3449			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3450			    BUS_DMASYNC_POSTWRITE);
3451			bus_dmamap_unload(txr->txtag,
3452			    tx_buffer->map);
3453			m_freem(tx_buffer->m_head);
3454			tx_buffer->m_head = NULL;
3455			if (tx_buffer->map != NULL) {
3456				bus_dmamap_destroy(txr->txtag,
3457				    tx_buffer->map);
3458				tx_buffer->map = NULL;
3459			}
3460		} else if (tx_buffer->map != NULL) {
3461			bus_dmamap_unload(txr->txtag,
3462			    tx_buffer->map);
3463			bus_dmamap_destroy(txr->txtag,
3464			    tx_buffer->map);
3465			tx_buffer->map = NULL;
3466		}
3467	}
3468#if __FreeBSD_version >= 800000
3469	if (txr->br != NULL)
3470		buf_ring_free(txr->br, M_DEVBUF);
3471#endif
3472	if (txr->tx_buffers != NULL) {
3473		free(txr->tx_buffers, M_DEVBUF);
3474		txr->tx_buffers = NULL;
3475	}
3476	if (txr->txtag != NULL) {
3477		bus_dma_tag_destroy(txr->txtag);
3478		txr->txtag = NULL;
3479	}
3480	return;
3481}
3482
3483/**********************************************************************
3484 *
3485 *  Setup work for hardware segmentation offload (TSO)
3486 *
3487 **********************************************************************/
3488static bool
3489igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3490	struct ip *ip, struct tcphdr *th)
3491{
3492	struct adapter *adapter = txr->adapter;
3493	struct e1000_adv_tx_context_desc *TXD;
3494	struct igb_tx_buffer        *tx_buffer;
3495	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3496	u32 mss_l4len_idx = 0;
3497	u16 vtag = 0;
3498	int ctxd, ip_hlen, tcp_hlen;
3499
3500	ctxd = txr->next_avail_desc;
3501	tx_buffer = &txr->tx_buffers[ctxd];
3502	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3503
3504	ip->ip_sum = 0;
3505	ip_hlen = ip->ip_hl << 2;
3506	tcp_hlen = th->th_off << 2;
3507
3508	/* VLAN MACLEN IPLEN */
3509	if (mp->m_flags & M_VLANTAG) {
3510		vtag = htole16(mp->m_pkthdr.ether_vtag);
3511		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3512	}
3513
3514	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3515	vlan_macip_lens |= ip_hlen;
3516	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3517
3518	/* ADV DTYPE TUCMD */
3519	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3520	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3521	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3522	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3523
3524	/* MSS L4LEN IDX */
3525	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3526	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3527	/* 82575 needs the queue index added */
3528	if (adapter->hw.mac.type == e1000_82575)
3529		mss_l4len_idx |= txr->me << 4;
3530	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3531
3532	TXD->seqnum_seed = htole32(0);
3533	tx_buffer->m_head = NULL;
3534	tx_buffer->next_eop = -1;
3535
3536	if (++ctxd == adapter->num_tx_desc)
3537		ctxd = 0;
3538
3539	txr->tx_avail--;
3540	txr->next_avail_desc = ctxd;
3541	return TRUE;
3542}
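/*
 * Illustrative walk-through of the field packing above (values are
 * examples only, not taken from real traffic): with ip->ip_hl == 5 and
 * th->th_off == 8, ip_hlen = 5 << 2 = 20 and tcp_hlen = 8 << 2 = 32.
 * vlan_macip_lens then carries the VLAN tag (if any) shifted by
 * E1000_ADVTXD_VLAN_SHIFT, the ethernet header length shifted by
 * E1000_ADVTXD_MACLEN_SHIFT, and the 20-byte IP header length in the
 * low bits; mss_l4len_idx carries mp->m_pkthdr.tso_segsz and the
 * 32-byte TCP header length in their respective fields. The context
 * descriptor itself consumes one ring slot, hence the tx_avail decrement.
 */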
3543
3544
3545/*********************************************************************
3546 *
3547 *  Context Descriptor setup for VLAN or CSUM
3548 *
3549 **********************************************************************/
3550
3551static bool
3552igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3553{
3554	struct adapter *adapter = txr->adapter;
3555	struct e1000_adv_tx_context_desc *TXD;
3556	struct igb_tx_buffer        *tx_buffer;
3557	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3558	struct ether_vlan_header *eh;
3559	struct ip *ip = NULL;
3560	struct ip6_hdr *ip6;
3561	int  ehdrlen, ctxd, ip_hlen = 0;
3562	u16	etype, vtag = 0;
3563	u8	ipproto = 0;
3564	bool	offload = TRUE;
3565
3566	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3567		offload = FALSE;
3568
3569	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3570	ctxd = txr->next_avail_desc;
3571	tx_buffer = &txr->tx_buffers[ctxd];
3572	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3573
3574	/*
3575	** In advanced descriptors the vlan tag must
3576	** be placed into the context descriptor, thus
3577	** we need to be here just for that setup.
3578	*/
3579	if (mp->m_flags & M_VLANTAG) {
3580		vtag = htole16(mp->m_pkthdr.ether_vtag);
3581		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3582	} else if (offload == FALSE)
3583		return FALSE;
3584
3585	/*
3586	 * Determine where frame payload starts.
3587	 * Jump over vlan headers if already present,
3588	 * helpful for QinQ too.
3589	 */
3590	eh = mtod(mp, struct ether_vlan_header *);
3591	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3592		etype = ntohs(eh->evl_proto);
3593		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3594	} else {
3595		etype = ntohs(eh->evl_encap_proto);
3596		ehdrlen = ETHER_HDR_LEN;
3597	}
3598
3599	/* Set the ether header length */
3600	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3601
3602	switch (etype) {
3603		case ETHERTYPE_IP:
3604			ip = (struct ip *)(mp->m_data + ehdrlen);
3605			ip_hlen = ip->ip_hl << 2;
3606			if (mp->m_len < ehdrlen + ip_hlen) {
3607				offload = FALSE;
3608				break;
3609			}
3610			ipproto = ip->ip_p;
3611			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3612			break;
3613		case ETHERTYPE_IPV6:
3614			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3615			ip_hlen = sizeof(struct ip6_hdr);
3616			ipproto = ip6->ip6_nxt;
3617			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3618			break;
3619		default:
3620			offload = FALSE;
3621			break;
3622	}
3623
3624	vlan_macip_lens |= ip_hlen;
3625	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3626
3627	switch (ipproto) {
3628		case IPPROTO_TCP:
3629			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3630				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3631			break;
3632		case IPPROTO_UDP:
3633			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3634				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3635			break;
3636#if __FreeBSD_version >= 800000
3637		case IPPROTO_SCTP:
3638			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3639				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3640			break;
3641#endif
3642		default:
3643			offload = FALSE;
3644			break;
3645	}
3646
3647	/* 82575 needs the queue index added */
3648	if (adapter->hw.mac.type == e1000_82575)
3649		mss_l4len_idx = txr->me << 4;
3650
3651	/* Now copy bits into descriptor */
3652	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3653	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3654	TXD->seqnum_seed = htole32(0);
3655	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3656
3657	tx_buffer->m_head = NULL;
3658	tx_buffer->next_eop = -1;
3659
3660	/* We've consumed the first desc, adjust counters */
3661	if (++ctxd == adapter->num_tx_desc)
3662		ctxd = 0;
3663	txr->next_avail_desc = ctxd;
3664	--txr->tx_avail;
3665
3666        return (offload);
3667}
3668
3669
3670/**********************************************************************
3671 *
3672 *  Examine each tx_buffer in the used queue. If the hardware is done
3673 *  processing the packet then free associated resources. The
3674 *  tx_buffer is put back on the free queue.
3675 *
3676 *  TRUE return means there's work in the ring to clean; FALSE means it's empty.
3677 **********************************************************************/
3678static bool
3679igb_txeof(struct tx_ring *txr)
3680{
3681	struct adapter	*adapter = txr->adapter;
3682        int first, last, done, processed;
3683        struct igb_tx_buffer *tx_buffer;
3684        struct e1000_tx_desc   *tx_desc, *eop_desc;
3685	struct ifnet   *ifp = adapter->ifp;
3686
3687	IGB_TX_LOCK_ASSERT(txr);
3688
3689#ifdef DEV_NETMAP
3690	if (ifp->if_capenable & IFCAP_NETMAP) {
3691		struct netmap_adapter *na = NA(ifp);
3692
3693		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3694		IGB_TX_UNLOCK(txr);
3695		IGB_CORE_LOCK(adapter);
3696		selwakeuppri(&na->tx_si, PI_NET);
3697		IGB_CORE_UNLOCK(adapter);
3698		IGB_TX_LOCK(txr);
3699		return FALSE;
3700	}
3701#endif /* DEV_NETMAP */
3702        if (txr->tx_avail == adapter->num_tx_desc) {
3703		txr->queue_status = IGB_QUEUE_IDLE;
3704                return FALSE;
3705	}
3706
3707	processed = 0;
3708        first = txr->next_to_clean;
3709        tx_desc = &txr->tx_base[first];
3710        tx_buffer = &txr->tx_buffers[first];
3711	last = tx_buffer->next_eop;
3712        eop_desc = &txr->tx_base[last];
3713
3714	/*
3715	 * What this does is get the index of the
3716	 * first descriptor AFTER the EOP of the
3717	 * first packet; that way we can do the
3718	 * simple comparison in the inner while loop.
3719	 */
3720	if (++last == adapter->num_tx_desc)
3721 		last = 0;
3722	done = last;
3723
3724        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3725            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3726
3727        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3728		/* We clean the range of the packet */
3729		while (first != done) {
3730                	tx_desc->upper.data = 0;
3731                	tx_desc->lower.data = 0;
3732                	tx_desc->buffer_addr = 0;
3733                	++txr->tx_avail;
3734			++processed;
3735
3736			if (tx_buffer->m_head) {
3737				txr->bytes +=
3738				    tx_buffer->m_head->m_pkthdr.len;
3739				bus_dmamap_sync(txr->txtag,
3740				    tx_buffer->map,
3741				    BUS_DMASYNC_POSTWRITE);
3742				bus_dmamap_unload(txr->txtag,
3743				    tx_buffer->map);
3744
3745                        	m_freem(tx_buffer->m_head);
3746                        	tx_buffer->m_head = NULL;
3747                	}
3748			tx_buffer->next_eop = -1;
3749			txr->watchdog_time = ticks;
3750
3751	                if (++first == adapter->num_tx_desc)
3752				first = 0;
3753
3754	                tx_buffer = &txr->tx_buffers[first];
3755			tx_desc = &txr->tx_base[first];
3756		}
3757		++txr->packets;
3758		++ifp->if_opackets;
3759		/* See if we can continue to the next packet */
3760		last = tx_buffer->next_eop;
3761		if (last != -1) {
3762        		eop_desc = &txr->tx_base[last];
3763			/* Get new done point */
3764			if (++last == adapter->num_tx_desc) last = 0;
3765			done = last;
3766		} else
3767			break;
3768        }
3769        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3770            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3771
3772        txr->next_to_clean = first;
3773
3774	/*
3775	** Watchdog calculation: we know there's
3776	** work outstanding or the first return
3777	** would have been taken, so nothing
3778	** processed for too long indicates a hang.
3779	*/
3780	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3781		txr->queue_status |= IGB_QUEUE_HUNG;
3782        /*
3783         * If we have a minimum free,
3784         * clear depleted state bit
3785         */
3786        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3787                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3788
3789	/* All clean, turn off the watchdog */
3790	if (txr->tx_avail == adapter->num_tx_desc) {
3791		txr->queue_status = IGB_QUEUE_IDLE;
3792		return (FALSE);
3793        }
3794
3795	return (TRUE);
3796}
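/*
 * Example of the clean-loop indexing above (ring size is whatever
 * num_tx_desc was configured to; 1024 is used here only for
 * illustration): with first = 1020 and the packet's next_eop = 1023,
 * 'done' becomes 0 (one past the EOP, wrapped).  The inner loop then
 * releases descriptors 1020..1023 and stops when 'first' wraps to
 * 0 == done, after which the next packet's EOP status is examined.
 */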
3797
3798/*********************************************************************
3799 *
3800 *  Refresh mbuf buffers for RX descriptor rings
3801 *   - now keeps its own state, so discards due to resource
3802 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3803 *     it just returns, keeping its placeholder, so it can simply
3804 *     be called again later to retry.
3805 *
3806 **********************************************************************/
3807static void
3808igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3809{
3810	struct adapter		*adapter = rxr->adapter;
3811	bus_dma_segment_t	hseg[1];
3812	bus_dma_segment_t	pseg[1];
3813	struct igb_rx_buf	*rxbuf;
3814	struct mbuf		*mh, *mp;
3815	int			i, j, nsegs, error;
3816	bool			refreshed = FALSE;
3817
3818	i = j = rxr->next_to_refresh;
3819	/*
3820	** Get one descriptor beyond
3821	** our work mark to control
3822	** the loop.
3823        */
3824	if (++j == adapter->num_rx_desc)
3825		j = 0;
3826
3827	while (j != limit) {
3828		rxbuf = &rxr->rx_buffers[i];
3829		/* No hdr mbuf used with header split off */
3830		if (rxr->hdr_split == FALSE)
3831			goto no_split;
3832		if (rxbuf->m_head == NULL) {
3833			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3834			if (mh == NULL)
3835				goto update;
3836		} else
3837			mh = rxbuf->m_head;
3838
3839		mh->m_pkthdr.len = mh->m_len = MHLEN;
3840		mh->m_len = MHLEN;
3841		mh->m_flags |= M_PKTHDR;
3842		/* Get the memory mapping */
3843		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3844		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3845		if (error != 0) {
3846			printf("Refresh mbufs: hdr dmamap load"
3847			    " failure - %d\n", error);
3848			m_free(mh);
3849			rxbuf->m_head = NULL;
3850			goto update;
3851		}
3852		rxbuf->m_head = mh;
3853		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3854		    BUS_DMASYNC_PREREAD);
3855		rxr->rx_base[i].read.hdr_addr =
3856		    htole64(hseg[0].ds_addr);
3857no_split:
3858		if (rxbuf->m_pack == NULL) {
3859			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3860			    M_PKTHDR, adapter->rx_mbuf_sz);
3861			if (mp == NULL)
3862				goto update;
3863		} else
3864			mp = rxbuf->m_pack;
3865
3866		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3867		/* Get the memory mapping */
3868		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3869		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3870		if (error != 0) {
3871			printf("Refresh mbufs: payload dmamap load"
3872			    " failure - %d\n", error);
3873			m_free(mp);
3874			rxbuf->m_pack = NULL;
3875			goto update;
3876		}
3877		rxbuf->m_pack = mp;
3878		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3879		    BUS_DMASYNC_PREREAD);
3880		rxr->rx_base[i].read.pkt_addr =
3881		    htole64(pseg[0].ds_addr);
3882		refreshed = TRUE; /* I feel wefreshed :) */
3883
3884		i = j; /* our next is precalculated */
3885		rxr->next_to_refresh = i;
3886		if (++j == adapter->num_rx_desc)
3887			j = 0;
3888	}
3889update:
3890	if (refreshed) /* update tail */
3891		E1000_WRITE_REG(&adapter->hw,
3892		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3893	return;
3894}
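/*
 * Example of the two-index refresh scheme above (a ring size of 1024
 * is illustrative only): with next_to_refresh = 1022 and limit = 2,
 * 'j' leads 'i' by one slot, so the loop reloads descriptors 1022,
 * 1023 and 0, leaves next_to_refresh at 1, and then writes RDT so the
 * hardware sees the newly provisioned buffers.  If an mbuf allocation
 * fails mid-way, the 'update' path still publishes any slots that
 * were refreshed before the failure.
 */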
3895
3896
3897/*********************************************************************
3898 *
3899 *  Allocate memory for rx_buffer structures. Since we use one
3900 *  rx_buffer per received packet, the maximum number of rx_buffer's
3901 *  that we'll need is equal to the number of receive descriptors
3902 *  that we've allocated.
3903 *
3904 **********************************************************************/
3905static int
3906igb_allocate_receive_buffers(struct rx_ring *rxr)
3907{
3908	struct	adapter 	*adapter = rxr->adapter;
3909	device_t 		dev = adapter->dev;
3910	struct igb_rx_buf	*rxbuf;
3911	int             	i, bsize, error;
3912
3913	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3914	if (!(rxr->rx_buffers =
3915	    (struct igb_rx_buf *) malloc(bsize,
3916	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3917		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3918		error = ENOMEM;
3919		goto fail;
3920	}
3921
3922	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3923				   1, 0,		/* alignment, bounds */
3924				   BUS_SPACE_MAXADDR,	/* lowaddr */
3925				   BUS_SPACE_MAXADDR,	/* highaddr */
3926				   NULL, NULL,		/* filter, filterarg */
3927				   MSIZE,		/* maxsize */
3928				   1,			/* nsegments */
3929				   MSIZE,		/* maxsegsize */
3930				   0,			/* flags */
3931				   NULL,		/* lockfunc */
3932				   NULL,		/* lockfuncarg */
3933				   &rxr->htag))) {
3934		device_printf(dev, "Unable to create RX DMA tag\n");
3935		goto fail;
3936	}
3937
3938	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3939				   1, 0,		/* alignment, bounds */
3940				   BUS_SPACE_MAXADDR,	/* lowaddr */
3941				   BUS_SPACE_MAXADDR,	/* highaddr */
3942				   NULL, NULL,		/* filter, filterarg */
3943				   MJUM9BYTES,		/* maxsize */
3944				   1,			/* nsegments */
3945				   MJUM9BYTES,		/* maxsegsize */
3946				   0,			/* flags */
3947				   NULL,		/* lockfunc */
3948				   NULL,		/* lockfuncarg */
3949				   &rxr->ptag))) {
3950		device_printf(dev, "Unable to create RX payload DMA tag\n");
3951		goto fail;
3952	}
3953
3954	for (i = 0; i < adapter->num_rx_desc; i++) {
3955		rxbuf = &rxr->rx_buffers[i];
3956		error = bus_dmamap_create(rxr->htag,
3957		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3958		if (error) {
3959			device_printf(dev,
3960			    "Unable to create RX head DMA maps\n");
3961			goto fail;
3962		}
3963		error = bus_dmamap_create(rxr->ptag,
3964		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3965		if (error) {
3966			device_printf(dev,
3967			    "Unable to create RX packet DMA maps\n");
3968			goto fail;
3969		}
3970	}
3971
3972	return (0);
3973
3974fail:
3975	/* Frees all, but can handle partial completion */
3976	igb_free_receive_structures(adapter);
3977	return (error);
3978}
3979
3980
3981static void
3982igb_free_receive_ring(struct rx_ring *rxr)
3983{
3984	struct	adapter		*adapter = rxr->adapter;
3985	struct igb_rx_buf	*rxbuf;
3986
3987
3988	for (int i = 0; i < adapter->num_rx_desc; i++) {
3989		rxbuf = &rxr->rx_buffers[i];
3990		if (rxbuf->m_head != NULL) {
3991			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3992			    BUS_DMASYNC_POSTREAD);
3993			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3994			rxbuf->m_head->m_flags |= M_PKTHDR;
3995			m_freem(rxbuf->m_head);
3996		}
3997		if (rxbuf->m_pack != NULL) {
3998			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3999			    BUS_DMASYNC_POSTREAD);
4000			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4001			rxbuf->m_pack->m_flags |= M_PKTHDR;
4002			m_freem(rxbuf->m_pack);
4003		}
4004		rxbuf->m_head = NULL;
4005		rxbuf->m_pack = NULL;
4006	}
4007}
4008
4009
4010/*********************************************************************
4011 *
4012 *  Initialize a receive ring and its buffers.
4013 *
4014 **********************************************************************/
4015static int
4016igb_setup_receive_ring(struct rx_ring *rxr)
4017{
4018	struct	adapter		*adapter;
4019	struct  ifnet		*ifp;
4020	device_t		dev;
4021	struct igb_rx_buf	*rxbuf;
4022	bus_dma_segment_t	pseg[1], hseg[1];
4023	struct lro_ctrl		*lro = &rxr->lro;
4024	int			rsize, nsegs, error = 0;
4025#ifdef DEV_NETMAP
4026	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4027	struct netmap_slot *slot;
4028#endif /* DEV_NETMAP */
4029
4030	adapter = rxr->adapter;
4031	dev = adapter->dev;
4032	ifp = adapter->ifp;
4033
4034	/* Clear the ring contents */
4035	IGB_RX_LOCK(rxr);
4036#ifdef DEV_NETMAP
4037	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4038#endif /* DEV_NETMAP */
4039	rsize = roundup2(adapter->num_rx_desc *
4040	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4041	bzero((void *)rxr->rx_base, rsize);
4042
4043	/*
4044	** Free current RX buffer structures and their mbufs
4045	*/
4046	igb_free_receive_ring(rxr);
4047
4048	/* Configure for header split? */
4049	if (igb_header_split)
4050		rxr->hdr_split = TRUE;
4051
4052        /* Now replenish the ring mbufs */
4053	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4054		struct mbuf	*mh, *mp;
4055
4056		rxbuf = &rxr->rx_buffers[j];
4057#ifdef DEV_NETMAP
4058		if (slot) {
4059			/* slot sj is mapped to the j-th NIC-ring entry */
4060			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4061			uint64_t paddr;
4062			void *addr;
4063
4064			addr = PNMB(slot + sj, &paddr);
4065			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4066			/* Update descriptor */
4067			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4068			continue;
4069		}
4070#endif /* DEV_NETMAP */
4071		if (rxr->hdr_split == FALSE)
4072			goto skip_head;
4073
4074		/* First the header */
4075		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4076		if (rxbuf->m_head == NULL) {
4077			error = ENOBUFS;
4078                        goto fail;
4079		}
4080		m_adj(rxbuf->m_head, ETHER_ALIGN);
4081		mh = rxbuf->m_head;
4082		mh->m_len = mh->m_pkthdr.len = MHLEN;
4083		mh->m_flags |= M_PKTHDR;
4084		/* Get the memory mapping */
4085		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4086		    rxbuf->hmap, rxbuf->m_head, hseg,
4087		    &nsegs, BUS_DMA_NOWAIT);
4088		if (error != 0) /* Nothing elegant to do here */
4089                        goto fail;
4090		bus_dmamap_sync(rxr->htag,
4091		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4092		/* Update descriptor */
4093		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4094
4095skip_head:
4096		/* Now the payload cluster */
4097		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4098		    M_PKTHDR, adapter->rx_mbuf_sz);
4099		if (rxbuf->m_pack == NULL) {
4100			error = ENOBUFS;
4101                        goto fail;
4102		}
4103		mp = rxbuf->m_pack;
4104		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4105		/* Get the memory mapping */
4106		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4107		    rxbuf->pmap, mp, pseg,
4108		    &nsegs, BUS_DMA_NOWAIT);
4109		if (error != 0)
4110                        goto fail;
4111		bus_dmamap_sync(rxr->ptag,
4112		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4113		/* Update descriptor */
4114		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4115        }
4116
4117	/* Setup our descriptor indices */
4118	rxr->next_to_check = 0;
4119	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4120	rxr->lro_enabled = FALSE;
4121	rxr->rx_split_packets = 0;
4122	rxr->rx_bytes = 0;
4123
4124	rxr->fmp = NULL;
4125	rxr->lmp = NULL;
4126	rxr->discard = FALSE;
4127
4128	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4129	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4130
4131	/*
4132	** Now set up the LRO interface; we
4133	** also only do header split when LRO
4134	** is enabled, since it is so often
4135	** undesirable in other setups.
4136	*/
4137	if (ifp->if_capenable & IFCAP_LRO) {
4138		error = tcp_lro_init(lro);
4139		if (error) {
4140			device_printf(dev, "LRO Initialization failed!\n");
4141			goto fail;
4142		}
4143		INIT_DEBUGOUT("RX LRO Initialized\n");
4144		rxr->lro_enabled = TRUE;
4145		lro->ifp = adapter->ifp;
4146	}
4147
4148	IGB_RX_UNLOCK(rxr);
4149	return (0);
4150
4151fail:
4152	igb_free_receive_ring(rxr);
4153	IGB_RX_UNLOCK(rxr);
4154	return (error);
4155}
4156
4157
4158/*********************************************************************
4159 *
4160 *  Initialize all receive rings.
4161 *
4162 **********************************************************************/
4163static int
4164igb_setup_receive_structures(struct adapter *adapter)
4165{
4166	struct rx_ring *rxr = adapter->rx_rings;
4167	int i;
4168
4169	for (i = 0; i < adapter->num_queues; i++, rxr++)
4170		if (igb_setup_receive_ring(rxr))
4171			goto fail;
4172
4173	return (0);
4174fail:
4175	/*
4176	 * Free RX buffers allocated so far, we will only handle
4177	 * the rings that completed, the failing case will have
4178	 * cleaned up for itself. 'i' is the endpoint.
4179	 */
4180	for (int j = 0; j < i; ++j) {
4181		rxr = &adapter->rx_rings[j];
4182		IGB_RX_LOCK(rxr);
4183		igb_free_receive_ring(rxr);
4184		IGB_RX_UNLOCK(rxr);
4185	}
4186
4187	return (ENOBUFS);
4188}
4189
4190/*********************************************************************
4191 *
4192 *  Enable receive unit.
4193 *
4194 **********************************************************************/
4195static void
4196igb_initialize_receive_units(struct adapter *adapter)
4197{
4198	struct rx_ring	*rxr = adapter->rx_rings;
4199	struct ifnet	*ifp = adapter->ifp;
4200	struct e1000_hw *hw = &adapter->hw;
4201	u32		rctl, rxcsum, psize, srrctl = 0;
4202
4203	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4204
4205	/*
4206	 * Make sure receives are disabled while setting
4207	 * up the descriptor ring
4208	 */
4209	rctl = E1000_READ_REG(hw, E1000_RCTL);
4210	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4211
4212	/*
4213	** Set up for header split
4214	*/
4215	if (igb_header_split) {
4216		/* Use a standard mbuf for the header */
4217		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4218		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4219	} else
4220		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4221
4222	/*
4223	** Set up for jumbo frames
4224	*/
4225	if (ifp->if_mtu > ETHERMTU) {
4226		rctl |= E1000_RCTL_LPE;
4227		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4228			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4229			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4230		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4231			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4232			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4233		}
4234		/* Set maximum packet len */
4235		psize = adapter->max_frame_size;
4236		/* are we on a vlan? */
4237		if (adapter->ifp->if_vlantrunk != NULL)
4238			psize += VLAN_TAG_SIZE;
4239		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4240	} else {
4241		rctl &= ~E1000_RCTL_LPE;
4242		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4243		rctl |= E1000_RCTL_SZ_2048;
4244	}
4245
4246	/* Setup the Base and Length of the Rx Descriptor Rings */
4247	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4248		u64 bus_addr = rxr->rxdma.dma_paddr;
4249		u32 rxdctl;
4250
4251		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4252		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4253		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4254		    (uint32_t)(bus_addr >> 32));
4255		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4256		    (uint32_t)bus_addr);
4257		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4258		/* Enable this Queue */
4259		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4260		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4261		rxdctl &= 0xFFF00000;
4262		rxdctl |= IGB_RX_PTHRESH;
4263		rxdctl |= IGB_RX_HTHRESH << 8;
4264		rxdctl |= IGB_RX_WTHRESH << 16;
4265		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4266	}
4267
4268	/*
4269	** Setup for RX MultiQueue
4270	*/
4271	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4272	if (adapter->num_queues > 1) {
4273		u32 random[10], mrqc, shift = 0;
4274		union igb_reta {
4275			u32 dword;
4276			u8  bytes[4];
4277		} reta;
4278
4279		arc4rand(&random, sizeof(random), 0);
4280		if (adapter->hw.mac.type == e1000_82575)
4281			shift = 6;
4282		/* Warning FM follows */
4283		for (int i = 0; i < 128; i++) {
4284			reta.bytes[i & 3] =
4285			    (i % adapter->num_queues) << shift;
4286			if ((i & 3) == 3)
4287				E1000_WRITE_REG(hw,
4288				    E1000_RETA(i >> 2), reta.dword);
4289		}
4290		/* Now fill in hash table */
4291		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4292		for (int i = 0; i < 10; i++)
4293			E1000_WRITE_REG_ARRAY(hw,
4294			    E1000_RSSRK(0), i, random[i]);
4295
4296		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4297		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4298		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4299		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4300		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4301		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4302		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4303		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4304
4305		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4306
4307		/*
4308		** NOTE: Receive Full-Packet Checksum Offload
4309		** is mutually exclusive with Multiqueue. However
4310		** this is not the same as TCP/IP checksums which
4311		** still work.
4312		*/
4313		rxcsum |= E1000_RXCSUM_PCSD;
4314#if __FreeBSD_version >= 800000
4315		/* For SCTP Offload */
4316		if ((hw->mac.type == e1000_82576)
4317		    && (ifp->if_capenable & IFCAP_RXCSUM))
4318			rxcsum |= E1000_RXCSUM_CRCOFL;
4319#endif
4320	} else {
4321		/* Non RSS setup */
4322		if (ifp->if_capenable & IFCAP_RXCSUM) {
4323			rxcsum |= E1000_RXCSUM_IPPCSE;
4324#if __FreeBSD_version >= 800000
4325			if (adapter->hw.mac.type == e1000_82576)
4326				rxcsum |= E1000_RXCSUM_CRCOFL;
4327#endif
4328		} else
4329			rxcsum &= ~E1000_RXCSUM_TUOFL;
4330	}
4331	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4332
4333	/* Setup the Receive Control Register */
4334	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4335	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4336		   E1000_RCTL_RDMTS_HALF |
4337		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4338	/* Strip CRC bytes. */
4339	rctl |= E1000_RCTL_SECRC;
4340	/* Make sure VLAN Filters are off */
4341	rctl &= ~E1000_RCTL_VFE;
4342	/* Don't store bad packets */
4343	rctl &= ~E1000_RCTL_SBP;
4344
4345	/* Enable Receives */
4346	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4347
4348	/*
4349	 * Setup the HW Rx Head and Tail Descriptor Pointers
4350	 *   - needs to be after enable
4351	 */
4352	for (int i = 0; i < adapter->num_queues; i++) {
4353		rxr = &adapter->rx_rings[i];
4354		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4355#ifdef DEV_NETMAP
4356		/*
4357		 * An init() while a netmap client is active must
4358		 * preserve the rx buffers passed to userspace.
4359		 * In this driver it means we adjust RDT to
4360		 * something different from next_to_refresh
4361		 * (which is not used in netmap mode).
4362		 */
4363		if (ifp->if_capenable & IFCAP_NETMAP) {
4364			struct netmap_adapter *na = NA(adapter->ifp);
4365			struct netmap_kring *kring = &na->rx_rings[i];
4366			int t = rxr->next_to_refresh - kring->nr_hwavail;
4367
4368			if (t >= adapter->num_rx_desc)
4369				t -= adapter->num_rx_desc;
4370			else if (t < 0)
4371				t += adapter->num_rx_desc;
4372			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4373		} else
4374#endif /* DEV_NETMAP */
4375		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4376	}
4377	return;
4378}
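/*
 * Worked example of the RSS redirection table fill above (queue count
 * is illustrative): the 128-entry RETA is written four entries at a
 * time, byte (i & 3) of the scratch dword holding (i % num_queues),
 * and every fourth iteration the assembled dword lands in
 * E1000_RETA(i >> 2), i.e. 32 register writes in total.  With
 * num_queues = 4 the entries simply cycle 0,1,2,3,...; on the 82575
 * each queue index is additionally shifted left by 6 as that MAC
 * expects.
 */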
4379
4380/*********************************************************************
4381 *
4382 *  Free receive rings.
4383 *
4384 **********************************************************************/
4385static void
4386igb_free_receive_structures(struct adapter *adapter)
4387{
4388	struct rx_ring *rxr = adapter->rx_rings;
4389
4390	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4391		struct lro_ctrl	*lro = &rxr->lro;
4392		igb_free_receive_buffers(rxr);
4393		tcp_lro_free(lro);
4394		igb_dma_free(adapter, &rxr->rxdma);
4395	}
4396
4397	free(adapter->rx_rings, M_DEVBUF);
4398}
4399
4400/*********************************************************************
4401 *
4402 *  Free receive ring data structures.
4403 *
4404 **********************************************************************/
4405static void
4406igb_free_receive_buffers(struct rx_ring *rxr)
4407{
4408	struct adapter		*adapter = rxr->adapter;
4409	struct igb_rx_buf	*rxbuf;
4410	int i;
4411
4412	INIT_DEBUGOUT("free_receive_structures: begin");
4413
4414	/* Cleanup any existing buffers */
4415	if (rxr->rx_buffers != NULL) {
4416		for (i = 0; i < adapter->num_rx_desc; i++) {
4417			rxbuf = &rxr->rx_buffers[i];
4418			if (rxbuf->m_head != NULL) {
4419				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4420				    BUS_DMASYNC_POSTREAD);
4421				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4422				rxbuf->m_head->m_flags |= M_PKTHDR;
4423				m_freem(rxbuf->m_head);
4424			}
4425			if (rxbuf->m_pack != NULL) {
4426				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4427				    BUS_DMASYNC_POSTREAD);
4428				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4429				rxbuf->m_pack->m_flags |= M_PKTHDR;
4430				m_freem(rxbuf->m_pack);
4431			}
4432			rxbuf->m_head = NULL;
4433			rxbuf->m_pack = NULL;
4434			if (rxbuf->hmap != NULL) {
4435				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4436				rxbuf->hmap = NULL;
4437			}
4438			if (rxbuf->pmap != NULL) {
4439				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4440				rxbuf->pmap = NULL;
4441			}
4442		}
4443		if (rxr->rx_buffers != NULL) {
4444			free(rxr->rx_buffers, M_DEVBUF);
4445			rxr->rx_buffers = NULL;
4446		}
4447	}
4448
4449	if (rxr->htag != NULL) {
4450		bus_dma_tag_destroy(rxr->htag);
4451		rxr->htag = NULL;
4452	}
4453	if (rxr->ptag != NULL) {
4454		bus_dma_tag_destroy(rxr->ptag);
4455		rxr->ptag = NULL;
4456	}
4457}
4458
4459static __inline void
4460igb_rx_discard(struct rx_ring *rxr, int i)
4461{
4462	struct igb_rx_buf	*rbuf;
4463
4464	rbuf = &rxr->rx_buffers[i];
4465
4466	/* Partially received? Free the chain */
4467	if (rxr->fmp != NULL) {
4468		rxr->fmp->m_flags |= M_PKTHDR;
4469		m_freem(rxr->fmp);
4470		rxr->fmp = NULL;
4471		rxr->lmp = NULL;
4472	}
4473
4474	/*
4475	** With advanced descriptors the writeback
4476	** clobbers the buffer addrs, so it's easier
4477	** to just free the existing mbufs and take
4478	** the normal refresh path to get new buffers
4479	** and mapping.
4480	*/
4481	if (rbuf->m_head) {
4482		m_free(rbuf->m_head);
4483		rbuf->m_head = NULL;
4484	}
4485
4486	if (rbuf->m_pack) {
4487		m_free(rbuf->m_pack);
4488		rbuf->m_pack = NULL;
4489	}
4490
4491	return;
4492}
4493
4494static __inline void
4495igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4496{
4497
4498	/*
4499	 * At the moment LRO is only for IPv4/TCP packets, and the TCP checksum
4500	 * of the packet should have been computed by hardware. Also it should
4501	 * not have a VLAN tag in the ethernet header.
4502	 */
4503	if (rxr->lro_enabled &&
4504	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4505	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4506	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4507	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4508	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4509	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4510		/*
4511		 * Send to the stack if:
4512		 *  - LRO not enabled, or
4513		 *  - no LRO resources, or
4514		 *  - lro enqueue fails
4515		 */
4516		if (rxr->lro.lro_cnt != 0)
4517			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4518				return;
4519	}
4520	IGB_RX_UNLOCK(rxr);
4521	(*ifp->if_input)(ifp, m);
4522	IGB_RX_LOCK(rxr);
4523}
4524
4525/*********************************************************************
4526 *
4527 *  This routine executes in interrupt context. It replenishes
4528 *  the mbufs in the descriptor and sends data which has been
4529 *  dma'ed into host memory to upper layer.
4530 *
4531 *  We loop at most count times if count is > 0, or until done if
4532 *  count < 0.
4533 *
4534 *  Return TRUE if more to clean, FALSE otherwise
4535 *********************************************************************/
4536static bool
4537igb_rxeof(struct igb_queue *que, int count, int *done)
4538{
4539	struct adapter		*adapter = que->adapter;
4540	struct rx_ring		*rxr = que->rxr;
4541	struct ifnet		*ifp = adapter->ifp;
4542	struct lro_ctrl		*lro = &rxr->lro;
4543	struct lro_entry	*queued;
4544	int			i, processed = 0, rxdone = 0;
4545	u32			ptype, staterr = 0;
4546	union e1000_adv_rx_desc	*cur;
4547
4548	IGB_RX_LOCK(rxr);
4549	/* Sync the ring. */
4550	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4551	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4552
4553#ifdef DEV_NETMAP
4554	if (ifp->if_capenable & IFCAP_NETMAP) {
4555		struct netmap_adapter *na = NA(ifp);
4556
4557		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4558		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4559		IGB_RX_UNLOCK(rxr);
4560		IGB_CORE_LOCK(adapter);
4561		selwakeuppri(&na->rx_si, PI_NET);
4562		IGB_CORE_UNLOCK(adapter);
4563		return (0);
4564	}
4565#endif /* DEV_NETMAP */
4566
4567	/* Main clean loop */
4568	for (i = rxr->next_to_check; count != 0;) {
4569		struct mbuf		*sendmp, *mh, *mp;
4570		struct igb_rx_buf	*rxbuf;
4571		u16			hlen, plen, hdr, vtag;
4572		bool			eop = FALSE;
4573
4574		cur = &rxr->rx_base[i];
4575		staterr = le32toh(cur->wb.upper.status_error);
4576		if ((staterr & E1000_RXD_STAT_DD) == 0)
4577			break;
4578		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4579			break;
4580		count--;
4581		sendmp = mh = mp = NULL;
4582		cur->wb.upper.status_error = 0;
4583		rxbuf = &rxr->rx_buffers[i];
4584		plen = le16toh(cur->wb.upper.length);
4585		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4586		if ((adapter->hw.mac.type == e1000_i350) &&
4587		    (staterr & E1000_RXDEXT_STATERR_LB))
4588			vtag = be16toh(cur->wb.upper.vlan);
4589		else
4590			vtag = le16toh(cur->wb.upper.vlan);
4591		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4592		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4593
4594		/* Make sure all segments of a bad packet are discarded */
4595		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4596		    (rxr->discard)) {
4597			ifp->if_ierrors++;
4598			++rxr->rx_discarded;
4599			if (!eop) /* Catch subsequent segs */
4600				rxr->discard = TRUE;
4601			else
4602				rxr->discard = FALSE;
4603			igb_rx_discard(rxr, i);
4604			goto next_desc;
4605		}
4606
4607		/*
4608		** The way the hardware is configured to
4609		** split, it will ONLY use the header buffer
4610		** when header split is enabled, otherwise we
4611		** get normal behavior, i.e., both header and
4612		** payload are DMA'd into the payload buffer.
4613		**
4614		** The fmp test is to catch the case where a
4615		** packet spans multiple descriptors; in that
4616		** case only the first header is valid.
4617		*/
4618		if (rxr->hdr_split && rxr->fmp == NULL) {
4619			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4620			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4621			if (hlen > IGB_HDR_BUF)
4622				hlen = IGB_HDR_BUF;
4623			mh = rxr->rx_buffers[i].m_head;
4624			mh->m_len = hlen;
4625			/* clear buf pointer for refresh */
4626			rxbuf->m_head = NULL;
4627			/*
4628			** Get the payload length, this
4629			** could be zero if it's a small
4630			** packet.
4631			*/
4632			if (plen > 0) {
4633				mp = rxr->rx_buffers[i].m_pack;
4634				mp->m_len = plen;
4635				mh->m_next = mp;
4636				/* clear buf pointer */
4637				rxbuf->m_pack = NULL;
4638				rxr->rx_split_packets++;
4639			}
4640		} else {
4641			/*
4642			** Either no header split, or a
4643			** secondary piece of a fragmented
4644			** split packet.
4645			*/
4646			mh = rxr->rx_buffers[i].m_pack;
4647			mh->m_len = plen;
4648			/* clear buf info for refresh */
4649			rxbuf->m_pack = NULL;
4650		}
4651
4652		++processed; /* So we know when to refresh */
4653
4654		/* Initial frame - setup */
4655		if (rxr->fmp == NULL) {
4656			mh->m_pkthdr.len = mh->m_len;
4657			/* Save the head of the chain */
4658			rxr->fmp = mh;
4659			rxr->lmp = mh;
4660			if (mp != NULL) {
4661				/* Add payload if split */
4662				mh->m_pkthdr.len += mp->m_len;
4663				rxr->lmp = mh->m_next;
4664			}
4665		} else {
4666			/* Chain mbuf's together */
4667			rxr->lmp->m_next = mh;
4668			rxr->lmp = rxr->lmp->m_next;
4669			rxr->fmp->m_pkthdr.len += mh->m_len;
4670		}
4671
4672		if (eop) {
4673			rxr->fmp->m_pkthdr.rcvif = ifp;
4674			ifp->if_ipackets++;
4675			rxr->rx_packets++;
4676			/* capture data for AIM */
4677			rxr->packets++;
4678			rxr->bytes += rxr->fmp->m_pkthdr.len;
4679			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4680
4681			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4682				igb_rx_checksum(staterr, rxr->fmp, ptype);
4683
4684			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4685			    (staterr & E1000_RXD_STAT_VP) != 0) {
4686				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4687				rxr->fmp->m_flags |= M_VLANTAG;
4688			}
4689#if __FreeBSD_version >= 800000
4690			rxr->fmp->m_pkthdr.flowid = que->msix;
4691			rxr->fmp->m_flags |= M_FLOWID;
4692#endif
4693			sendmp = rxr->fmp;
4694			/* Make sure to set M_PKTHDR. */
4695			sendmp->m_flags |= M_PKTHDR;
4696			rxr->fmp = NULL;
4697			rxr->lmp = NULL;
4698		}
4699
4700next_desc:
4701		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4702		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4703
4704		/* Advance our pointers to the next descriptor. */
4705		if (++i == adapter->num_rx_desc)
4706			i = 0;
4707		/*
4708		** Send to the stack or LRO
4709		*/
4710		if (sendmp != NULL) {
4711			rxr->next_to_check = i;
4712			igb_rx_input(rxr, ifp, sendmp, ptype);
4713			i = rxr->next_to_check;
4714			rxdone++;
4715		}
4716
4717		/* Every 8 descriptors we go to refresh mbufs */
4718		if (processed == 8) {
4719                        igb_refresh_mbufs(rxr, i);
4720                        processed = 0;
4721		}
4722	}
4723
4724	/* Catch any remainders */
4725	if (igb_rx_unrefreshed(rxr))
4726		igb_refresh_mbufs(rxr, i);
4727
4728	rxr->next_to_check = i;
4729
4730	/*
4731	 * Flush any outstanding LRO work
4732	 */
4733	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4734		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4735		tcp_lro_flush(lro, queued);
4736	}
4737
4738	if (done != NULL)
4739		*done = rxdone;
4740
4741	IGB_RX_UNLOCK(rxr);
4742	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4743}
4744
4745/*********************************************************************
4746 *
4747 *  Verify that the hardware indicated that the checksum is valid.
4748 *  Inform the stack about the status of the checksum so that the
4749 *  stack doesn't spend time verifying the checksum.
4750 *
4751 *********************************************************************/
4752static void
4753igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4754{
4755	u16 status = (u16)staterr;
4756	u8  errors = (u8) (staterr >> 24);
4757	int sctp;
4758
4759	/* Ignore Checksum bit is set */
4760	if (status & E1000_RXD_STAT_IXSM) {
4761		mp->m_pkthdr.csum_flags = 0;
4762		return;
4763	}
4764
4765	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4766	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4767		sctp = 1;
4768	else
4769		sctp = 0;
4770	if (status & E1000_RXD_STAT_IPCS) {
4771		/* Did it pass? */
4772		if (!(errors & E1000_RXD_ERR_IPE)) {
4773			/* IP Checksum Good */
4774			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4775			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4776		} else
4777			mp->m_pkthdr.csum_flags = 0;
4778	}
4779
4780	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4781		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4782#if __FreeBSD_version >= 800000
4783		if (sctp) /* reassign */
4784			type = CSUM_SCTP_VALID;
4785#endif
4786		/* Did it pass? */
4787		if (!(errors & E1000_RXD_ERR_TCPE)) {
4788			mp->m_pkthdr.csum_flags |= type;
4789			if (sctp == 0)
4790				mp->m_pkthdr.csum_data = htons(0xffff);
4791		}
4792	}
4793	return;
4794}
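/*
 * Illustrative mapping of the descriptor bits handled above: 'status'
 * is the low 16 bits of staterr and 'errors' is bits 31:24.  For a
 * typical good TCP/IPv4 frame, STAT_IPCS set with ERR_IPE clear yields
 * CSUM_IP_CHECKED | CSUM_IP_VALID, and STAT_TCPCS set with ERR_TCPE
 * clear adds CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data forced
 * to 0xffff, so the stack skips its own verification.  SCTP frames
 * (where supported) get CSUM_SCTP_VALID instead and no csum_data.
 */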
4795
4796/*
4797 * This routine is run via a vlan
4798 * config EVENT
4799 */
4800static void
4801igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4802{
4803	struct adapter	*adapter = ifp->if_softc;
4804	u32		index, bit;
4805
4806	if (ifp->if_softc !=  arg)   /* Not our event */
4807		return;
4808
4809	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4810                return;
4811
4812	IGB_CORE_LOCK(adapter);
4813	index = (vtag >> 5) & 0x7F;
4814	bit = vtag & 0x1F;
4815	adapter->shadow_vfta[index] |= (1 << bit);
4816	++adapter->num_vlans;
4817	/* Change hw filter setting */
4818	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4819		igb_setup_vlan_hw_support(adapter);
4820	IGB_CORE_UNLOCK(adapter);
4821}
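/*
 * Example of the VFTA indexing used above and in igb_unregister_vlan()
 * below: the 4096 possible VLAN IDs map onto 128 32-bit shadow words,
 * so vtag 100 gives index = (100 >> 5) & 0x7F = 3 and bit = 100 & 0x1F
 * = 4, i.e. shadow_vfta[3] |= (1 << 4).  The hardware table itself is
 * only rewritten when IFCAP_VLAN_HWFILTER is enabled.
 */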
4822
4823/*
4824 * This routine is run via a vlan
4825 * unconfig EVENT
4826 */
4827static void
4828igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4829{
4830	struct adapter	*adapter = ifp->if_softc;
4831	u32		index, bit;
4832
4833	if (ifp->if_softc !=  arg)
4834		return;
4835
4836	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4837                return;
4838
4839	IGB_CORE_LOCK(adapter);
4840	index = (vtag >> 5) & 0x7F;
4841	bit = vtag & 0x1F;
4842	adapter->shadow_vfta[index] &= ~(1 << bit);
4843	--adapter->num_vlans;
4844	/* Change hw filter setting */
4845	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4846		igb_setup_vlan_hw_support(adapter);
4847	IGB_CORE_UNLOCK(adapter);
4848}
4849
4850static void
4851igb_setup_vlan_hw_support(struct adapter *adapter)
4852{
4853	struct e1000_hw *hw = &adapter->hw;
4854	struct ifnet	*ifp = adapter->ifp;
4855	u32             reg;
4856
4857	if (adapter->vf_ifp) {
4858		e1000_rlpml_set_vf(hw,
4859		    adapter->max_frame_size + VLAN_TAG_SIZE);
4860		return;
4861	}
4862
4863	reg = E1000_READ_REG(hw, E1000_CTRL);
4864	reg |= E1000_CTRL_VME;
4865	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4866
4867	/* Enable the Filter Table */
4868	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4869		reg = E1000_READ_REG(hw, E1000_RCTL);
4870		reg &= ~E1000_RCTL_CFIEN;
4871		reg |= E1000_RCTL_VFE;
4872		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4873	}
4874
4875	/* Update the frame size */
4876	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4877	    adapter->max_frame_size + VLAN_TAG_SIZE);
4878
4879	/* Don't bother with table if no vlans */
4880	if ((adapter->num_vlans == 0) ||
4881	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4882                return;
4883	/*
4884	** A soft reset zeros out the VFTA, so
4885	** we need to repopulate it now.
4886	*/
4887	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4888                if (adapter->shadow_vfta[i] != 0) {
4889			if (adapter->vf_ifp)
4890				e1000_vfta_set_vf(hw,
4891				    adapter->shadow_vfta[i], TRUE);
4892			else
4893				e1000_write_vfta(hw,
4894				    i, adapter->shadow_vfta[i]);
4895		}
4896}
4897
4898static void
4899igb_enable_intr(struct adapter *adapter)
4900{
4901	/* With RSS set up what to auto clear */
4902	if (adapter->msix_mem) {
4903		u32 mask = (adapter->que_mask | adapter->link_mask);
4904		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4905		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4906		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4907		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4908		    E1000_IMS_LSC);
4909	} else {
4910		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4911		    IMS_ENABLE_MASK);
4912	}
4913	E1000_WRITE_FLUSH(&adapter->hw);
4914
4915	return;
4916}
4917
4918static void
4919igb_disable_intr(struct adapter *adapter)
4920{
4921	if (adapter->msix_mem) {
4922		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4923		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4924	}
4925	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4926	E1000_WRITE_FLUSH(&adapter->hw);
4927	return;
4928}
4929
4930/*
4931 * Bit of a misnomer: what this really means is
4932 * to enable OS management of the system, i.e.,
4933 * to disable special hardware management features.
4934 */
4935static void
4936igb_init_manageability(struct adapter *adapter)
4937{
4938	if (adapter->has_manage) {
4939		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4940		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4941
4942		/* disable hardware interception of ARP */
4943		manc &= ~(E1000_MANC_ARP_EN);
4944
4945                /* enable receiving management packets to the host */
4946		manc |= E1000_MANC_EN_MNG2HOST;
4947		manc2h |= 1 << 5;  /* Mng Port 623 */
4948		manc2h |= 1 << 6;  /* Mng Port 664 */
4949		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4950		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4951	}
4952}
4953
4954/*
4955 * Give control back to hardware management
4956 * controller if there is one.
4957 */
4958static void
4959igb_release_manageability(struct adapter *adapter)
4960{
4961	if (adapter->has_manage) {
4962		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4963
4964		/* re-enable hardware interception of ARP */
4965		manc |= E1000_MANC_ARP_EN;
4966		manc &= ~E1000_MANC_EN_MNG2HOST;
4967
4968		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4969	}
4970}
4971
4972/*
4973 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4974 * For ASF and Pass Through versions of f/w this means that
4975 * the driver is loaded.
4976 *
4977 */
4978static void
4979igb_get_hw_control(struct adapter *adapter)
4980{
4981	u32 ctrl_ext;
4982
4983	if (adapter->vf_ifp)
4984		return;
4985
4986	/* Let firmware know the driver has taken over */
4987	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4988	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4989	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4990}
4991
4992/*
4993 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4994 * For ASF and Pass Through versions of f/w this means that the
4995 * driver is no longer loaded.
4996 *
4997 */
4998static void
4999igb_release_hw_control(struct adapter *adapter)
5000{
5001	u32 ctrl_ext;
5002
5003	if (adapter->vf_ifp)
5004		return;
5005
5006	/* Let firmware take over control of h/w */
5007	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5008	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5009	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5010}
5011
5012static int
5013igb_is_valid_ether_addr(uint8_t *addr)
5014{
5015	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5016
5017	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5018		return (FALSE);
5019	}
5020
5021	return (TRUE);
5022}
5023
5024
5025/*
5026 * Enable PCI Wake On Lan capability
5027 */
5028static void
5029igb_enable_wakeup(device_t dev)
5030{
5031	u16     cap, status;
5032	u8      id;
5033
5034	/* First find the capabilities pointer */
5035	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5036	/* Read the PM Capabilities */
5037	id = pci_read_config(dev, cap, 1);
5038	if (id != PCIY_PMG)     /* Something wrong */
5039		return;
5040	/* OK, we have the power capabilities, so
5041	   now get the status register */
5042	cap += PCIR_POWER_STATUS;
5043	status = pci_read_config(dev, cap, 2);
5044	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5045	pci_write_config(dev, cap, status, 2);
5046	return;
5047}
5048
5049static void
5050igb_led_func(void *arg, int onoff)
5051{
5052	struct adapter	*adapter = arg;
5053
5054	IGB_CORE_LOCK(adapter);
5055	if (onoff) {
5056		e1000_setup_led(&adapter->hw);
5057		e1000_led_on(&adapter->hw);
5058	} else {
5059		e1000_led_off(&adapter->hw);
5060		e1000_cleanup_led(&adapter->hw);
5061	}
5062	IGB_CORE_UNLOCK(adapter);
5063}
5064
5065/**********************************************************************
5066 *
5067 *  Update the board statistics counters.
5068 *
5069 **********************************************************************/
5070static void
5071igb_update_stats_counters(struct adapter *adapter)
5072{
5073	struct ifnet		*ifp;
5074        struct e1000_hw		*hw = &adapter->hw;
5075	struct e1000_hw_stats	*stats;
5076
5077	/*
5078	** The virtual function adapter has only a
5079	** small controlled set of stats; do only
5080	** those and return.
5081	*/
5082	if (adapter->vf_ifp) {
5083		igb_update_vf_stats_counters(adapter);
5084		return;
5085	}
5086
5087	stats = (struct e1000_hw_stats	*)adapter->stats;
5088
5089	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5090	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5091		stats->symerrs +=
5092		    E1000_READ_REG(hw,E1000_SYMERRS);
5093		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5094	}
5095
5096	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5097	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5098	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5099	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5100
5101	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5102	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5103	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5104	stats->dc += E1000_READ_REG(hw, E1000_DC);
5105	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5106	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5107	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5108	/*
5109	** For watchdog management we need to know if we have been
5110	** paused during the last interval, so capture that here.
5111	*/
5112        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5113        stats->xoffrxc += adapter->pause_frames;
5114	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5115	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5116	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5117	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5118	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5119	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5120	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5121	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5122	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5123	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5124	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5125	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5126
5127	/* For the 64-bit byte counters the low dword must be read first. */
5128	/* Both registers clear on the read of the high dword */
5129
5130	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5131	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5132	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5133	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5134
5135	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5136	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5137	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5138	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5139	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5140
5141	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5142	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5143
5144	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5145	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5146	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5147	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5148	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5149	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5150	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5151	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5152	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5153	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5154
5155	/* Interrupt Counts */
5156
5157	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5158	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5159	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5160	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5161	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5162	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5163	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5164	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5165	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5166
5167	/* Host to Card Statistics */
5168
5169	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5170	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5171	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5172	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5173	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5174	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5175	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5176	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5177	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5178	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5179	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5180	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5181	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5182	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5183
5184	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5185	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5186	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5187	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5188	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5189	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5190
5191	ifp = adapter->ifp;
5192	ifp->if_collisions = stats->colc;
5193
5194	/* Rx Errors */
5195	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5196	    stats->crcerrs + stats->algnerrc +
5197	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5198
5199	/* Tx Errors */
5200	ifp->if_oerrors = stats->ecol +
5201	    stats->latecol + adapter->watchdog_events;
5202
5203	/* Driver specific counters */
5204	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5205	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5206	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5207	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5208	adapter->packet_buf_alloc_tx =
5209	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5210	adapter->packet_buf_alloc_rx =
5211	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5212}
5213
5214
5215/**********************************************************************
5216 *
5217 *  Initialize the VF board statistics counters.
5218 *
5219 **********************************************************************/
5220static void
5221igb_vf_init_stats(struct adapter *adapter)
5222{
5223        struct e1000_hw *hw = &adapter->hw;
5224	struct e1000_vf_stats	*stats;
5225
5226	stats = (struct e1000_vf_stats	*)adapter->stats;
5227	if (stats == NULL)
5228		return;
5229        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5230        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5231        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5232        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5233        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5234}
5235
5236/**********************************************************************
5237 *
5238 *  Update the VF board statistics counters.
5239 *
5240 **********************************************************************/
5241static void
5242igb_update_vf_stats_counters(struct adapter *adapter)
5243{
5244	struct e1000_hw *hw = &adapter->hw;
5245	struct e1000_vf_stats	*stats;
5246
5247	if (adapter->link_speed == 0)
5248		return;
5249
5250	stats = (struct e1000_vf_stats	*)adapter->stats;
5251
5252	UPDATE_VF_REG(E1000_VFGPRC,
5253	    stats->last_gprc, stats->gprc);
5254	UPDATE_VF_REG(E1000_VFGORC,
5255	    stats->last_gorc, stats->gorc);
5256	UPDATE_VF_REG(E1000_VFGPTC,
5257	    stats->last_gptc, stats->gptc);
5258	UPDATE_VF_REG(E1000_VFGOTC,
5259	    stats->last_gotc, stats->gotc);
5260	UPDATE_VF_REG(E1000_VFMPRC,
5261	    stats->last_mprc, stats->mprc);
5262}
5263
5264/* Export a single 32-bit register via a read-only sysctl. */
5265static int
5266igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5267{
5268	struct adapter *adapter;
5269	u_int val;
5270
5271	adapter = oidp->oid_arg1;
5272	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5273	return (sysctl_handle_int(oidp, &val, 0, req));
5274}
5275
5276/*
5277**  Tuneable interrupt rate handler
5278*/
5279static int
5280igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5281{
5282	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5283	int			error;
5284	u32			reg, usec, rate;
5285
5286	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5287	usec = ((reg & 0x7FFC) >> 2);
5288	if (usec > 0)
5289		rate = 1000000 / usec;
5290	else
5291		rate = 0;
5292	error = sysctl_handle_int(oidp, &rate, 0, req);
5293	if (error || !req->newptr)
5294		return error;
5295	return 0;
5296}
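/*
 * Example of the EITR conversion above (register contents are
 * illustrative): the interval field extracted as (reg & 0x7FFC) >> 2
 * is treated as microseconds, so a value of 125 is reported as
 * 1000000 / 125 = 8000 interrupts per second, and a zero interval is
 * reported as a rate of 0.  Note that in this revision the handler is
 * effectively read-only: a value written via sysctl is accepted but
 * never pushed back into the EITR register.
 */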
5297
5298/*
5299 * Add sysctl variables, one per statistic, to the system.
5300 */
5301static void
5302igb_add_hw_stats(struct adapter *adapter)
5303{
5304	device_t dev = adapter->dev;
5305
5306	struct tx_ring *txr = adapter->tx_rings;
5307	struct rx_ring *rxr = adapter->rx_rings;
5308
5309	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5310	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5311	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5312	struct e1000_hw_stats *stats = adapter->stats;
5313
5314	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5315	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5316
5317#define QUEUE_NAME_LEN 32
5318	char namebuf[QUEUE_NAME_LEN];
5319
5320	/* Driver Statistics */
5321	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5322			CTLFLAG_RD, &adapter->link_irq, 0,
5323			"Link MSIX IRQ Handled");
5324	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5325			CTLFLAG_RD, &adapter->dropped_pkts,
5326			"Driver dropped packets");
5327	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5328			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5329			"Driver tx dma failure in xmit");
5330	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5331			CTLFLAG_RD, &adapter->rx_overruns,
5332			"RX overruns");
5333	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5334			CTLFLAG_RD, &adapter->watchdog_events,
5335			"Watchdog timeouts");
5336
5337	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5338			CTLFLAG_RD, &adapter->device_control,
5339			"Device Control Register");
5340	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5341			CTLFLAG_RD, &adapter->rx_control,
5342			"Receiver Control Register");
5343	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5344			CTLFLAG_RD, &adapter->int_mask,
5345			"Interrupt Mask");
5346	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5347			CTLFLAG_RD, &adapter->eint_mask,
5348			"Extended Interrupt Mask");
5349	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5350			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5351			"Transmit Buffer Packet Allocation");
5352	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5353			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5354			"Receive Buffer Packet Allocation");
5355	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5356			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5357			"Flow Control High Watermark");
5358	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5359			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5360			"Flow Control Low Watermark");
5361
5362	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5363		struct lro_ctrl *lro = &rxr->lro;
5364
5365		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5366		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5367					    CTLFLAG_RD, NULL, "Queue Name");
5368		queue_list = SYSCTL_CHILDREN(queue_node);
5369
5370		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5371				CTLFLAG_RD, &adapter->queues[i],
5372				sizeof(struct igb_queue),
5373				igb_sysctl_interrupt_rate_handler,
5374				"IU", "Interrupt Rate");
5375
5376		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5377				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5378				igb_sysctl_reg_handler, "IU",
5379				"Transmit Descriptor Head");
5380		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5381				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5382				igb_sysctl_reg_handler, "IU",
5383				"Transmit Descriptor Tail");
5384		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5385				CTLFLAG_RD, &txr->no_desc_avail,
5386				"Queue No Descriptor Available");
5387		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5388				CTLFLAG_RD, &txr->tx_packets,
5389				"Queue Packets Transmitted");
5390
5391		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5392				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5393				igb_sysctl_reg_handler, "IU",
5394				"Receive Descriptor Head");
5395		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5396				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5397				igb_sysctl_reg_handler, "IU",
5398				"Receive Descriptor Tail");
5399		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5400				CTLFLAG_RD, &rxr->rx_packets,
5401				"Queue Packets Received");
5402		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5403				CTLFLAG_RD, &rxr->rx_bytes,
5404				"Queue Bytes Received");
5405		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5406				CTLFLAG_RD, &lro->lro_queued, 0,
5407				"LRO Queued");
5408		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5409				CTLFLAG_RD, &lro->lro_flushed, 0,
5410				"LRO Flushed");
5411	}
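	/*
	** Each queue thus gets its own queueN node under the device's sysctl
	** tree, holding the descriptor head/tail registers plus the packet,
	** byte and LRO counters added above.
	*/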
5412
5413	/* MAC stats get their own sub node */
5414
5415	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5416				    CTLFLAG_RD, NULL, "MAC Statistics");
5417	stat_list = SYSCTL_CHILDREN(stat_node);
5418
5419	/*
5420	** A VF adapter has a very limited set of stats
5421	** since it is not managing the physical hardware.
5422	*/
5423	if (adapter->vf_ifp) {
5424		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5425				CTLFLAG_RD, &stats->gprc,
5426				"Good Packets Received");
5427		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5428				CTLFLAG_RD, &stats->gptc,
5429				"Good Packets Transmitted");
5430		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5431				CTLFLAG_RD, &stats->gorc,
5432				"Good Octets Received");
5433		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5434				CTLFLAG_RD, &stats->gotc,
5435				"Good Octets Transmitted");
5436		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5437				CTLFLAG_RD, &stats->mprc,
5438				"Multicast Packets Received");
5439		return;
5440	}
5441
5442	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5443			CTLFLAG_RD, &stats->ecol,
5444			"Excessive collisions");
5445	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5446			CTLFLAG_RD, &stats->scc,
5447			"Single collisions");
5448	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5449			CTLFLAG_RD, &stats->mcc,
5450			"Multiple collisions");
5451	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5452			CTLFLAG_RD, &stats->latecol,
5453			"Late collisions");
5454	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5455			CTLFLAG_RD, &stats->colc,
5456			"Collision Count");
5457	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5458			CTLFLAG_RD, &stats->symerrs,
5459			"Symbol Errors");
5460	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5461			CTLFLAG_RD, &stats->sec,
5462			"Sequence Errors");
5463	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5464			CTLFLAG_RD, &stats->dc,
5465			"Defer Count");
5466	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5467			CTLFLAG_RD, &stats->mpc,
5468			"Missed Packets");
5469	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5470			CTLFLAG_RD, &stats->rnbc,
5471			"Receive No Buffers");
5472	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5473			CTLFLAG_RD, &stats->ruc,
5474			"Receive Undersize");
5475	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5476			CTLFLAG_RD, &stats->rfc,
5477			"Fragmented Packets Received");
5478	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5479			CTLFLAG_RD, &stats->roc,
5480			"Oversized Packets Received");
5481	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5482			CTLFLAG_RD, &stats->rjc,
5483			"Received Jabber");
5484	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5485			CTLFLAG_RD, &stats->rxerrc,
5486			"Receive Errors");
5487	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5488			CTLFLAG_RD, &stats->crcerrs,
5489			"CRC errors");
5490	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5491			CTLFLAG_RD, &stats->algnerrc,
5492			"Alignment Errors");
5493	/* On 82575 these are collision counts */
5494	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5495			CTLFLAG_RD, &stats->cexterr,
5496			"Collision/Carrier extension errors");
5497	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5498			CTLFLAG_RD, &stats->xonrxc,
5499			"XON Received");
5500	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5501			CTLFLAG_RD, &stats->xontxc,
5502			"XON Transmitted");
5503	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5504			CTLFLAG_RD, &stats->xoffrxc,
5505			"XOFF Received");
5506	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5507			CTLFLAG_RD, &stats->xofftxc,
5508			"XOFF Transmitted");
5509	/* Packet Reception Stats */
5510	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5511			CTLFLAG_RD, &stats->tpr,
5512			"Total Packets Received");
5513	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5514			CTLFLAG_RD, &stats->gprc,
5515			"Good Packets Received");
5516	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5517			CTLFLAG_RD, &stats->bprc,
5518			"Broadcast Packets Received");
5519	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5520			CTLFLAG_RD, &stats->mprc,
5521			"Multicast Packets Received");
5522	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5523			CTLFLAG_RD, &stats->prc64,
5524			"64 byte frames received");
5525	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5526			CTLFLAG_RD, &stats->prc127,
5527			"65-127 byte frames received");
5528	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5529			CTLFLAG_RD, &stats->prc255,
5530			"128-255 byte frames received");
5531	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5532			CTLFLAG_RD, &stats->prc511,
5533			"256-511 byte frames received");
5534	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5535			CTLFLAG_RD, &stats->prc1023,
5536			"512-1023 byte frames received");
5537	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5538			CTLFLAG_RD, &stats->prc1522,
5539			"1024-1522 byte frames received");
5540	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5541			CTLFLAG_RD, &stats->gorc,
5542			"Good Octets Received");
5543
5544	/* Packet Transmission Stats */
5545	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5546			CTLFLAG_RD, &stats->gotc,
5547			"Good Octets Transmitted");
5548	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5549			CTLFLAG_RD, &stats->tpt,
5550			"Total Packets Transmitted");
5551	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5552			CTLFLAG_RD, &stats->gptc,
5553			"Good Packets Transmitted");
5554	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5555			CTLFLAG_RD, &stats->bptc,
5556			"Broadcast Packets Transmitted");
5557	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5558			CTLFLAG_RD, &stats->mptc,
5559			"Multicast Packets Transmitted");
5560	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5561			CTLFLAG_RD, &stats->ptc64,
5562			"64 byte frames transmitted");
5563	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5564			CTLFLAG_RD, &stats->ptc127,
5565			"65-127 byte frames transmitted");
5566	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5567			CTLFLAG_RD, &stats->ptc255,
5568			"128-255 byte frames transmitted");
5569	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5570			CTLFLAG_RD, &stats->ptc511,
5571			"256-511 byte frames transmitted");
5572	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5573			CTLFLAG_RD, &stats->ptc1023,
5574			"512-1023 byte frames transmitted");
5575	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5576			CTLFLAG_RD, &stats->ptc1522,
5577			"1024-1522 byte frames transmitted");
5578	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5579			CTLFLAG_RD, &stats->tsctc,
5580			"TSO Contexts Transmitted");
5581	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5582			CTLFLAG_RD, &stats->tsctfc,
5583			"TSO Contexts Failed");
5584
5585
5586	/* Interrupt Stats */
5587
5588	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5589				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5590	int_list = SYSCTL_CHILDREN(int_node);
5591
5592	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5593			CTLFLAG_RD, &stats->iac,
5594			"Interrupt Assertion Count");
5595
5596	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5597			CTLFLAG_RD, &stats->icrxptc,
5598			"Interrupt Cause Rx Pkt Timer Expire Count");
5599
5600	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5601			CTLFLAG_RD, &stats->icrxatc,
5602			"Interrupt Cause Rx Abs Timer Expire Count");
5603
5604	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5605			CTLFLAG_RD, &stats->ictxptc,
5606			"Interrupt Cause Tx Pkt Timer Expire Count");
5607
5608	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5609			CTLFLAG_RD, &stats->ictxatc,
5610			"Interrupt Cause Tx Abs Timer Expire Count");
5611
5612	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5613			CTLFLAG_RD, &stats->ictxqec,
5614			"Interrupt Cause Tx Queue Empty Count");
5615
5616	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5617			CTLFLAG_RD, &stats->ictxqmtc,
5618			"Interrupt Cause Tx Queue Min Thresh Count");
5619
5620	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5621			CTLFLAG_RD, &stats->icrxdmtc,
5622			"Interrupt Cause Rx Desc Min Thresh Count");
5623
5624	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5625			CTLFLAG_RD, &stats->icrxoc,
5626			"Interrupt Cause Receiver Overrun Count");
5627
5628	/* Host to Card Stats */
5629
5630	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5631				    CTLFLAG_RD, NULL,
5632				    "Host to Card Statistics");
5633
5634	host_list = SYSCTL_CHILDREN(host_node);
5635
5636	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5637			CTLFLAG_RD, &stats->cbtmpc,
5638			"Circuit Breaker Tx Packet Count");
5639
5640	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5641			CTLFLAG_RD, &stats->htdpmc,
5642			"Host Transmit Discarded Packets");
5643
5644	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5645			CTLFLAG_RD, &stats->rpthc,
5646			"Rx Packets To Host");
5647
5648	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5649			CTLFLAG_RD, &stats->cbrmpc,
5650			"Circuit Breaker Rx Packet Count");
5651
5652	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5653			CTLFLAG_RD, &stats->cbrdpc,
5654			"Circuit Breaker Rx Dropped Count");
5655
5656	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5657			CTLFLAG_RD, &stats->hgptc,
5658			"Host Good Packets Tx Count");
5659
5660	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5661			CTLFLAG_RD, &stats->htcbdpc,
5662			"Host Tx Circuit Breaker Dropped Count");
5663
5664	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5665			CTLFLAG_RD, &stats->hgorc,
5666			"Host Good Octets Received Count");
5667
5668	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5669			CTLFLAG_RD, &stats->hgotc,
5670			"Host Good Octets Transmit Count");
5671
5672	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5673			CTLFLAG_RD, &stats->lenerrs,
5674			"Length Errors");
5675
5676	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5677			CTLFLAG_RD, &stats->scvpc,
5678			"SerDes/SGMII Code Violation Pkt Count");
5679
5680	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5681			CTLFLAG_RD, &stats->hrmpc,
5682			"Header Redirection Missed Packet Count");
5683}
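/*
** From userland these counters appear under the device's sysctl tree, for
** example (assuming unit 0):
**	sysctl dev.igb.0.mac_stats.good_pkts_recvd
**	sysctl dev.igb.0.queue0.interrupt_rate
*/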
5684
5685
5686/**********************************************************************
5687 *
5688 *  This routine provides a way to dump out the adapter EEPROM,
5689 *  often a useful debug/service tool. Only the first 32 words are
5690 *  dumped; the contents that matter are within that range.
5691 *
5692 **********************************************************************/
5693static int
5694igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5695{
5696	struct adapter *adapter;
5697	int error;
5698	int result;
5699
5700	result = -1;
5701	error = sysctl_handle_int(oidp, &result, 0, req);
5702
5703	if (error || !req->newptr)
5704		return (error);
5705
5706	/*
5707	 * This value will cause a hex dump of the
5708	 * first 32 16-bit words of the EEPROM to
5709	 * the screen.
5710	 */
5711	if (result == 1) {
5712		adapter = (struct adapter *)arg1;
5713		igb_print_nvm_info(adapter);
5714	}
5715
5716	return (error);
5717}
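/*
** Hypothetical usage, assuming this handler is registered as "nvm" under
** the device sysctl tree during attach: writing 1 triggers the dump, e.g.
**	sysctl dev.igb.0.nvm=1
*/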
5718
5719static void
5720igb_print_nvm_info(struct adapter *adapter)
5721{
5722	u16	eeprom_data;
5723	int	i, j, row = 0;
5724
5725	/* It's a bit crude, but it gets the job done */
5726	printf("\nInterface EEPROM Dump:\n");
5727	printf("Offset\n0x0000  ");
5728	for (i = 0, j = 0; i < 32; i++, j++) {
5729		if (j == 8) { /* Make the offset block */
5730			j = 0; ++row;
5731			printf("\n0x00%x0  ", row);
5732		}
5733		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5734		printf("%04x ", eeprom_data);
5735	}
5736	printf("\n");
5737}
5738
5739static void
5740igb_set_sysctl_value(struct adapter *adapter, const char *name,
5741	const char *description, int *limit, int value)
5742{
5743	*limit = value;
5744	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5745	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5746	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
5747}
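/*
** Illustrative (hypothetical) call: bind a read/write integer tunable,
** such as a receive processing limit, to a driver variable:
**
**	igb_set_sysctl_value(adapter, "rx_processing_limit",
**	    "max packets processed per receive interrupt",
**	    &adapter->rx_process_limit, 100);
*/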
5748
5749/*
5750** Set flow control using sysctl:
5751** Flow control values:
5752** 	0 - off
5753**	1 - rx pause
5754**	2 - tx pause
5755**	3 - full
5756*/
5757static int
5758igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5759{
5760	int		error;
5761	static int	input = 3; /* default is full */
5762	struct adapter	*adapter = (struct adapter *) arg1;
5763
5764	error = sysctl_handle_int(oidp, &input, 0, req);
5765
5766	if ((error) || (req->newptr == NULL))
5767		return (error);
5768
5769	switch (input) {
5770		case e1000_fc_rx_pause:
5771		case e1000_fc_tx_pause:
5772		case e1000_fc_full:
5773		case e1000_fc_none:
5774			adapter->hw.fc.requested_mode = input;
5775			adapter->fc = input;
5776			break;
5777		default:
5778			/* Do nothing */
5779			return (error);
5780	}
5781
5782	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5783	e1000_force_mac_fc(&adapter->hw);
5784	return (error);
5785}
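/*
** Hypothetical usage, assuming the handler is registered as "fc" under the
** device sysctl tree at attach time: select full flow control with
**	sysctl dev.igb.0.fc=3
** and disable it with
**	sysctl dev.igb.0.fc=0
*/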
5786
5787/*
5788** Manage DMA Coalescing:
5789** Control values:
5790** 	0/1 - off/on
5791**	Legal timer values are:
5792**	250, 500, and 1000 through 10000 in steps of 1000
5793*/
5794static int
5795igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5796{
5797	struct adapter *adapter = (struct adapter *) arg1;
5798	int		error;
5799
5800	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5801
5802	if ((error) || (req->newptr == NULL))
5803		return (error);
5804
5805	switch (adapter->dmac) {
5806		case 0:
5807			/* Disabling */
5808			break;
5809		case 1: /* Just enable and use default */
5810			adapter->dmac = 1000;
5811			break;
5812		case 250:
5813		case 500:
5814		case 1000:
5815		case 2000:
5816		case 3000:
5817		case 4000:
5818		case 5000:
5819		case 6000:
5820		case 7000:
5821		case 8000:
5822		case 9000:
5823		case 10000:
5824			/* Legal values - allow */
5825			break;
5826		default:
5827			/* Do nothing, illegal value */
5828			adapter->dmac = 0;
5829			return (error);
5830	}
5831	/* Reinit the interface */
5832	igb_init(adapter);
5833	return (error);
5834}
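/*
** Hypothetical usage, assuming this handler is registered as "dmac" under
** the device sysctl tree: enable DMA coalescing with the default timer via
**	sysctl dev.igb.0.dmac=1
** (internally promoted to 1000), or pick an explicit timer such as
**	sysctl dev.igb.0.dmac=250
** The interface is reinitialized to apply the change.
*/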
5835