if_igb.c revision 228803
1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 228803 2011-12-22 15:33:41Z luigi $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.1";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select the devices to load on.
110 *  The last field stores an index into igb_strings.
111 *  The last entry must be all 0s.
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	/* required last entry */
154	{ 0, 0, 0, 0, 0}
155};
156
157/*********************************************************************
158 *  Table of branding strings for all supported NICs.
159 *********************************************************************/
160
161static char *igb_strings[] = {
162	"Intel(R) PRO/1000 Network Connection"
163};
164
165/*********************************************************************
166 *  Function prototypes
167 *********************************************************************/
168static int	igb_probe(device_t);
169static int	igb_attach(device_t);
170static int	igb_detach(device_t);
171static int	igb_shutdown(device_t);
172static int	igb_suspend(device_t);
173static int	igb_resume(device_t);
174static void	igb_start(struct ifnet *);
175static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
176#if __FreeBSD_version >= 800000
177static int	igb_mq_start(struct ifnet *, struct mbuf *);
178static int	igb_mq_start_locked(struct ifnet *,
179		    struct tx_ring *, struct mbuf *);
180static void	igb_qflush(struct ifnet *);
181#endif
182static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
183static void	igb_init(void *);
184static void	igb_init_locked(struct adapter *);
185static void	igb_stop(void *);
186static void	igb_media_status(struct ifnet *, struct ifmediareq *);
187static int	igb_media_change(struct ifnet *);
188static void	igb_identify_hardware(struct adapter *);
189static int	igb_allocate_pci_resources(struct adapter *);
190static int	igb_allocate_msix(struct adapter *);
191static int	igb_allocate_legacy(struct adapter *);
192static int	igb_setup_msix(struct adapter *);
193static void	igb_free_pci_resources(struct adapter *);
194static void	igb_local_timer(void *);
195static void	igb_reset(struct adapter *);
196static int	igb_setup_interface(device_t, struct adapter *);
197static int	igb_allocate_queues(struct adapter *);
198static void	igb_configure_queues(struct adapter *);
199
200static int	igb_allocate_transmit_buffers(struct tx_ring *);
201static void	igb_setup_transmit_structures(struct adapter *);
202static void	igb_setup_transmit_ring(struct tx_ring *);
203static void	igb_initialize_transmit_units(struct adapter *);
204static void	igb_free_transmit_structures(struct adapter *);
205static void	igb_free_transmit_buffers(struct tx_ring *);
206
207static int	igb_allocate_receive_buffers(struct rx_ring *);
208static int	igb_setup_receive_structures(struct adapter *);
209static int	igb_setup_receive_ring(struct rx_ring *);
210static void	igb_initialize_receive_units(struct adapter *);
211static void	igb_free_receive_structures(struct adapter *);
212static void	igb_free_receive_buffers(struct rx_ring *);
213static void	igb_free_receive_ring(struct rx_ring *);
214
215static void	igb_enable_intr(struct adapter *);
216static void	igb_disable_intr(struct adapter *);
217static void	igb_update_stats_counters(struct adapter *);
218static bool	igb_txeof(struct tx_ring *);
219
220static __inline	void igb_rx_discard(struct rx_ring *, int);
221static __inline void igb_rx_input(struct rx_ring *,
222		    struct ifnet *, struct mbuf *, u32);
223
224static bool	igb_rxeof(struct igb_queue *, int, int *);
225static void	igb_rx_checksum(u32, struct mbuf *, u32);
226static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
227static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
228		    struct ip *, struct tcphdr *);
229static void	igb_set_promisc(struct adapter *);
230static void	igb_disable_promisc(struct adapter *);
231static void	igb_set_multi(struct adapter *);
232static void	igb_update_link_status(struct adapter *);
233static void	igb_refresh_mbufs(struct rx_ring *, int);
234
235static void	igb_register_vlan(void *, struct ifnet *, u16);
236static void	igb_unregister_vlan(void *, struct ifnet *, u16);
237static void	igb_setup_vlan_hw_support(struct adapter *);
238
239static int	igb_xmit(struct tx_ring *, struct mbuf **);
240static int	igb_dma_malloc(struct adapter *, bus_size_t,
241		    struct igb_dma_alloc *, int);
242static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
243static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
244static void	igb_print_nvm_info(struct adapter *);
245static int 	igb_is_valid_ether_addr(u8 *);
246static void     igb_add_hw_stats(struct adapter *);
247
248static void	igb_vf_init_stats(struct adapter *);
249static void	igb_update_vf_stats_counters(struct adapter *);
250
251/* Management and WOL Support */
252static void	igb_init_manageability(struct adapter *);
253static void	igb_release_manageability(struct adapter *);
254static void     igb_get_hw_control(struct adapter *);
255static void     igb_release_hw_control(struct adapter *);
256static void     igb_enable_wakeup(device_t);
257static void     igb_led_func(void *, int);
258
259static int	igb_irq_fast(void *);
260static void	igb_msix_que(void *);
261static void	igb_msix_link(void *);
262static void	igb_handle_que(void *context, int pending);
263static void	igb_handle_link(void *context, int pending);
264
265static void	igb_set_sysctl_value(struct adapter *, const char *,
266		    const char *, int *, int);
267static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
268static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
269
270#ifdef DEVICE_POLLING
271static poll_handler_t igb_poll;
272#endif /* DEVICE_POLLING */
273
274/*********************************************************************
275 *  FreeBSD Device Interface Entry Points
276 *********************************************************************/
277
278static device_method_t igb_methods[] = {
279	/* Device interface */
280	DEVMETHOD(device_probe, igb_probe),
281	DEVMETHOD(device_attach, igb_attach),
282	DEVMETHOD(device_detach, igb_detach),
283	DEVMETHOD(device_shutdown, igb_shutdown),
284	DEVMETHOD(device_suspend, igb_suspend),
285	DEVMETHOD(device_resume, igb_resume),
286	{0, 0}
287};
288
289static driver_t igb_driver = {
290	"igb", igb_methods, sizeof(struct adapter),
291};
292
293static devclass_t igb_devclass;
294DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
295MODULE_DEPEND(igb, pci, 1, 1, 1);
296MODULE_DEPEND(igb, ether, 1, 1, 1);
297
298/*********************************************************************
299 *  Tunable default values.
300 *********************************************************************/
301
302static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
303
304/* Descriptor defaults */
305static int igb_rxd = IGB_DEFAULT_RXD;
306static int igb_txd = IGB_DEFAULT_TXD;
307TUNABLE_INT("hw.igb.rxd", &igb_rxd);
308TUNABLE_INT("hw.igb.txd", &igb_txd);
309SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
310    "Number of receive descriptors per queue");
311SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
312    "Number of transmit descriptors per queue");
313
314/*
315** AIM: Adaptive Interrupt Moderation
316** which means that the interrupt rate
317** is varied over time based on the
318** traffic for that interrupt vector
319*/
320static int igb_enable_aim = TRUE;
321TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
322SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
323    "Enable adaptive interrupt moderation");
324
325/*
326 * MSIX should be the default for best performance,
327 * but this allows it to be forced off for testing.
328 */
329static int igb_enable_msix = 1;
330TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
331SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
332    "Enable MSI-X interrupts");
333
334/*
335** Tunable interrupt rate
336*/
337static int igb_max_interrupt_rate = 8000;
338TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
339SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
340    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
341
342/*
343** Header split causes the packet header to
344** be DMA'd to a separate mbuf from the payload.
345** This can have memory alignment benefits, and
346** another plus is that small packets often fit
347** entirely in the header mbuf and thus need no cluster.
348** It is a very workload-dependent feature.
349*/
350static int igb_header_split = FALSE;
351TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
352SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
353    "Enable receive mbuf header split");
354
355/*
356** This will autoconfigure based on
357** the number of CPUs if left at 0.
358*/
359static int igb_num_queues = 0;
360TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
361SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
362    "Number of queues to configure, 0 indicates autoconfigure");
363
364/* How many packets rxeof tries to clean at a time */
365static int igb_rx_process_limit = 100;
366TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
367SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
368    &igb_rx_process_limit, 0,
369    "Maximum number of received packets to process at a time, -1 means unlimited");
370
371#ifdef DEV_NETMAP	/* see ixgbe.c for details */
372#include <dev/netmap/if_igb_netmap.h>
373#endif /* DEV_NETMAP */
374/*********************************************************************
375 *  Device identification routine
376 *
377 *  igb_probe determines if the driver should be loaded on
378 *  adapter based on PCI vendor/device id of the adapter.
379 *
380 *  return BUS_PROBE_DEFAULT on success, positive on failure
381 *********************************************************************/
382
383static int
384igb_probe(device_t dev)
385{
386	char		adapter_name[60];
387	uint16_t	pci_vendor_id = 0;
388	uint16_t	pci_device_id = 0;
389	uint16_t	pci_subvendor_id = 0;
390	uint16_t	pci_subdevice_id = 0;
391	igb_vendor_info_t *ent;
392
393	INIT_DEBUGOUT("igb_probe: begin");
394
395	pci_vendor_id = pci_get_vendor(dev);
396	if (pci_vendor_id != IGB_VENDOR_ID)
397		return (ENXIO);
398
399	pci_device_id = pci_get_device(dev);
400	pci_subvendor_id = pci_get_subvendor(dev);
401	pci_subdevice_id = pci_get_subdevice(dev);
402
403	ent = igb_vendor_info_array;
404	while (ent->vendor_id != 0) {
405		if ((pci_vendor_id == ent->vendor_id) &&
406		    (pci_device_id == ent->device_id) &&
407
408		    ((pci_subvendor_id == ent->subvendor_id) ||
409		    (ent->subvendor_id == PCI_ANY_ID)) &&
410
411		    ((pci_subdevice_id == ent->subdevice_id) ||
412		    (ent->subdevice_id == PCI_ANY_ID))) {
413			sprintf(adapter_name, "%s %s",
414				igb_strings[ent->index],
415				igb_driver_version);
416			device_set_desc_copy(dev, adapter_name);
417			return (BUS_PROBE_DEFAULT);
418		}
419		ent++;
420	}
421
422	return (ENXIO);
423}
424
425/*********************************************************************
426 *  Device initialization routine
427 *
428 *  The attach entry point is called when the driver is being loaded.
429 *  This routine identifies the type of hardware, allocates all resources
430 *  and initializes the hardware.
431 *
432 *  return 0 on success, positive on failure
433 *********************************************************************/
434
435static int
436igb_attach(device_t dev)
437{
438	struct adapter	*adapter;
439	int		error = 0;
440	u16		eeprom_data;
441
442	INIT_DEBUGOUT("igb_attach: begin");
443
444	if (resource_disabled("igb", device_get_unit(dev))) {
445		device_printf(dev, "Disabled by device hint\n");
446		return (ENXIO);
447	}
448
449	adapter = device_get_softc(dev);
450	adapter->dev = adapter->osdep.dev = dev;
451	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
452
453	/* SYSCTL stuff */
454	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
455	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
456	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
457	    igb_sysctl_nvm_info, "I", "NVM Information");
458
459	igb_set_sysctl_value(adapter, "enable_aim",
460	    "Interrupt Moderation", &adapter->enable_aim,
461	    igb_enable_aim);
462
463	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
464	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
465	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
466	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
467
468	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
469
470	/* Determine hardware and mac info */
471	igb_identify_hardware(adapter);
472
473	/* Setup PCI resources */
474	if (igb_allocate_pci_resources(adapter)) {
475		device_printf(dev, "Allocation of PCI resources failed\n");
476		error = ENXIO;
477		goto err_pci;
478	}
479
480	/* Do Shared Code initialization */
481	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
482		device_printf(dev, "Setup of Shared code failed\n");
483		error = ENXIO;
484		goto err_pci;
485	}
486
487	e1000_get_bus_info(&adapter->hw);
488
489	/* Sysctl for limiting the amount of work done in the taskqueue */
490	igb_set_sysctl_value(adapter, "rx_processing_limit",
491	    "max number of rx packets to process",
492	    &adapter->rx_process_limit, igb_rx_process_limit);
493
494	/*
495	 * Validate the number of transmit and receive descriptors. They
496	 * must not exceed the hardware maximum and must be a multiple
497	 * of IGB_DBA_ALIGN.
498	 */
499	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
500	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
501		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
502		    IGB_DEFAULT_TXD, igb_txd);
503		adapter->num_tx_desc = IGB_DEFAULT_TXD;
504	} else
505		adapter->num_tx_desc = igb_txd;
506	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
507	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
508		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
509		    IGB_DEFAULT_RXD, igb_rxd);
510		adapter->num_rx_desc = IGB_DEFAULT_RXD;
511	} else
512		adapter->num_rx_desc = igb_rxd;
513
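	/* Default link setup: autonegotiate and advertise the default speed/duplex set */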
514	adapter->hw.mac.autoneg = DO_AUTO_NEG;
515	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
516	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
517
518	/* Copper options */
519	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
520		adapter->hw.phy.mdix = AUTO_ALL_MODES;
521		adapter->hw.phy.disable_polarity_correction = FALSE;
522		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
523	}
524
525	/*
526	 * Set the frame limits assuming
527	 * standard Ethernet-sized frames.
528	 */
529	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
530	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
531
532	/*
533	** Allocate and Setup Queues
534	*/
535	if (igb_allocate_queues(adapter)) {
536		error = ENOMEM;
537		goto err_pci;
538	}
539
540	/* Allocate the appropriate stats memory */
541	if (adapter->vf_ifp) {
542		adapter->stats =
543		    (struct e1000_vf_stats *)malloc(sizeof \
544		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
545		igb_vf_init_stats(adapter);
546	} else
547		adapter->stats =
548		    (struct e1000_hw_stats *)malloc(sizeof \
549		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
550	if (adapter->stats == NULL) {
551		device_printf(dev, "Can not allocate stats memory\n");
552		error = ENOMEM;
553		goto err_late;
554	}
555
556	/* Allocate multicast array memory. */
557	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
558	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
559	if (adapter->mta == NULL) {
560		device_printf(dev, "Can not allocate multicast setup array\n");
561		error = ENOMEM;
562		goto err_late;
563	}
564
565	/* Some adapter-specific advanced features */
566	if (adapter->hw.mac.type >= e1000_i350) {
567		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
568		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
569		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
570		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
571		igb_set_sysctl_value(adapter, "eee_disabled",
572		    "disable Energy Efficient Ethernet",
573		    &adapter->hw.dev_spec._82575.eee_disable,
574		    TRUE);
575		e1000_set_eee_i350(&adapter->hw);
576	}
577
578	/*
579	** Start from a known state; this is
580	** important for reading the NVM and
581	** MAC address from it.
582	*/
583	e1000_reset_hw(&adapter->hw);
584
585	/* Make sure we have a good EEPROM before we read from it */
586	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
587		/*
588		** Some PCI-E parts fail the first check due to
589		** the link being in a sleep state; call it again.
590		** If it fails a second time, it is a real issue.
591		*/
592		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
593			device_printf(dev,
594			    "The EEPROM Checksum Is Not Valid\n");
595			error = EIO;
596			goto err_late;
597		}
598	}
599
600	/*
601	** Copy the permanent MAC address out of the EEPROM
602	*/
603	if (e1000_read_mac_addr(&adapter->hw) < 0) {
604		device_printf(dev, "EEPROM read error while reading MAC"
605		    " address\n");
606		error = EIO;
607		goto err_late;
608	}
609	/* Check its sanity */
610	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
611		device_printf(dev, "Invalid MAC address\n");
612		error = EIO;
613		goto err_late;
614	}
615
616	/* Setup OS specific network interface */
617	if (igb_setup_interface(dev, adapter) != 0)
618		goto err_late;
619
620	/* Now get a good starting state */
621	igb_reset(adapter);
622
623	/* Initialize statistics */
624	igb_update_stats_counters(adapter);
625
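	/* Force a fresh link check and report the initial link state */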
626	adapter->hw.mac.get_link_status = 1;
627	igb_update_link_status(adapter);
628
629	/* Indicate SOL/IDER usage */
630	if (e1000_check_reset_block(&adapter->hw))
631		device_printf(dev,
632		    "PHY reset is blocked due to SOL/IDER session.\n");
633
634	/* Determine if we have to control management hardware */
635	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
636
637	/*
638	 * Setup Wake-on-LAN
639	 */
640	/* APME bit in EEPROM is mapped to WUC.APME */
641	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
642	if (eeprom_data)
643		adapter->wol = E1000_WUFC_MAG;
644
645	/* Register for VLAN events */
646	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
647	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
648	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
649	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
650
651	igb_add_hw_stats(adapter);
652
653	/* Tell the stack that the interface is not active */
654	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
655	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
656
657	adapter->led_dev = led_create(igb_led_func, adapter,
658	    device_get_nameunit(dev));
659
660	/*
661	** Configure Interrupts
662	*/
663	if ((adapter->msix > 1) && (igb_enable_msix))
664		error = igb_allocate_msix(adapter);
665	else /* MSI or Legacy */
666		error = igb_allocate_legacy(adapter);
667	if (error)
668		goto err_late;
669
670#ifdef DEV_NETMAP
671	igb_netmap_attach(adapter);
672#endif /* DEV_NETMAP */
673	INIT_DEBUGOUT("igb_attach: end");
674
675	return (0);
676
677err_late:
678	igb_detach(dev);
679	igb_free_transmit_structures(adapter);
680	igb_free_receive_structures(adapter);
681	igb_release_hw_control(adapter);
682err_pci:
683	igb_free_pci_resources(adapter);
684	if (adapter->ifp != NULL)
685		if_free(adapter->ifp);
686	free(adapter->mta, M_DEVBUF);
687	IGB_CORE_LOCK_DESTROY(adapter);
688
689	return (error);
690}
691
692/*********************************************************************
693 *  Device removal routine
694 *
695 *  The detach entry point is called when the driver is being removed.
696 *  This routine stops the adapter and deallocates all the resources
697 *  that were allocated for driver operation.
698 *
699 *  return 0 on success, positive on failure
700 *********************************************************************/
701
702static int
703igb_detach(device_t dev)
704{
705	struct adapter	*adapter = device_get_softc(dev);
706	struct ifnet	*ifp = adapter->ifp;
707
708	INIT_DEBUGOUT("igb_detach: begin");
709
710	/* Make sure VLANs are not using the driver */
711	if (adapter->ifp->if_vlantrunk != NULL) {
712		device_printf(dev,"Vlan in use, detach first\n");
713		return (EBUSY);
714	}
715
716	if (adapter->led_dev != NULL)
717		led_destroy(adapter->led_dev);
718
719#ifdef DEVICE_POLLING
720	if (ifp->if_capenable & IFCAP_POLLING)
721		ether_poll_deregister(ifp);
722#endif
723
724	IGB_CORE_LOCK(adapter);
725	adapter->in_detach = 1;
726	igb_stop(adapter);
727	IGB_CORE_UNLOCK(adapter);
728
729	e1000_phy_hw_reset(&adapter->hw);
730
731	/* Give control back to firmware */
732	igb_release_manageability(adapter);
733	igb_release_hw_control(adapter);
734
735	if (adapter->wol) {
736		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
737		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
738		igb_enable_wakeup(dev);
739	}
740
741	/* Unregister VLAN events */
742	if (adapter->vlan_attach != NULL)
743		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
744	if (adapter->vlan_detach != NULL)
745		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
746
747	ether_ifdetach(adapter->ifp);
748
749	callout_drain(&adapter->timer);
750
751#ifdef DEV_NETMAP
752	netmap_detach(adapter->ifp);
753#endif /* DEV_NETMAP */
754	igb_free_pci_resources(adapter);
755	bus_generic_detach(dev);
756	if_free(ifp);
757
758	igb_free_transmit_structures(adapter);
759	igb_free_receive_structures(adapter);
760	if (adapter->mta != NULL)
761		free(adapter->mta, M_DEVBUF);
762
763	IGB_CORE_LOCK_DESTROY(adapter);
764
765	return (0);
766}
767
768/*********************************************************************
769 *
770 *  Shutdown entry point
771 *
772 **********************************************************************/
773
774static int
775igb_shutdown(device_t dev)
776{
777	return igb_suspend(dev);
778}
779
780/*
781 * Suspend/resume device methods.
782 */
783static int
784igb_suspend(device_t dev)
785{
786	struct adapter *adapter = device_get_softc(dev);
787
788	IGB_CORE_LOCK(adapter);
789
790	igb_stop(adapter);
791
792        igb_release_manageability(adapter);
793	igb_release_hw_control(adapter);
794
795        if (adapter->wol) {
796                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
797                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
798                igb_enable_wakeup(dev);
799        }
800
801	IGB_CORE_UNLOCK(adapter);
802
803	return bus_generic_suspend(dev);
804}
805
806static int
807igb_resume(device_t dev)
808{
809	struct adapter *adapter = device_get_softc(dev);
810	struct ifnet *ifp = adapter->ifp;
811
812	IGB_CORE_LOCK(adapter);
813	igb_init_locked(adapter);
814	igb_init_manageability(adapter);
815
816	if ((ifp->if_flags & IFF_UP) &&
817	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
818		igb_start(ifp);
819
820	IGB_CORE_UNLOCK(adapter);
821
822	return bus_generic_resume(dev);
823}
824
825
826/*********************************************************************
827 *  Transmit entry point
828 *
829 *  igb_start is called by the stack to initiate a transmit.
830 *  The driver will remain in this routine as long as there are
831 *  packets to transmit and transmit resources are available.
832 *  In case resources are not available stack is notified and
833 *  the packet is requeued.
834 **********************************************************************/
835
836static void
837igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
838{
839	struct adapter	*adapter = ifp->if_softc;
840	struct mbuf	*m_head;
841
842	IGB_TX_LOCK_ASSERT(txr);
843
844	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
845	    IFF_DRV_RUNNING)
846		return;
847	if (!adapter->link_active)
848		return;
849
850	/* Call cleanup if number of TX descriptors low */
851	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
852		igb_txeof(txr);
853
854	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
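		/* Stop when there are not enough free descriptors for a maximally fragmented packet */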
855		if (txr->tx_avail <= IGB_MAX_SCATTER) {
856			txr->queue_status |= IGB_QUEUE_DEPLETED;
857			break;
858		}
859		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
860		if (m_head == NULL)
861			break;
862		/*
863		 *  Encapsulation can modify our pointer, and/or make it
864		 *  NULL on failure.  In that event, we can't requeue.
865		 */
866		if (igb_xmit(txr, &m_head)) {
867			if (m_head != NULL)
868				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
869			if (txr->tx_avail <= IGB_MAX_SCATTER)
870				txr->queue_status |= IGB_QUEUE_DEPLETED;
871			break;
872		}
873
874		/* Send a copy of the frame to the BPF listener */
875		ETHER_BPF_MTAP(ifp, m_head);
876
877		/* Set watchdog on */
878		txr->watchdog_time = ticks;
879		txr->queue_status |= IGB_QUEUE_WORKING;
880	}
881}
882
883/*
884 * Legacy TX driver routine, called from the
885 * stack; it always uses the first TX ring and spins for its lock.
886 * It should not be used with multiqueue TX.
887 */
888static void
889igb_start(struct ifnet *ifp)
890{
891	struct adapter	*adapter = ifp->if_softc;
892	struct tx_ring	*txr = adapter->tx_rings;
893
894	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
895		IGB_TX_LOCK(txr);
896		igb_start_locked(txr, ifp);
897		IGB_TX_UNLOCK(txr);
898	}
899	return;
900}
901
902#if __FreeBSD_version >= 800000
903/*
904** Multiqueue Transmit driver
905**
906*/
907static int
908igb_mq_start(struct ifnet *ifp, struct mbuf *m)
909{
910	struct adapter		*adapter = ifp->if_softc;
911	struct igb_queue	*que;
912	struct tx_ring		*txr;
913	int 			i, err = 0;
914	bool			moveable = TRUE;
915
916	/* Which queue to use */
917	if ((m->m_flags & M_FLOWID) != 0) {
918		i = m->m_pkthdr.flowid % adapter->num_queues;
919		moveable = FALSE;
920	} else
921		i = curcpu % adapter->num_queues;
922
923	txr = &adapter->tx_rings[i];
924	que = &adapter->queues[i];
925	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
926	    IGB_TX_TRYLOCK(txr)) {
927		err = igb_mq_start_locked(ifp, txr, m);
928		IGB_TX_UNLOCK(txr);
929	} else {
930		err = drbr_enqueue(ifp, txr->br, m);
931		taskqueue_enqueue(que->tq, &que->que_task);
932	}
933
934	return (err);
935}
936
937static int
938igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
939{
940	struct adapter  *adapter = txr->adapter;
941        struct mbuf     *next;
942        int             err = 0, enq;
943
944	IGB_TX_LOCK_ASSERT(txr);
945
946	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
947	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
948	    adapter->link_active == 0) {
949		if (m != NULL)
950			err = drbr_enqueue(ifp, txr->br, m);
951		return (err);
952	}
953
954	enq = 0;
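	/* If the ring already has a backlog, enqueue first to preserve packet ordering */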
955	if (m == NULL) {
956		next = drbr_dequeue(ifp, txr->br);
957	} else if (drbr_needs_enqueue(ifp, txr->br)) {
958		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
959			return (err);
960		next = drbr_dequeue(ifp, txr->br);
961	} else
962		next = m;
963
964	/* Process the queue */
965	while (next != NULL) {
966		if ((err = igb_xmit(txr, &next)) != 0) {
967			if (next != NULL)
968				err = drbr_enqueue(ifp, txr->br, next);
969			break;
970		}
971		enq++;
972		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
973		ETHER_BPF_MTAP(ifp, next);
974		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
975			break;
976		next = drbr_dequeue(ifp, txr->br);
977	}
978	if (enq > 0) {
979		/* Set the watchdog */
980		txr->queue_status |= IGB_QUEUE_WORKING;
981		txr->watchdog_time = ticks;
982	}
983	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
984		igb_txeof(txr);
985	if (txr->tx_avail <= IGB_MAX_SCATTER)
986		txr->queue_status |= IGB_QUEUE_DEPLETED;
987	return (err);
988}
989
990/*
991** Flush all ring buffers
992*/
993static void
994igb_qflush(struct ifnet *ifp)
995{
996	struct adapter	*adapter = ifp->if_softc;
997	struct tx_ring	*txr = adapter->tx_rings;
998	struct mbuf	*m;
999
1000	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1001		IGB_TX_LOCK(txr);
1002		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1003			m_freem(m);
1004		IGB_TX_UNLOCK(txr);
1005	}
1006	if_qflush(ifp);
1007}
1008#endif /* __FreeBSD_version >= 800000 */
1009
1010/*********************************************************************
1011 *  Ioctl entry point
1012 *
1013 *  igb_ioctl is called when the user wants to configure the
1014 *  interface.
1015 *
1016 *  return 0 on success, positive on failure
1017 **********************************************************************/
1018
1019static int
1020igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1021{
1022	struct adapter	*adapter = ifp->if_softc;
1023	struct ifreq	*ifr = (struct ifreq *)data;
1024#if defined(INET) || defined(INET6)
1025	struct ifaddr	*ifa = (struct ifaddr *)data;
1026#endif
1027	bool		avoid_reset = FALSE;
1028	int		error = 0;
1029
1030	if (adapter->in_detach)
1031		return (error);
1032
1033	switch (command) {
1034	case SIOCSIFADDR:
1035#ifdef INET
1036		if (ifa->ifa_addr->sa_family == AF_INET)
1037			avoid_reset = TRUE;
1038#endif
1039#ifdef INET6
1040		if (ifa->ifa_addr->sa_family == AF_INET6)
1041			avoid_reset = TRUE;
1042#endif
1043		/*
1044		** Calling init results in link renegotiation,
1045		** so we avoid doing it when possible.
1046		*/
1047		if (avoid_reset) {
1048			ifp->if_flags |= IFF_UP;
1049			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1050				igb_init(adapter);
1051#ifdef INET
1052			if (!(ifp->if_flags & IFF_NOARP))
1053				arp_ifinit(ifp, ifa);
1054#endif
1055		} else
1056			error = ether_ioctl(ifp, command, data);
1057		break;
1058	case SIOCSIFMTU:
1059	    {
1060		int max_frame_size;
1061
1062		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1063
1064		IGB_CORE_LOCK(adapter);
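		/* 9234 = 9216-byte jumbo MTU + Ethernet header (14) + CRC (4) */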
1065		max_frame_size = 9234;
1066		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1067		    ETHER_CRC_LEN) {
1068			IGB_CORE_UNLOCK(adapter);
1069			error = EINVAL;
1070			break;
1071		}
1072
1073		ifp->if_mtu = ifr->ifr_mtu;
1074		adapter->max_frame_size =
1075		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1076		igb_init_locked(adapter);
1077		IGB_CORE_UNLOCK(adapter);
1078		break;
1079	    }
1080	case SIOCSIFFLAGS:
1081		IOCTL_DEBUGOUT("ioctl rcv'd:\
1082		    SIOCSIFFLAGS (Set Interface Flags)");
1083		IGB_CORE_LOCK(adapter);
1084		if (ifp->if_flags & IFF_UP) {
1085			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1086				if ((ifp->if_flags ^ adapter->if_flags) &
1087				    (IFF_PROMISC | IFF_ALLMULTI)) {
1088					igb_disable_promisc(adapter);
1089					igb_set_promisc(adapter);
1090				}
1091			} else
1092				igb_init_locked(adapter);
1093		} else
1094			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1095				igb_stop(adapter);
1096		adapter->if_flags = ifp->if_flags;
1097		IGB_CORE_UNLOCK(adapter);
1098		break;
1099	case SIOCADDMULTI:
1100	case SIOCDELMULTI:
1101		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1102		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1103			IGB_CORE_LOCK(adapter);
1104			igb_disable_intr(adapter);
1105			igb_set_multi(adapter);
1106#ifdef DEVICE_POLLING
1107			if (!(ifp->if_capenable & IFCAP_POLLING))
1108#endif
1109				igb_enable_intr(adapter);
1110			IGB_CORE_UNLOCK(adapter);
1111		}
1112		break;
1113	case SIOCSIFMEDIA:
1114		/* Check SOL/IDER usage */
1115		IGB_CORE_LOCK(adapter);
1116		if (e1000_check_reset_block(&adapter->hw)) {
1117			IGB_CORE_UNLOCK(adapter);
1118			device_printf(adapter->dev, "Media change is"
1119			    " blocked due to SOL/IDER session.\n");
1120			break;
1121		}
1122		IGB_CORE_UNLOCK(adapter);
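		/* FALLTHROUGH */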
1123	case SIOCGIFMEDIA:
1124		IOCTL_DEBUGOUT("ioctl rcv'd: \
1125		    SIOCxIFMEDIA (Get/Set Interface Media)");
1126		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1127		break;
1128	case SIOCSIFCAP:
1129	    {
1130		int mask, reinit;
1131
1132		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1133		reinit = 0;
1134		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1135#ifdef DEVICE_POLLING
1136		if (mask & IFCAP_POLLING) {
1137			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1138				error = ether_poll_register(igb_poll, ifp);
1139				if (error)
1140					return (error);
1141				IGB_CORE_LOCK(adapter);
1142				igb_disable_intr(adapter);
1143				ifp->if_capenable |= IFCAP_POLLING;
1144				IGB_CORE_UNLOCK(adapter);
1145			} else {
1146				error = ether_poll_deregister(ifp);
1147				/* Enable interrupt even in error case */
1148				IGB_CORE_LOCK(adapter);
1149				igb_enable_intr(adapter);
1150				ifp->if_capenable &= ~IFCAP_POLLING;
1151				IGB_CORE_UNLOCK(adapter);
1152			}
1153		}
1154#endif
1155		if (mask & IFCAP_HWCSUM) {
1156			ifp->if_capenable ^= IFCAP_HWCSUM;
1157			reinit = 1;
1158		}
1159		if (mask & IFCAP_TSO4) {
1160			ifp->if_capenable ^= IFCAP_TSO4;
1161			reinit = 1;
1162		}
1163		if (mask & IFCAP_VLAN_HWTAGGING) {
1164			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1165			reinit = 1;
1166		}
1167		if (mask & IFCAP_VLAN_HWFILTER) {
1168			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1169			reinit = 1;
1170		}
1171		if (mask & IFCAP_VLAN_HWTSO) {
1172			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1173			reinit = 1;
1174		}
1175		if (mask & IFCAP_LRO) {
1176			ifp->if_capenable ^= IFCAP_LRO;
1177			reinit = 1;
1178		}
1179		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1180			igb_init(adapter);
1181		VLAN_CAPABILITIES(ifp);
1182		break;
1183	    }
1184
1185	default:
1186		error = ether_ioctl(ifp, command, data);
1187		break;
1188	}
1189
1190	return (error);
1191}
1192
1193
1194/*********************************************************************
1195 *  Init entry point
1196 *
1197 *  This routine is used in two ways. It is used by the stack as
1198 *  init entry point in network interface structure. It is also used
1199 *  by the driver as a hw/sw initialization routine to get to a
1200 *  consistent state.
1201 *
1202 *  return 0 on success, positive on failure
1203 **********************************************************************/
1204
1205static void
1206igb_init_locked(struct adapter *adapter)
1207{
1208	struct ifnet	*ifp = adapter->ifp;
1209	device_t	dev = adapter->dev;
1210
1211	INIT_DEBUGOUT("igb_init: begin");
1212
1213	IGB_CORE_LOCK_ASSERT(adapter);
1214
1215	igb_disable_intr(adapter);
1216	callout_stop(&adapter->timer);
1217
1218	/* Get the latest mac address, User can use a LAA */
1219        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1220              ETHER_ADDR_LEN);
1221
1222	/* Put the address into the Receive Address Array */
1223	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1224
1225	igb_reset(adapter);
1226	igb_update_link_status(adapter);
1227
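	/* Program the VLAN Ether Type (VET) register with the 802.1Q TPID */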
1228	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1229
1230	/* Set hardware offload abilities */
1231	ifp->if_hwassist = 0;
1232	if (ifp->if_capenable & IFCAP_TXCSUM) {
1233		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1234#if __FreeBSD_version >= 800000
1235		if (adapter->hw.mac.type == e1000_82576)
1236			ifp->if_hwassist |= CSUM_SCTP;
1237#endif
1238	}
1239
1240	if (ifp->if_capenable & IFCAP_TSO4)
1241		ifp->if_hwassist |= CSUM_TSO;
1242
1243	/* Configure for OS presence */
1244	igb_init_manageability(adapter);
1245
1246	/* Prepare transmit descriptors and buffers */
1247	igb_setup_transmit_structures(adapter);
1248	igb_initialize_transmit_units(adapter);
1249
1250	/* Setup Multicast table */
1251	igb_set_multi(adapter);
1252
1253	/*
1254	** Figure out the desired mbuf pool
1255	** for doing jumbo/packetsplit
1256	*/
1257	if (adapter->max_frame_size <= 2048)
1258		adapter->rx_mbuf_sz = MCLBYTES;
1259	else if (adapter->max_frame_size <= 4096)
1260		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1261	else
1262		adapter->rx_mbuf_sz = MJUM9BYTES;
1263
1264	/* Prepare receive descriptors and buffers */
1265	if (igb_setup_receive_structures(adapter)) {
1266		device_printf(dev, "Could not setup receive structures\n");
1267		return;
1268	}
1269	igb_initialize_receive_units(adapter);
1270
1271        /* Enable VLAN support */
1272	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1273		igb_setup_vlan_hw_support(adapter);
1274
1275	/* Don't lose promiscuous settings */
1276	igb_set_promisc(adapter);
1277
1278	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1279	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1280
1281	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1282	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1283
1284	if (adapter->msix > 1) /* Set up queue routing */
1285		igb_configure_queues(adapter);
1286
1287	/* this clears any pending interrupts */
1288	E1000_READ_REG(&adapter->hw, E1000_ICR);
1289#ifdef DEVICE_POLLING
1290	/*
1291	 * Only enable interrupts if we are not polling, make sure
1292	 * they are off otherwise.
1293	 */
1294	if (ifp->if_capenable & IFCAP_POLLING)
1295		igb_disable_intr(adapter);
1296	else
1297#endif /* DEVICE_POLLING */
1298	{
1299		igb_enable_intr(adapter);
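		/* Fire a link-status-change interrupt so the link state gets updated */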
1300		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1301	}
1302
1303	/* Set Energy Efficient Ethernet */
1304
1305	e1000_set_eee_i350(&adapter->hw);
1306}
1307
1308static void
1309igb_init(void *arg)
1310{
1311	struct adapter *adapter = arg;
1312
1313	IGB_CORE_LOCK(adapter);
1314	igb_init_locked(adapter);
1315	IGB_CORE_UNLOCK(adapter);
1316}
1317
1318
1319static void
1320igb_handle_que(void *context, int pending)
1321{
1322	struct igb_queue *que = context;
1323	struct adapter *adapter = que->adapter;
1324	struct tx_ring *txr = que->txr;
1325	struct ifnet	*ifp = adapter->ifp;
1326
1327	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1328		bool	more;
1329
1330		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1331
1332		IGB_TX_LOCK(txr);
1333		if (igb_txeof(txr))
1334			more = TRUE;
1335#if __FreeBSD_version >= 800000
1336		/* Process the stack queue only if not depleted */
1337		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1338		    !drbr_empty(ifp, txr->br))
1339			igb_mq_start_locked(ifp, txr, NULL);
1340#else
1341		igb_start_locked(txr, ifp);
1342#endif
1343		IGB_TX_UNLOCK(txr);
1344		/* Do we need another? */
1345		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
1346			taskqueue_enqueue(que->tq, &que->que_task);
1347			return;
1348		}
1349	}
1350
1351#ifdef DEVICE_POLLING
1352	if (ifp->if_capenable & IFCAP_POLLING)
1353		return;
1354#endif
1355	/* Reenable this interrupt */
1356	if (que->eims)
1357		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1358	else
1359		igb_enable_intr(adapter);
1360}
1361
1362/* Deal with link in a sleepable context */
1363static void
1364igb_handle_link(void *context, int pending)
1365{
1366	struct adapter *adapter = context;
1367
1368	adapter->hw.mac.get_link_status = 1;
1369	igb_update_link_status(adapter);
1370}
1371
1372/*********************************************************************
1373 *
1374 *  MSI/Legacy Deferred
1375 *  Interrupt Service routine
1376 *
1377 *********************************************************************/
1378static int
1379igb_irq_fast(void *arg)
1380{
1381	struct adapter		*adapter = arg;
1382	struct igb_queue	*que = adapter->queues;
1383	u32			reg_icr;
1384
1385
1386	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1387
1388	/* Hot eject?  */
1389	if (reg_icr == 0xffffffff)
1390		return FILTER_STRAY;
1391
1392	/* Definitely not our interrupt.  */
1393	if (reg_icr == 0x0)
1394		return FILTER_STRAY;
1395
1396	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1397		return FILTER_STRAY;
1398
1399	/*
1400	 * Mask interrupts until the taskqueue is finished running.  This is
1401	 * cheap, just assume that it is needed.  This also works around the
1402	 * MSI message reordering errata on certain systems.
1403	 */
1404	igb_disable_intr(adapter);
1405	taskqueue_enqueue(que->tq, &que->que_task);
1406
1407	/* Link status change */
1408	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1409		taskqueue_enqueue(que->tq, &adapter->link_task);
1410
1411	if (reg_icr & E1000_ICR_RXO)
1412		adapter->rx_overruns++;
1413	return FILTER_HANDLED;
1414}
1415
1416#ifdef DEVICE_POLLING
1417/*********************************************************************
1418 *
1419 *  Legacy polling routine: if you use this code you MUST be sure that
1420 *  multiqueue is not in use, i.e., set igb_num_queues to 1.
1421 *
1422 *********************************************************************/
1423#if __FreeBSD_version >= 800000
1424#define POLL_RETURN_COUNT(a) (a)
1425static int
1426#else
1427#define POLL_RETURN_COUNT(a)
1428static void
1429#endif
1430igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1431{
1432	struct adapter		*adapter = ifp->if_softc;
1433	struct igb_queue	*que = adapter->queues;
1434	struct tx_ring		*txr = adapter->tx_rings;
1435	u32			reg_icr, rx_done = 0;
1436	u32			loop = IGB_MAX_LOOP;
1437	bool			more;
1438
1439	IGB_CORE_LOCK(adapter);
1440	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1441		IGB_CORE_UNLOCK(adapter);
1442		return POLL_RETURN_COUNT(rx_done);
1443	}
1444
1445	if (cmd == POLL_AND_CHECK_STATUS) {
1446		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1447		/* Link status change */
1448		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1449			igb_handle_link(adapter, 0);
1450
1451		if (reg_icr & E1000_ICR_RXO)
1452			adapter->rx_overruns++;
1453	}
1454	IGB_CORE_UNLOCK(adapter);
1455
1456	igb_rxeof(que, count, &rx_done);
1457
1458	IGB_TX_LOCK(txr);
1459	do {
1460		more = igb_txeof(txr);
1461	} while (loop-- && more);
1462#if __FreeBSD_version >= 800000
1463	if (!drbr_empty(ifp, txr->br))
1464		igb_mq_start_locked(ifp, txr, NULL);
1465#else
1466	igb_start_locked(txr, ifp);
1467#endif
1468	IGB_TX_UNLOCK(txr);
1469	return POLL_RETURN_COUNT(rx_done);
1470}
1471#endif /* DEVICE_POLLING */
1472
1473/*********************************************************************
1474 *
1475 *  MSIX Que Interrupt Service routine
1476 *
1477 **********************************************************************/
1478static void
1479igb_msix_que(void *arg)
1480{
1481	struct igb_queue *que = arg;
1482	struct adapter *adapter = que->adapter;
1483	struct tx_ring *txr = que->txr;
1484	struct rx_ring *rxr = que->rxr;
1485	u32		newitr = 0;
1486	bool		more_tx, more_rx;
1487
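	/* Mask this queue's vector via EIMC while the interrupt is serviced */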
1488	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1489	++que->irqs;
1490
1491	IGB_TX_LOCK(txr);
1492	more_tx = igb_txeof(txr);
1493	IGB_TX_UNLOCK(txr);
1494
1495	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1496
1497	if (adapter->enable_aim == FALSE)
1498		goto no_calc;
1499	/*
1500	** Do Adaptive Interrupt Moderation:
1501        **  - Write out last calculated setting
1502	**  - Calculate based on average size over
1503	**    the last interval.
1504	*/
1505        if (que->eitr_setting)
1506                E1000_WRITE_REG(&adapter->hw,
1507                    E1000_EITR(que->msix), que->eitr_setting);
1508
1509        que->eitr_setting = 0;
1510
1511        /* Idle, do nothing */
1512        if ((txr->bytes == 0) && (rxr->bytes == 0))
1513                goto no_calc;
1514
1515        /* Use half the default if running below gigabit speed */
1516        if (adapter->link_speed != 1000)
1517                newitr = IGB_DEFAULT_ITR / 2;
1518        else {
1519		if ((txr->bytes) && (txr->packets))
1520                	newitr = txr->bytes/txr->packets;
1521		if ((rxr->bytes) && (rxr->packets))
1522			newitr = max(newitr,
1523			    (rxr->bytes / rxr->packets));
1524                newitr += 24; /* account for hardware frame, crc */
1525		/* set an upper boundary */
1526		newitr = min(newitr, 3000);
1527		/* Be nice to the mid range */
1528                if ((newitr > 300) && (newitr < 1200))
1529                        newitr = (newitr / 3);
1530                else
1531                        newitr = (newitr / 2);
1532        }
1533        newitr &= 0x7FFC;  /* Mask invalid bits */
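        /* The 82575 takes the interval replicated in the upper 16 bits; later MACs set the counter-ignore bit */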
1534        if (adapter->hw.mac.type == e1000_82575)
1535                newitr |= newitr << 16;
1536        else
1537                newitr |= E1000_EITR_CNT_IGNR;
1538
1539        /* save for next interrupt */
1540        que->eitr_setting = newitr;
1541
1542        /* Reset state */
1543        txr->bytes = 0;
1544        txr->packets = 0;
1545        rxr->bytes = 0;
1546        rxr->packets = 0;
1547
1548no_calc:
1549	/* Schedule a clean task if needed */
1550	if (more_tx || more_rx)
1551		taskqueue_enqueue(que->tq, &que->que_task);
1552	else
1553		/* Reenable this interrupt */
1554		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1555	return;
1556}
1557
1558
1559/*********************************************************************
1560 *
1561 *  MSIX Link Interrupt Service routine
1562 *
1563 **********************************************************************/
1564
1565static void
1566igb_msix_link(void *arg)
1567{
1568	struct adapter	*adapter = arg;
1569	u32       	icr;
1570
1571	++adapter->link_irq;
1572	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1573	if (!(icr & E1000_ICR_LSC))
1574		goto spurious;
1575	igb_handle_link(adapter, 0);
1576
1577spurious:
1578	/* Rearm */
1579	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1580	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1581	return;
1582}
1583
1584
1585/*********************************************************************
1586 *
1587 *  Media Ioctl callback
1588 *
1589 *  This routine is called whenever the user queries the status of
1590 *  the interface using ifconfig.
1591 *
1592 **********************************************************************/
1593static void
1594igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1595{
1596	struct adapter *adapter = ifp->if_softc;
1597	u_char fiber_type = IFM_1000_SX;
1598
1599	INIT_DEBUGOUT("igb_media_status: begin");
1600
1601	IGB_CORE_LOCK(adapter);
1602	igb_update_link_status(adapter);
1603
1604	ifmr->ifm_status = IFM_AVALID;
1605	ifmr->ifm_active = IFM_ETHER;
1606
1607	if (!adapter->link_active) {
1608		IGB_CORE_UNLOCK(adapter);
1609		return;
1610	}
1611
1612	ifmr->ifm_status |= IFM_ACTIVE;
1613
1614	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1615	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1616		ifmr->ifm_active |= fiber_type | IFM_FDX;
1617	else {
1618		switch (adapter->link_speed) {
1619		case 10:
1620			ifmr->ifm_active |= IFM_10_T;
1621			break;
1622		case 100:
1623			ifmr->ifm_active |= IFM_100_TX;
1624			break;
1625		case 1000:
1626			ifmr->ifm_active |= IFM_1000_T;
1627			break;
1628		}
1629		if (adapter->link_duplex == FULL_DUPLEX)
1630			ifmr->ifm_active |= IFM_FDX;
1631		else
1632			ifmr->ifm_active |= IFM_HDX;
1633	}
1634	IGB_CORE_UNLOCK(adapter);
1635}
1636
1637/*********************************************************************
1638 *
1639 *  Media Ioctl callback
1640 *
1641 *  This routine is called when the user changes speed/duplex using
1642 *  the media/mediaopt options with ifconfig.
1643 *
1644 **********************************************************************/
1645static int
1646igb_media_change(struct ifnet *ifp)
1647{
1648	struct adapter *adapter = ifp->if_softc;
1649	struct ifmedia  *ifm = &adapter->media;
1650
1651	INIT_DEBUGOUT("igb_media_change: begin");
1652
1653	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1654		return (EINVAL);
1655
1656	IGB_CORE_LOCK(adapter);
1657	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1658	case IFM_AUTO:
1659		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1660		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1661		break;
1662	case IFM_1000_LX:
1663	case IFM_1000_SX:
1664	case IFM_1000_T:
1665		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1666		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1667		break;
1668	case IFM_100_TX:
1669		adapter->hw.mac.autoneg = FALSE;
1670		adapter->hw.phy.autoneg_advertised = 0;
1671		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1672			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1673		else
1674			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1675		break;
1676	case IFM_10_T:
1677		adapter->hw.mac.autoneg = FALSE;
1678		adapter->hw.phy.autoneg_advertised = 0;
1679		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1680			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1681		else
1682			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1683		break;
1684	default:
1685		device_printf(adapter->dev, "Unsupported media type\n");
1686	}
1687
1688	igb_init_locked(adapter);
1689	IGB_CORE_UNLOCK(adapter);
1690
1691	return (0);
1692}
1693
1694
1695/*********************************************************************
1696 *
1697 *  This routine maps the mbufs to Advanced TX descriptors.
1698 *
1699 **********************************************************************/
1700static int
1701igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1702{
1703	struct adapter		*adapter = txr->adapter;
1704	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1705	bus_dmamap_t		map;
1706	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1707	union e1000_adv_tx_desc	*txd = NULL;
1708	struct mbuf		*m_head = *m_headp;
1709	struct ether_vlan_header *eh = NULL;
1710	struct ip		*ip = NULL;
1711	struct tcphdr		*th = NULL;
1712	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1713	int			ehdrlen, poff;
1714	int			nsegs, i, first, last = 0;
1715	int			error, do_tso, remap = 1;
1716
1717	/* Set basic descriptor constants */
1718	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1719	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1720	if (m_head->m_flags & M_VLANTAG)
1721		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1722
1723retry:
1724	m_head = *m_headp;
1725	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1726	hdrlen = ehdrlen = poff = 0;
1727
1728	/*
1729	 * Intel recommends that the entire IP/TCP header reside in a single
1730	 * buffer. If multiple descriptors are used to describe the IP and
1731	 * TCP headers, each descriptor should describe one or more
1732	 * complete headers; descriptors referencing only parts of headers
1733	 * are not supported. If all layer headers are not coalesced into
1734	 * a single buffer, each buffer should not cross a 4KB boundary,
1735	 * or be larger than the maximum read request size.
1736	 * The controller also requires modifying the IP/TCP header to make
1737	 * TSO work, so we first get a writable mbuf chain and then coalesce
1738	 * the Ethernet/IP/TCP headers into a single buffer to meet the
1739	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1740	 * offloading, which has similar restrictions.
1741	 */
1742	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1743		if (do_tso || (m_head->m_next != NULL &&
1744		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1745			if (M_WRITABLE(*m_headp) == 0) {
1746				m_head = m_dup(*m_headp, M_DONTWAIT);
1747				m_freem(*m_headp);
1748				if (m_head == NULL) {
1749					*m_headp = NULL;
1750					return (ENOBUFS);
1751				}
1752				*m_headp = m_head;
1753			}
1754		}
1755		/*
1756		 * Assume IPv4; we don't have TSO/checksum offload support
1757		 * for IPv6 yet.
1758		 */
1759		ehdrlen = sizeof(struct ether_header);
1760		m_head = m_pullup(m_head, ehdrlen);
1761		if (m_head == NULL) {
1762			*m_headp = NULL;
1763			return (ENOBUFS);
1764		}
1765		eh = mtod(m_head, struct ether_vlan_header *);
1766		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1767			ehdrlen = sizeof(struct ether_vlan_header);
1768			m_head = m_pullup(m_head, ehdrlen);
1769			if (m_head == NULL) {
1770				*m_headp = NULL;
1771				return (ENOBUFS);
1772			}
1773		}
1774		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1775		if (m_head == NULL) {
1776			*m_headp = NULL;
1777			return (ENOBUFS);
1778		}
1779		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1780		poff = ehdrlen + (ip->ip_hl << 2);
1781		if (do_tso) {
1782			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1783			if (m_head == NULL) {
1784				*m_headp = NULL;
1785				return (ENOBUFS);
1786			}
1787			/*
1788			 * The pseudo TCP checksum does not include TCP payload
1789			 * length, so the driver must recompute the checksum here
1790			 * with the value the hardware expects. This follows
1791			 * Microsoft's Large Send specification.
1792			 */
1793			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1794			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1795			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1796			/* Keep track of the full header length */
1797			hdrlen = poff + (th->th_off << 2);
1798		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1799			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1800			if (m_head == NULL) {
1801				*m_headp = NULL;
1802				return (ENOBUFS);
1803			}
1804			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1805			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1806			if (m_head == NULL) {
1807				*m_headp = NULL;
1808				return (ENOBUFS);
1809			}
1810			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1811			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1812		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1813			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1814			if (m_head == NULL) {
1815				*m_headp = NULL;
1816				return (ENOBUFS);
1817			}
1818			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1819		}
1820		*m_headp = m_head;
1821	}
1822
1823	/*
1824	 * Map the packet for DMA
1825	 *
1826	 * Capture the first descriptor index;
1827	 * this descriptor will have the index
1828	 * of the EOP, which is the only one that
1829	 * now gets a DONE bit writeback.
1830	 */
1831	first = txr->next_avail_desc;
1832	tx_buffer = &txr->tx_buffers[first];
1833	tx_buffer_mapped = tx_buffer;
1834	map = tx_buffer->map;
1835
1836	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1837	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1838
1839	/*
1840	 * There are two types of errors we can (try) to handle:
1841	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1842	 *   out of segments.  Defragment the mbuf chain and try again.
1843	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1844	 *   at this point in time.  Defer sending and try again later.
1845	 * All other errors, in particular EINVAL, are fatal and prevent the
1846	 * mbuf chain from ever going through.  Drop it and report error.
1847	 */
1848	if (error == EFBIG && remap) {
1849		struct mbuf *m;
1850
1851		m = m_defrag(*m_headp, M_DONTWAIT);
1852		if (m == NULL) {
1853			adapter->mbuf_defrag_failed++;
1854			m_freem(*m_headp);
1855			*m_headp = NULL;
1856			return (ENOBUFS);
1857		}
1858		*m_headp = m;
1859
1860		/* Try it again, but only once */
1861		remap = 0;
1862		goto retry;
1863	} else if (error == ENOMEM) {
1864		adapter->no_tx_dma_setup++;
1865		return (error);
1866	} else if (error != 0) {
1867		adapter->no_tx_dma_setup++;
1868		m_freem(*m_headp);
1869		*m_headp = NULL;
1870		return (error);
1871	}
1872
1873	/*
1874	** Make sure we don't overrun the ring:
1875	** we need nsegs descriptors plus one for
1876	** the context descriptor used for the
1877	** offloads.
1878	*/
1879	if ((nsegs + 1) > (txr->tx_avail - 2)) {
1880		txr->no_desc_avail++;
1881		bus_dmamap_unload(txr->txtag, map);
1882		return (ENOBUFS);
1883	}
1884	m_head = *m_headp;
1885
1886	/* Do hardware assists:
1887	 * Set up the context descriptor, used
1888	 * when any hardware offload is done.
1889	 * This includes CSUM, VLAN, and TSO.
1890	 * It will use the first descriptor.
1891	 */
1892
1893	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1894		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1895			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1896			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1897			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1898		} else
1899			return (ENXIO);
1900	} else if (igb_tx_ctx_setup(txr, m_head))
1901			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1902
1903	/* Calculate payload length */
1904	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1905	    << E1000_ADVTXD_PAYLEN_SHIFT);
1906
1907	/* 82575 needs the queue index added */
1908	if (adapter->hw.mac.type == e1000_82575)
1909		olinfo_status |= txr->me << 4;
1910
1911	/* Set up our transmit descriptors */
1912	i = txr->next_avail_desc;
1913	for (int j = 0; j < nsegs; j++) {
1914		bus_size_t seg_len;
1915		bus_addr_t seg_addr;
1916
1917		tx_buffer = &txr->tx_buffers[i];
1918		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1919		seg_addr = segs[j].ds_addr;
1920		seg_len  = segs[j].ds_len;
1921
1922		txd->read.buffer_addr = htole64(seg_addr);
1923		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1924		txd->read.olinfo_status = htole32(olinfo_status);
1925		last = i;
1926		if (++i == adapter->num_tx_desc)
1927			i = 0;
1928		tx_buffer->m_head = NULL;
1929		tx_buffer->next_eop = -1;
1930	}
1931
1932	txr->next_avail_desc = i;
1933	txr->tx_avail -= nsegs;
1934        tx_buffer->m_head = m_head;
1935
1936	/*
1937	** Here we swap the map so the last descriptor,
1938	** which gets the completion interrupt, has the
1939	** real map, and the first descriptor gets the
1940	** unused map from this descriptor.
1941	*/
1942	tx_buffer_mapped->map = tx_buffer->map;
1943	tx_buffer->map = map;
1944        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1945
1946	/*
1947	 * Last Descriptor of Packet
1948	 * needs End Of Packet (EOP)
1949	 * and Report Status (RS)
1950	 */
1951	txd->read.cmd_type_len |=
1952	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1953	/*
1954	 * Keep track in the first buffer which
1955	 * descriptor will be written back
1956	 */
1957	tx_buffer = &txr->tx_buffers[first];
1958	tx_buffer->next_eop = last;
1959	/* Update the watchdog time early and often */
1960	txr->watchdog_time = ticks;
1961
1962	/*
1963	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1964	 * that this frame is available to transmit.
1965	 */
1966	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1967	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1968	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1969	++txr->tx_packets;
1970
1971	return (0);
1972}
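
/*********************************************************************
 *  Promiscuous mode setup
 *
 *  Enable unicast and/or multicast promiscuous receive based on the
 *  interface flags; VF interfaces are handled through
 *  e1000_promisc_set_vf() rather than the RCTL register.
 *
 **********************************************************************/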
1973static void
1974igb_set_promisc(struct adapter *adapter)
1975{
1976	struct ifnet	*ifp = adapter->ifp;
1977	struct e1000_hw *hw = &adapter->hw;
1978	u32		reg;
1979
1980	if (adapter->vf_ifp) {
1981		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1982		return;
1983	}
1984
1985	reg = E1000_READ_REG(hw, E1000_RCTL);
1986	if (ifp->if_flags & IFF_PROMISC) {
1987		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1988		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1989	} else if (ifp->if_flags & IFF_ALLMULTI) {
1990		reg |= E1000_RCTL_MPE;
1991		reg &= ~E1000_RCTL_UPE;
1992		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1993	}
1994}
1995
1996static void
1997igb_disable_promisc(struct adapter *adapter)
1998{
1999	struct e1000_hw *hw = &adapter->hw;
2000	u32		reg;
2001
2002	if (adapter->vf_ifp) {
2003		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2004		return;
2005	}
2006	reg = E1000_READ_REG(hw, E1000_RCTL);
2007	reg &=  (~E1000_RCTL_UPE);
2008	reg &=  (~E1000_RCTL_MPE);
2009	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2010}
2011
2012
2013/*********************************************************************
2014 *  Multicast Update
2015 *
2016 *  This routine is called whenever the multicast address list is updated.
2017 *
2018 **********************************************************************/
2019
2020static void
2021igb_set_multi(struct adapter *adapter)
2022{
2023	struct ifnet	*ifp = adapter->ifp;
2024	struct ifmultiaddr *ifma;
2025	u32 reg_rctl = 0;
2026	u8  *mta;
2027
2028	int mcnt = 0;
2029
2030	IOCTL_DEBUGOUT("igb_set_multi: begin");
2031
2032	mta = adapter->mta;
2033	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2034	    MAX_NUM_MULTICAST_ADDRESSES);
2035
2036#if __FreeBSD_version < 800000
2037	IF_ADDR_LOCK(ifp);
2038#else
2039	if_maddr_rlock(ifp);
2040#endif
2041	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2042		if (ifma->ifma_addr->sa_family != AF_LINK)
2043			continue;
2044
2045		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2046			break;
2047
2048		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2049		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2050		mcnt++;
2051	}
2052#if __FreeBSD_version < 800000
2053	IF_ADDR_UNLOCK(ifp);
2054#else
2055	if_maddr_runlock(ifp);
2056#endif
2057
2058	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2059		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2060		reg_rctl |= E1000_RCTL_MPE;
2061		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2062	} else
2063		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2064}
2065
2066
2067/*********************************************************************
2068 *  Timer routine:
2069 *  	This routine checks for link status,
2070 *	updates statistics, and does the watchdog.
2071 *
2072 **********************************************************************/
2073
2074static void
2075igb_local_timer(void *arg)
2076{
2077	struct adapter		*adapter = arg;
2078	device_t		dev = adapter->dev;
2079	struct ifnet		*ifp = adapter->ifp;
2080	struct tx_ring		*txr = adapter->tx_rings;
2081	struct igb_queue	*que = adapter->queues;
2082	int			hung = 0, busy = 0;
2083
2084
2085	IGB_CORE_LOCK_ASSERT(adapter);
2086
2087	igb_update_link_status(adapter);
2088	igb_update_stats_counters(adapter);
2089
2090	/*
2091	** Check the status of the TX queues:
2092	**	- central locked handling of OACTIVE
2093	**	- watchdog only if all queues show hung
2094	*/
2095	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2096		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2097		    (adapter->pause_frames == 0))
2098			++hung;
2099		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2100			++busy;
2101		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2102			taskqueue_enqueue(que->tq, &que->que_task);
2103	}
2104	if (hung == adapter->num_queues)
2105		goto timeout;
2106	if (busy == adapter->num_queues)
2107		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2108	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2109	    (busy < adapter->num_queues))
2110		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2111
2112	adapter->pause_frames = 0;
2113	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2114#ifndef DEVICE_POLLING
2115	/* Schedule all queue interrupts - deadlock protection */
2116	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2117#endif
2118	return;
2119
2120timeout:
2121	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2122	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2123	    E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2124	    E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2125	device_printf(dev, "TX(%d) desc avail = %d, "
2126	    "Next TX to Clean = %d\n",
2127	    txr->me, txr->tx_avail, txr->next_to_clean);
2128	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2129	adapter->watchdog_events++;
2130	igb_init_locked(adapter);
2131}
2132
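/*********************************************************************
 *  Link status update
 *
 *  Query the hardware for the current link state (how depends on the
 *  media type) and, on a transition, propagate the change to the
 *  ifnet layer and reset the per-queue watchdog state.
 *
 **********************************************************************/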
2133static void
2134igb_update_link_status(struct adapter *adapter)
2135{
2136	struct e1000_hw *hw = &adapter->hw;
2137	struct ifnet *ifp = adapter->ifp;
2138	device_t dev = adapter->dev;
2139	struct tx_ring *txr = adapter->tx_rings;
2140	u32 link_check, thstat, ctrl;
2141
2142	link_check = thstat = ctrl = 0;
2143
2144	/* Get the cached link value or read for real */
2145        switch (hw->phy.media_type) {
2146        case e1000_media_type_copper:
2147                if (hw->mac.get_link_status) {
2148			/* Do the work to read phy */
2149                        e1000_check_for_link(hw);
2150                        link_check = !hw->mac.get_link_status;
2151                } else
2152                        link_check = TRUE;
2153                break;
2154        case e1000_media_type_fiber:
2155                e1000_check_for_link(hw);
2156                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2157                                 E1000_STATUS_LU);
2158                break;
2159        case e1000_media_type_internal_serdes:
2160                e1000_check_for_link(hw);
2161                link_check = adapter->hw.mac.serdes_has_link;
2162                break;
2163	/* VF device is type_unknown */
2164        case e1000_media_type_unknown:
2165                e1000_check_for_link(hw);
2166		link_check = !hw->mac.get_link_status;
2167		/* Fall thru */
2168        default:
2169                break;
2170        }
2171
2172	/* Check for thermal downshift or shutdown */
2173	if (hw->mac.type == e1000_i350) {
2174		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2175		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2176	}
2177
2178	/* Now we check if a transition has happened */
2179	if (link_check && (adapter->link_active == 0)) {
2180		e1000_get_speed_and_duplex(&adapter->hw,
2181		    &adapter->link_speed, &adapter->link_duplex);
2182		if (bootverbose)
2183			device_printf(dev, "Link is up %d Mbps %s\n",
2184			    adapter->link_speed,
2185			    ((adapter->link_duplex == FULL_DUPLEX) ?
2186			    "Full Duplex" : "Half Duplex"));
2187		adapter->link_active = 1;
2188		ifp->if_baudrate = adapter->link_speed * 1000000;
2189		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2190		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2191			device_printf(dev, "Link: thermal downshift\n");
2192		/* This can sleep */
2193		if_link_state_change(ifp, LINK_STATE_UP);
2194	} else if (!link_check && (adapter->link_active == 1)) {
2195		ifp->if_baudrate = adapter->link_speed = 0;
2196		adapter->link_duplex = 0;
2197		if (bootverbose)
2198			device_printf(dev, "Link is Down\n");
2199		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2200		    (thstat & E1000_THSTAT_PWR_DOWN))
2201			device_printf(dev, "Link: thermal shutdown\n");
2202		adapter->link_active = 0;
2203		/* This can sleep */
2204		if_link_state_change(ifp, LINK_STATE_DOWN);
2205		/* Reset queue state */
2206		for (int i = 0; i < adapter->num_queues; i++, txr++)
2207			txr->queue_status = IGB_QUEUE_IDLE;
2208	}
2209}
2210
2211/*********************************************************************
2212 *
2213 *  This routine disables all traffic on the adapter by issuing a
2214 *  global reset on the MAC and deallocates TX/RX buffers.
2215 *
2216 **********************************************************************/
2217
2218static void
2219igb_stop(void *arg)
2220{
2221	struct adapter	*adapter = arg;
2222	struct ifnet	*ifp = adapter->ifp;
2223	struct tx_ring *txr = adapter->tx_rings;
2224
2225	IGB_CORE_LOCK_ASSERT(adapter);
2226
2227	INIT_DEBUGOUT("igb_stop: begin");
2228
2229	igb_disable_intr(adapter);
2230
2231	callout_stop(&adapter->timer);
2232
2233	/* Tell the stack that the interface is no longer active */
2234	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2235	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2236
2237	/* Disarm watchdog timer. */
2238	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2239		IGB_TX_LOCK(txr);
2240		txr->queue_status = IGB_QUEUE_IDLE;
2241		IGB_TX_UNLOCK(txr);
2242	}
2243
2244	e1000_reset_hw(&adapter->hw);
2245	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2246
2247	e1000_led_off(&adapter->hw);
2248	e1000_cleanup_led(&adapter->hw);
2249}
2250
2251
2252/*********************************************************************
2253 *
2254 *  Determine hardware revision.
2255 *
2256 **********************************************************************/
2257static void
2258igb_identify_hardware(struct adapter *adapter)
2259{
2260	device_t dev = adapter->dev;
2261
2262	/* Make sure our PCI config space has the necessary stuff set */
2263	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2264	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2265	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2266		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2267		    "bits were not set!\n");
2268		adapter->hw.bus.pci_cmd_word |=
2269		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2270		pci_write_config(dev, PCIR_COMMAND,
2271		    adapter->hw.bus.pci_cmd_word, 2);
2272	}
2273
2274	/* Save off the information about this board */
2275	adapter->hw.vendor_id = pci_get_vendor(dev);
2276	adapter->hw.device_id = pci_get_device(dev);
2277	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2278	adapter->hw.subsystem_vendor_id =
2279	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2280	adapter->hw.subsystem_device_id =
2281	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2282
2283	/* Set MAC type early for PCI setup */
2284	e1000_set_mac_type(&adapter->hw);
2285
2286	/* Are we a VF device? */
2287	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2288	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2289		adapter->vf_ifp = 1;
2290	else
2291		adapter->vf_ifp = 0;
2292}
2293
2294static int
2295igb_allocate_pci_resources(struct adapter *adapter)
2296{
2297	device_t	dev = adapter->dev;
2298	int		rid;
2299
2300	rid = PCIR_BAR(0);
2301	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2302	    &rid, RF_ACTIVE);
2303	if (adapter->pci_mem == NULL) {
2304		device_printf(dev, "Unable to allocate bus resource: memory\n");
2305		return (ENXIO);
2306	}
2307	adapter->osdep.mem_bus_space_tag =
2308	    rman_get_bustag(adapter->pci_mem);
2309	adapter->osdep.mem_bus_space_handle =
2310	    rman_get_bushandle(adapter->pci_mem);
2311	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2312
2313	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2314
2315	/* This will setup either MSI/X or MSI */
2316	adapter->msix = igb_setup_msix(adapter);
2317	adapter->hw.back = &adapter->osdep;
2318
2319	return (0);
2320}
2321
2322/*********************************************************************
2323 *
2324 *  Setup the Legacy or MSI Interrupt handler
2325 *
2326 **********************************************************************/
2327static int
2328igb_allocate_legacy(struct adapter *adapter)
2329{
2330	device_t		dev = adapter->dev;
2331	struct igb_queue	*que = adapter->queues;
2332	int			error, rid = 0;
2333
2334	/* Turn off all interrupts */
2335	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2336
2337	/* MSI RID is 1 */
2338	if (adapter->msix == 1)
2339		rid = 1;
2340
2341	/* We allocate a single interrupt resource */
2342	adapter->res = bus_alloc_resource_any(dev,
2343	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2344	if (adapter->res == NULL) {
2345		device_printf(dev, "Unable to allocate bus resource: "
2346		    "interrupt\n");
2347		return (ENXIO);
2348	}
2349
2350	/*
2351	 * Try allocating a fast interrupt and the associated deferred
2352	 * processing contexts.
2353	 */
2354	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2355	/* Make tasklet for deferred link handling */
2356	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2357	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2358	    taskqueue_thread_enqueue, &que->tq);
2359	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2360	    device_get_nameunit(adapter->dev));
2361	if ((error = bus_setup_intr(dev, adapter->res,
2362	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2363	    adapter, &adapter->tag)) != 0) {
2364		device_printf(dev, "Failed to register fast interrupt "
2365			    "handler: %d\n", error);
2366		taskqueue_free(que->tq);
2367		que->tq = NULL;
2368		return (error);
2369	}
2370
2371	return (0);
2372}
2373
2374
2375/*********************************************************************
2376 *
2377 *  Setup the MSIX Queue Interrupt handlers:
2378 *
2379 **********************************************************************/
2380static int
2381igb_allocate_msix(struct adapter *adapter)
2382{
2383	device_t		dev = adapter->dev;
2384	struct igb_queue	*que = adapter->queues;
2385	int			error, rid, vector = 0;
2386
2387	/* Be sure to start with all interrupts disabled */
2388	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2389	E1000_WRITE_FLUSH(&adapter->hw);
2390
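	/* MSI-X vectors use 1-based IRQ resource IDs, hence rid = vector + 1 */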
2391	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2392		rid = vector +1;
2393		que->res = bus_alloc_resource_any(dev,
2394		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2395		if (que->res == NULL) {
2396			device_printf(dev,
2397			    "Unable to allocate bus resource: "
2398			    "MSIX Queue Interrupt\n");
2399			return (ENXIO);
2400		}
2401		error = bus_setup_intr(dev, que->res,
2402	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2403		    igb_msix_que, que, &que->tag);
2404		if (error) {
2405			que->res = NULL;
2406			device_printf(dev, "Failed to register Queue handler");
2407			return (error);
2408		}
2409#if __FreeBSD_version >= 800504
2410		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2411#endif
2412		que->msix = vector;
2413		if (adapter->hw.mac.type == e1000_82575)
2414			que->eims = E1000_EICR_TX_QUEUE0 << i;
2415		else
2416			que->eims = 1 << vector;
2417		/*
2418		** Bind the msix vector, and thus the
2419		** rings, to the corresponding cpu.
2420		*/
2421		if (adapter->num_queues > 1)
2422			bus_bind_intr(dev, que->res, i);
2423		/* Make tasklet for deferred handling */
2424		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2425		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2426		    taskqueue_thread_enqueue, &que->tq);
2427		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2428		    device_get_nameunit(adapter->dev));
2429	}
2430
2431	/* And Link */
2432	rid = vector + 1;
2433	adapter->res = bus_alloc_resource_any(dev,
2434	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2435	if (adapter->res == NULL) {
2436		device_printf(dev,
2437		    "Unable to allocate bus resource: "
2438		    "MSIX Link Interrupt\n");
2439		return (ENXIO);
2440	}
2441	if ((error = bus_setup_intr(dev, adapter->res,
2442	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2443	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2444		device_printf(dev, "Failed to register Link handler");
2445		return (error);
2446	}
2447#if __FreeBSD_version >= 800504
2448	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2449#endif
2450	adapter->linkvec = vector;
2451
2452	return (0);
2453}
2454
2455
2456static void
2457igb_configure_queues(struct adapter *adapter)
2458{
2459	struct	e1000_hw	*hw = &adapter->hw;
2460	struct	igb_queue	*que;
2461	u32			tmp, ivar = 0, newitr = 0;
2462
2463	/* First turn on RSS capability */
2464	if (adapter->hw.mac.type != e1000_82575)
2465		E1000_WRITE_REG(hw, E1000_GPIE,
2466		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2467		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2468
2469	/* Turn on MSIX */
2470	switch (adapter->hw.mac.type) {
2471	case e1000_82580:
2472	case e1000_i350:
2473	case e1000_vfadapt:
2474	case e1000_vfadapt_i350:
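		/*
		 * Each 32-bit IVAR register holds four 8-bit entries
		 * (RX then TX for a pair of queues): queue i uses
		 * register i >> 1, with the odd-numbered queue of the
		 * pair in the upper half.  RX entries are programmed
		 * first, TX entries below.
		 */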
2475		/* RX entries */
2476		for (int i = 0; i < adapter->num_queues; i++) {
2477			u32 index = i >> 1;
2478			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2479			que = &adapter->queues[i];
2480			if (i & 1) {
2481				ivar &= 0xFF00FFFF;
2482				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2483			} else {
2484				ivar &= 0xFFFFFF00;
2485				ivar |= que->msix | E1000_IVAR_VALID;
2486			}
2487			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2488		}
2489		/* TX entries */
2490		for (int i = 0; i < adapter->num_queues; i++) {
2491			u32 index = i >> 1;
2492			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2493			que = &adapter->queues[i];
2494			if (i & 1) {
2495				ivar &= 0x00FFFFFF;
2496				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2497			} else {
2498				ivar &= 0xFFFF00FF;
2499				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2500			}
2501			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2502			adapter->que_mask |= que->eims;
2503		}
2504
2505		/* And for the link interrupt */
2506		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2507		adapter->link_mask = 1 << adapter->linkvec;
2508		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2509		break;
2510	case e1000_82576:
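		/*
		 * On the 82576 the IVAR index wraps at 8 (i & 0x7);
		 * queues 0-7 use the low half of each register and
		 * queues 8-15 the high half.
		 */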
2511		/* RX entries */
2512		for (int i = 0; i < adapter->num_queues; i++) {
2513			u32 index = i & 0x7; /* Each IVAR has two entries */
2514			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2515			que = &adapter->queues[i];
2516			if (i < 8) {
2517				ivar &= 0xFFFFFF00;
2518				ivar |= que->msix | E1000_IVAR_VALID;
2519			} else {
2520				ivar &= 0xFF00FFFF;
2521				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2522			}
2523			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2524			adapter->que_mask |= que->eims;
2525		}
2526		/* TX entries */
2527		for (int i = 0; i < adapter->num_queues; i++) {
2528			u32 index = i & 0x7; /* Each IVAR has two entries */
2529			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2530			que = &adapter->queues[i];
2531			if (i < 8) {
2532				ivar &= 0xFFFF00FF;
2533				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2534			} else {
2535				ivar &= 0x00FFFFFF;
2536				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2537			}
2538			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2539			adapter->que_mask |= que->eims;
2540		}
2541
2542		/* And for the link interrupt */
2543		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2544		adapter->link_mask = 1 << adapter->linkvec;
2545		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2546		break;
2547
2548	case e1000_82575:
2549		/* Enable MSI-X support */
2550		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2551		tmp |= E1000_CTRL_EXT_PBA_CLR;
2552		/* Auto-Mask interrupts upon ICR read. */
2553		tmp |= E1000_CTRL_EXT_EIAME;
2554		tmp |= E1000_CTRL_EXT_IRCA;
2555		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2556
2557		/* Queues */
2558		for (int i = 0; i < adapter->num_queues; i++) {
2559			que = &adapter->queues[i];
2560			tmp = E1000_EICR_RX_QUEUE0 << i;
2561			tmp |= E1000_EICR_TX_QUEUE0 << i;
2562			que->eims = tmp;
2563			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2564			    i, que->eims);
2565			adapter->que_mask |= que->eims;
2566		}
2567
2568		/* Link */
2569		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2570		    E1000_EIMS_OTHER);
2571		adapter->link_mask |= E1000_EIMS_OTHER;
2572	default:
2573		break;
2574	}
2575
2576	/* Set the starting interrupt rate */
2577	if (igb_max_interrupt_rate > 0)
2578		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
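	/*
	 * For example, a cap of 8000 interrupts/sec yields
	 * 4000000 / 8000 = 500; that value is written to each queue's
	 * EITR register below (the 82575 repeats it in both halves).
	 */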
2579
2580        if (hw->mac.type == e1000_82575)
2581                newitr |= newitr << 16;
2582        else
2583                newitr |= E1000_EITR_CNT_IGNR;
2584
2585	for (int i = 0; i < adapter->num_queues; i++) {
2586		que = &adapter->queues[i];
2587		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2588	}
2589
2590	return;
2591}
2592
2593
2594static void
2595igb_free_pci_resources(struct adapter *adapter)
2596{
2597	struct		igb_queue *que = adapter->queues;
2598	device_t	dev = adapter->dev;
2599	int		rid;
2600
2601	/*
2602	** There is a slight possibility of a failure mode
2603	** in attach that will result in entering this function
2604	** before interrupt resources have been initialized, and
2605	** in that case we do not want to execute the loops below.
2606	** We can detect this reliably by the state of the adapter's
2607	** res pointer.
2608	*/
2609	if (adapter->res == NULL)
2610		goto mem;
2611
2612	/*
2613	 * First release all the interrupt resources:
2614	 */
2615	for (int i = 0; i < adapter->num_queues; i++, que++) {
2616		rid = que->msix + 1;
2617		if (que->tag != NULL) {
2618			bus_teardown_intr(dev, que->res, que->tag);
2619			que->tag = NULL;
2620		}
2621		if (que->res != NULL)
2622			bus_release_resource(dev,
2623			    SYS_RES_IRQ, rid, que->res);
2624	}
2625
2626	/* Clean the Legacy or Link interrupt last */
2627	if (adapter->linkvec) /* we are doing MSIX */
2628		rid = adapter->linkvec + 1;
2629	else
2630		rid = (adapter->msix != 0) ? 1 : 0;
2631
2632	if (adapter->tag != NULL) {
2633		bus_teardown_intr(dev, adapter->res, adapter->tag);
2634		adapter->tag = NULL;
2635	}
2636	if (adapter->res != NULL)
2637		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2638
2639mem:
2640	if (adapter->msix)
2641		pci_release_msi(dev);
2642
2643	if (adapter->msix_mem != NULL)
2644		bus_release_resource(dev, SYS_RES_MEMORY,
2645		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2646
2647	if (adapter->pci_mem != NULL)
2648		bus_release_resource(dev, SYS_RES_MEMORY,
2649		    PCIR_BAR(0), adapter->pci_mem);
2650
2651}
2652
2653/*
2654 * Setup Either MSI/X or MSI
2655 */
2656static int
2657igb_setup_msix(struct adapter *adapter)
2658{
2659	device_t dev = adapter->dev;
2660	int rid, want, queues, msgs;
2661
2662	/* tuneable override */
2663	if (igb_enable_msix == 0)
2664		goto msi;
2665
2666	/* First try MSI/X */
2667	rid = PCIR_BAR(IGB_MSIX_BAR);
2668	adapter->msix_mem = bus_alloc_resource_any(dev,
2669	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2670       	if (!adapter->msix_mem) {
2671		/* May not be enabled */
2672		device_printf(adapter->dev,
2673		    "Unable to map MSIX table\n");
2674		goto msi;
2675	}
2676
2677	msgs = pci_msix_count(dev);
2678	if (msgs == 0) { /* system has msix disabled */
2679		bus_release_resource(dev, SYS_RES_MEMORY,
2680		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2681		adapter->msix_mem = NULL;
2682		goto msi;
2683	}
2684
2685	/* Figure out a reasonable auto config value */
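	/* (msgs - 1 because one vector is reserved for the link interrupt) */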
2686	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2687
2688	/* Manual override */
2689	if (igb_num_queues != 0)
2690		queues = igb_num_queues;
2691	if (queues > 8)  /* max queues */
2692		queues = 8;
2693
2694	/* Can have max of 4 queues on 82575 */
2695	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2696		queues = 4;
2697
2698	/* Limit the VF devices to one queue */
2699	if (adapter->vf_ifp)
2700		queues = 1;
2701
2702	/*
2703	** One vector (RX/TX pair) per queue
2704	** plus an additional one for the Link interrupt.
2705	*/
2706	want = queues + 1;
2707	if (msgs >= want)
2708		msgs = want;
2709	else {
2710               	device_printf(adapter->dev,
2711		    "MSIX Configuration Problem, "
2712		    "%d vectors configured, but %d queues wanted!\n",
2713		    msgs, want);
2714		return (ENXIO);
2715	}
2716	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2717               	device_printf(adapter->dev,
2718		    "Using MSIX interrupts with %d vectors\n", msgs);
2719		adapter->num_queues = queues;
2720		return (msgs);
2721	}
2722msi:
2723       	msgs = pci_msi_count(dev);
2724       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2725               	device_printf(adapter->dev,"Using MSI interrupt\n");
2726	return (msgs);
2727}
2728
2729/*********************************************************************
2730 *
2731 *  Set up a fresh starting state
2732 *
2733 **********************************************************************/
2734static void
2735igb_reset(struct adapter *adapter)
2736{
2737	device_t	dev = adapter->dev;
2738	struct e1000_hw *hw = &adapter->hw;
2739	struct e1000_fc_info *fc = &hw->fc;
2740	struct ifnet	*ifp = adapter->ifp;
2741	u32		pba = 0;
2742	u16		hwm;
2743
2744	INIT_DEBUGOUT("igb_reset: begin");
2745
2746	/* Let the firmware know the OS is in control */
2747	igb_get_hw_control(adapter);
2748
2749	/*
2750	 * Packet Buffer Allocation (PBA)
2751	 * Writing PBA sets the receive portion of the buffer;
2752	 * the remainder is used for the transmit buffer.
2753	 */
2754	switch (hw->mac.type) {
2755	case e1000_82575:
2756		pba = E1000_PBA_32K;
2757		break;
2758	case e1000_82576:
2759	case e1000_vfadapt:
2760		pba = E1000_READ_REG(hw, E1000_RXPBS);
2761		pba &= E1000_RXPBS_SIZE_MASK_82576;
2762		break;
2763	case e1000_82580:
2764	case e1000_i350:
2765	case e1000_vfadapt_i350:
2766		pba = E1000_READ_REG(hw, E1000_RXPBS);
2767		pba = e1000_rxpbs_adjust_82580(pba);
2768		break;
2769	default:
2770		break;
2771	}
2772
2773	/* Special needs in case of Jumbo frames */
2774	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2775		u32 tx_space, min_tx, min_rx;
2776		pba = E1000_READ_REG(hw, E1000_PBA);
2777		tx_space = pba >> 16;
2778		pba &= 0xffff;
2779		min_tx = (adapter->max_frame_size +
2780		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2781		min_tx = roundup2(min_tx, 1024);
2782		min_tx >>= 10;
2783                min_rx = adapter->max_frame_size;
2784                min_rx = roundup2(min_rx, 1024);
2785                min_rx >>= 10;
2786		if (tx_space < min_tx &&
2787		    ((min_tx - tx_space) < pba)) {
2788			pba = pba - (min_tx - tx_space);
2789			/*
2790                         * if short on rx space, rx wins
2791                         * and must trump tx adjustment
2792			 */
2793                        if (pba < min_rx)
2794                                pba = min_rx;
2795		}
2796		E1000_WRITE_REG(hw, E1000_PBA, pba);
2797	}
2798
2799	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2800
2801	/*
2802	 * These parameters control the automatic generation (Tx) and
2803	 * response (Rx) to Ethernet PAUSE frames.
2804	 * - High water mark should allow for at least two frames to be
2805	 *   received after sending an XOFF.
2806	 * - Low water mark works best when it is very near the high water mark.
2807	 *   This allows the receiver to restart by sending XON when it has
2808	 *   drained a bit.
2809	 */
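	/* pba is in KB, so shift left by 10 to compute the marks in bytes */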
2810	hwm = min(((pba << 10) * 9 / 10),
2811	    ((pba << 10) - 2 * adapter->max_frame_size));
2812
2813	if (hw->mac.type < e1000_82576) {
2814		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2815		fc->low_water = fc->high_water - 8;
2816	} else {
2817		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2818		fc->low_water = fc->high_water - 16;
2819	}
2820
2821	fc->pause_time = IGB_FC_PAUSE_TIME;
2822	fc->send_xon = TRUE;
2823	if (adapter->fc)
2824		fc->requested_mode = adapter->fc;
2825	else
2826		fc->requested_mode = e1000_fc_default;
2827
2828	/* Issue a global reset */
2829	e1000_reset_hw(hw);
2830	E1000_WRITE_REG(hw, E1000_WUC, 0);
2831
2832	if (e1000_init_hw(hw) < 0)
2833		device_printf(dev, "Hardware Initialization Failed\n");
2834
2835	/* Setup DMA Coalescing */
2836	if (hw->mac.type == e1000_i350) {
2837		u32 reg = ~E1000_DMACR_DMAC_EN;
2838
2839		if (adapter->dmac == 0) { /* Disabling it */
2840			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2841			goto reset_out;
2842		}
2843
2844		hwm = (pba - 4) << 10;
2845		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2846		    & E1000_DMACR_DMACTHR_MASK);
2847
2848		/* Transition to L0s or L1 if available */
2849		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2850
2851		/* timer = value in adapter->dmac in 32usec intervals */
2852		reg |= (adapter->dmac >> 5);
2853		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2854
2855		/* No lower threshold */
2856		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2857
2858		/* set hwm to PBA -  2 * max frame size */
2859		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2860
2861		/* Set the interval before transition */
2862		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2863		reg |= 0x800000FF; /* 255 usec */
2864		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2865
2866		/* free space in tx packet buffer to wake from DMA coal */
2867		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2868		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2869
2870		/* make low power state decision controlled by DMA coal */
2871		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2872		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2873		    reg | E1000_PCIEMISC_LX_DECISION);
2874		device_printf(dev, "DMA Coalescing enabled\n");
2875	}
2876
2877reset_out:
2878	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2879	e1000_get_phy_info(hw);
2880	e1000_check_for_link(hw);
2881	return;
2882}
2883
2884/*********************************************************************
2885 *
2886 *  Setup networking device structure and register an interface.
2887 *
2888 **********************************************************************/
2889static int
2890igb_setup_interface(device_t dev, struct adapter *adapter)
2891{
2892	struct ifnet   *ifp;
2893
2894	INIT_DEBUGOUT("igb_setup_interface: begin");
2895
2896	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2897	if (ifp == NULL) {
2898		device_printf(dev, "can not allocate ifnet structure\n");
2899		return (-1);
2900	}
2901	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2902	ifp->if_mtu = ETHERMTU;
2903	ifp->if_init =  igb_init;
2904	ifp->if_softc = adapter;
2905	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2906	ifp->if_ioctl = igb_ioctl;
2907	ifp->if_start = igb_start;
2908#if __FreeBSD_version >= 800000
2909	ifp->if_transmit = igb_mq_start;
2910	ifp->if_qflush = igb_qflush;
2911#endif
2912	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2913	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2914	IFQ_SET_READY(&ifp->if_snd);
2915
2916	ether_ifattach(ifp, adapter->hw.mac.addr);
2917
2918	ifp->if_capabilities = ifp->if_capenable = 0;
2919
2920	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2921	ifp->if_capabilities |= IFCAP_TSO4;
2922	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2923	ifp->if_capenable = ifp->if_capabilities;
2924
2925	/* Don't enable LRO by default */
2926	ifp->if_capabilities |= IFCAP_LRO;
2927
2928#ifdef DEVICE_POLLING
2929	ifp->if_capabilities |= IFCAP_POLLING;
2930#endif
2931
2932	/*
2933	 * Tell the upper layer(s) we
2934	 * support full VLAN capability.
2935	 */
2936	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2937	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2938			     |  IFCAP_VLAN_HWTSO
2939			     |  IFCAP_VLAN_MTU;
2940	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2941			  |  IFCAP_VLAN_HWTSO
2942			  |  IFCAP_VLAN_MTU;
2943
2944	/*
2945	** Don't turn this on by default: if vlans are
2946	** created on another pseudo device (e.g. lagg)
2947	** then vlan events are not passed through, breaking
2948	** operation, but with HW FILTER off it works. If
2949	** using vlans directly on the igb driver you can
2950	** enable this and get full hardware tag filtering.
2951	*/
2952	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2953
2954	/*
2955	 * Specify the media types supported by this adapter and register
2956	 * callbacks to update media and link information
2957	 */
2958	ifmedia_init(&adapter->media, IFM_IMASK,
2959	    igb_media_change, igb_media_status);
2960	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2961	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2962		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2963			    0, NULL);
2964		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2965	} else {
2966		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2967		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2968			    0, NULL);
2969		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2970			    0, NULL);
2971		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2972			    0, NULL);
2973		if (adapter->hw.phy.type != e1000_phy_ife) {
2974			ifmedia_add(&adapter->media,
2975				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2976			ifmedia_add(&adapter->media,
2977				IFM_ETHER | IFM_1000_T, 0, NULL);
2978		}
2979	}
2980	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2981	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2982	return (0);
2983}
2984
2985
2986/*
2987 * Manage DMA'able memory.
2988 */
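/* bus_dmamap_load() callback: record the physical address of the lone segment */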
2989static void
2990igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2991{
2992	if (error)
2993		return;
2994	*(bus_addr_t *) arg = segs[0].ds_addr;
2995}
2996
2997static int
2998igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2999        struct igb_dma_alloc *dma, int mapflags)
3000{
3001	int error;
3002
3003	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3004				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3005				BUS_SPACE_MAXADDR,	/* lowaddr */
3006				BUS_SPACE_MAXADDR,	/* highaddr */
3007				NULL, NULL,		/* filter, filterarg */
3008				size,			/* maxsize */
3009				1,			/* nsegments */
3010				size,			/* maxsegsize */
3011				0,			/* flags */
3012				NULL,			/* lockfunc */
3013				NULL,			/* lockarg */
3014				&dma->dma_tag);
3015	if (error) {
3016		device_printf(adapter->dev,
3017		    "%s: bus_dma_tag_create failed: %d\n",
3018		    __func__, error);
3019		goto fail_0;
3020	}
3021
3022	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3023	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3024	if (error) {
3025		device_printf(adapter->dev,
3026		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3027		    __func__, (uintmax_t)size, error);
3028		goto fail_2;
3029	}
3030
3031	dma->dma_paddr = 0;
3032	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3033	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3034	if (error || dma->dma_paddr == 0) {
3035		device_printf(adapter->dev,
3036		    "%s: bus_dmamap_load failed: %d\n",
3037		    __func__, error);
3038		goto fail_3;
3039	}
3040
3041	return (0);
3042
3043fail_3:
3044	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3045fail_2:
3046	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3047	bus_dma_tag_destroy(dma->dma_tag);
3048fail_0:
3049	dma->dma_map = NULL;
3050	dma->dma_tag = NULL;
3051
3052	return (error);
3053}
3054
3055static void
3056igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3057{
3058	if (dma->dma_tag == NULL)
3059		return;
3060	if (dma->dma_map != NULL) {
3061		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3062		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3063		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3064		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3065		dma->dma_map = NULL;
3066	}
3067	bus_dma_tag_destroy(dma->dma_tag);
3068	dma->dma_tag = NULL;
3069}
3070
3071
3072/*********************************************************************
3073 *
3074 *  Allocate memory for the transmit and receive rings, and then
3075 *  the descriptors associated with each, called only once at attach.
3076 *
3077 **********************************************************************/
3078static int
3079igb_allocate_queues(struct adapter *adapter)
3080{
3081	device_t dev = adapter->dev;
3082	struct igb_queue	*que = NULL;
3083	struct tx_ring		*txr = NULL;
3084	struct rx_ring		*rxr = NULL;
3085	int rsize, tsize, error = E1000_SUCCESS;
3086	int txconf = 0, rxconf = 0;
3087
3088	/* First allocate the top level queue structs */
3089	if (!(adapter->queues =
3090	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3091	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3092		device_printf(dev, "Unable to allocate queue memory\n");
3093		error = ENOMEM;
3094		goto fail;
3095	}
3096
3097	/* Next allocate the TX ring struct memory */
3098	if (!(adapter->tx_rings =
3099	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3100	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3101		device_printf(dev, "Unable to allocate TX ring memory\n");
3102		error = ENOMEM;
3103		goto tx_fail;
3104	}
3105
3106	/* Now allocate the RX */
3107	if (!(adapter->rx_rings =
3108	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3109	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3110		device_printf(dev, "Unable to allocate RX ring memory\n");
3111		error = ENOMEM;
3112		goto rx_fail;
3113	}
3114
3115	tsize = roundup2(adapter->num_tx_desc *
3116	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3117	/*
3118	 * Now set up the TX queues; txconf is needed to handle the
3119	 * possibility that things fail midcourse and we need to
3120	 * undo the memory allocations gracefully.
3121	 */
3122	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3123		/* Set up some basics */
3124		txr = &adapter->tx_rings[i];
3125		txr->adapter = adapter;
3126		txr->me = i;
3127
3128		/* Initialize the TX lock */
3129		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3130		    device_get_nameunit(dev), txr->me);
3131		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3132
3133		if (igb_dma_malloc(adapter, tsize,
3134			&txr->txdma, BUS_DMA_NOWAIT)) {
3135			device_printf(dev,
3136			    "Unable to allocate TX Descriptor memory\n");
3137			error = ENOMEM;
3138			goto err_tx_desc;
3139		}
3140		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3141		bzero((void *)txr->tx_base, tsize);
3142
3143        	/* Now allocate transmit buffers for the ring */
3144        	if (igb_allocate_transmit_buffers(txr)) {
3145			device_printf(dev,
3146			    "Critical Failure setting up transmit buffers\n");
3147			error = ENOMEM;
3148			goto err_tx_desc;
3149        	}
3150#if __FreeBSD_version >= 800000
3151		/* Allocate a buf ring */
3152		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3153		    M_WAITOK, &txr->tx_mtx);
3154#endif
3155	}
3156
3157	/*
3158	 * Next the RX queues...
3159	 */
3160	rsize = roundup2(adapter->num_rx_desc *
3161	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3162	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3163		rxr = &adapter->rx_rings[i];
3164		rxr->adapter = adapter;
3165		rxr->me = i;
3166
3167		/* Initialize the RX lock */
3168		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3169		    device_get_nameunit(dev), rxr->me);
3170		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3171
3172		if (igb_dma_malloc(adapter, rsize,
3173			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3174			device_printf(dev,
3175			    "Unable to allocate RX Descriptor memory\n");
3176			error = ENOMEM;
3177			goto err_rx_desc;
3178		}
3179		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3180		bzero((void *)rxr->rx_base, rsize);
3181
3182		/* Allocate receive buffers for the ring */
3183		if (igb_allocate_receive_buffers(rxr)) {
3184			device_printf(dev,
3185			    "Critical Failure setting up receive buffers\n");
3186			error = ENOMEM;
3187			goto err_rx_desc;
3188		}
3189	}
3190
3191	/*
3192	** Finally set up the queue holding structs
3193	*/
3194	for (int i = 0; i < adapter->num_queues; i++) {
3195		que = &adapter->queues[i];
3196		que->adapter = adapter;
3197		que->txr = &adapter->tx_rings[i];
3198		que->rxr = &adapter->rx_rings[i];
3199	}
3200
3201	return (0);
3202
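/* Error unwind: free, in reverse order, whatever was set up before the failure */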
3203err_rx_desc:
3204	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3205		igb_dma_free(adapter, &rxr->rxdma);
3206err_tx_desc:
3207	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3208		igb_dma_free(adapter, &txr->txdma);
3209	free(adapter->rx_rings, M_DEVBUF);
3210rx_fail:
3211#if __FreeBSD_version >= 800000
3212	buf_ring_free(txr->br, M_DEVBUF);
3213#endif
3214	free(adapter->tx_rings, M_DEVBUF);
3215tx_fail:
3216	free(adapter->queues, M_DEVBUF);
3217fail:
3218	return (error);
3219}
3220
3221/*********************************************************************
3222 *
3223 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3224 *  the information needed to transmit a packet on the wire. This is
3225 *  called only once at attach; setup is done on every reset.
3226 *
3227 **********************************************************************/
3228static int
3229igb_allocate_transmit_buffers(struct tx_ring *txr)
3230{
3231	struct adapter *adapter = txr->adapter;
3232	device_t dev = adapter->dev;
3233	struct igb_tx_buffer *txbuf;
3234	int error, i;
3235
3236	/*
3237	 * Setup DMA descriptor areas.
3238	 */
3239	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3240			       1, 0,			/* alignment, bounds */
3241			       BUS_SPACE_MAXADDR,	/* lowaddr */
3242			       BUS_SPACE_MAXADDR,	/* highaddr */
3243			       NULL, NULL,		/* filter, filterarg */
3244			       IGB_TSO_SIZE,		/* maxsize */
3245			       IGB_MAX_SCATTER,		/* nsegments */
3246			       PAGE_SIZE,		/* maxsegsize */
3247			       0,			/* flags */
3248			       NULL,			/* lockfunc */
3249			       NULL,			/* lockfuncarg */
3250			       &txr->txtag))) {
3251		device_printf(dev,"Unable to allocate TX DMA tag\n");
3252		goto fail;
3253	}
3254
3255	if (!(txr->tx_buffers =
3256	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3257	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3258		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3259		error = ENOMEM;
3260		goto fail;
3261	}
3262
3263        /* Create the descriptor buffer dma maps */
3264	txbuf = txr->tx_buffers;
3265	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3266		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3267		if (error != 0) {
3268			device_printf(dev, "Unable to create TX DMA map\n");
3269			goto fail;
3270		}
3271	}
3272
3273	return 0;
3274fail:
3275	/* We free all, it handles case where we are in the middle */
3276	igb_free_transmit_structures(adapter);
3277	return (error);
3278}
3279
3280/*********************************************************************
3281 *
3282 *  Initialize a transmit ring.
3283 *
3284 **********************************************************************/
3285static void
3286igb_setup_transmit_ring(struct tx_ring *txr)
3287{
3288	struct adapter *adapter = txr->adapter;
3289	struct igb_tx_buffer *txbuf;
3290	int i;
3291#ifdef DEV_NETMAP
3292	struct netmap_adapter *na = NA(adapter->ifp);
3293	struct netmap_slot *slot;
3294#endif /* DEV_NETMAP */
3295
3296	/* Clear the old descriptor contents */
3297	IGB_TX_LOCK(txr);
3298#ifdef DEV_NETMAP
3299	slot = netmap_reset(na, NR_TX, txr->me, 0);
3300#endif /* DEV_NETMAP */
3301	bzero((void *)txr->tx_base,
3302	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3303	/* Reset indices */
3304	txr->next_avail_desc = 0;
3305	txr->next_to_clean = 0;
3306
3307	/* Free any existing tx buffers. */
3308        txbuf = txr->tx_buffers;
3309	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3310		if (txbuf->m_head != NULL) {
3311			bus_dmamap_sync(txr->txtag, txbuf->map,
3312			    BUS_DMASYNC_POSTWRITE);
3313			bus_dmamap_unload(txr->txtag, txbuf->map);
3314			m_freem(txbuf->m_head);
3315			txbuf->m_head = NULL;
3316		}
3317#ifdef DEV_NETMAP
3318		if (slot) {
3319			/* slot si is mapped to the i-th NIC-ring entry */
3320			int si = i + na->tx_rings[txr->me].nkr_hwofs;
3321
3322			if (si < 0)
3323				si += na->num_tx_desc;
3324			netmap_load_map(txr->txtag, txbuf->map,
3325				NMB(slot + si), na->buff_size);
3326		}
3327#endif /* DEV_NETMAP */
3328		/* clear the watch index */
3329		txbuf->next_eop = -1;
3330        }
3331
3332	/* Set number of descriptors available */
3333	txr->tx_avail = adapter->num_tx_desc;
3334
3335	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3336	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3337	IGB_TX_UNLOCK(txr);
3338}
3339
3340/*********************************************************************
3341 *
3342 *  Initialize all transmit rings.
3343 *
3344 **********************************************************************/
3345static void
3346igb_setup_transmit_structures(struct adapter *adapter)
3347{
3348	struct tx_ring *txr = adapter->tx_rings;
3349
3350	for (int i = 0; i < adapter->num_queues; i++, txr++)
3351		igb_setup_transmit_ring(txr);
3352
3353	return;
3354}
3355
3356/*********************************************************************
3357 *
3358 *  Enable transmit unit.
3359 *
3360 **********************************************************************/
3361static void
3362igb_initialize_transmit_units(struct adapter *adapter)
3363{
3364	struct tx_ring	*txr = adapter->tx_rings;
3365	struct e1000_hw *hw = &adapter->hw;
3366	u32		tctl, txdctl;
3367
3368	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3369	tctl = txdctl = 0;
3370
3371	/* Setup the Tx Descriptor Rings */
3372	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3373		u64 bus_addr = txr->txdma.dma_paddr;
3374
3375		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3376		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3377		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3378		    (uint32_t)(bus_addr >> 32));
3379		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3380		    (uint32_t)bus_addr);
3381
3382		/* Setup the HW Tx Head and Tail descriptor pointers */
3383		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3384		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3385
3386		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3387		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3388		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3389
3390		txr->queue_status = IGB_QUEUE_IDLE;
3391
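		/* Prefetch/host/write-back thresholds, then enable the queue */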
3392		txdctl |= IGB_TX_PTHRESH;
3393		txdctl |= IGB_TX_HTHRESH << 8;
3394		txdctl |= IGB_TX_WTHRESH << 16;
3395		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3396		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3397	}
3398
3399	if (adapter->vf_ifp)
3400		return;
3401
3402	e1000_config_collision_dist(hw);
3403
3404	/* Program the Transmit Control Register */
3405	tctl = E1000_READ_REG(hw, E1000_TCTL);
3406	tctl &= ~E1000_TCTL_CT;
3407	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3408		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3409
3410	/* This write will effectively turn on the transmit unit. */
3411	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3412}
3413
3414/*********************************************************************
3415 *
3416 *  Free all transmit rings.
3417 *
3418 **********************************************************************/
3419static void
3420igb_free_transmit_structures(struct adapter *adapter)
3421{
3422	struct tx_ring *txr = adapter->tx_rings;
3423
3424	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3425		IGB_TX_LOCK(txr);
3426		igb_free_transmit_buffers(txr);
3427		igb_dma_free(adapter, &txr->txdma);
3428		IGB_TX_UNLOCK(txr);
3429		IGB_TX_LOCK_DESTROY(txr);
3430	}
3431	free(adapter->tx_rings, M_DEVBUF);
3432}
3433
3434/*********************************************************************
3435 *
3436 *  Free transmit ring related data structures.
3437 *
3438 **********************************************************************/
3439static void
3440igb_free_transmit_buffers(struct tx_ring *txr)
3441{
3442	struct adapter *adapter = txr->adapter;
3443	struct igb_tx_buffer *tx_buffer;
3444	int             i;
3445
3446	INIT_DEBUGOUT("free_transmit_ring: begin");
3447
3448	if (txr->tx_buffers == NULL)
3449		return;
3450
3451	tx_buffer = txr->tx_buffers;
3452	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3453		if (tx_buffer->m_head != NULL) {
3454			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3455			    BUS_DMASYNC_POSTWRITE);
3456			bus_dmamap_unload(txr->txtag,
3457			    tx_buffer->map);
3458			m_freem(tx_buffer->m_head);
3459			tx_buffer->m_head = NULL;
3460			if (tx_buffer->map != NULL) {
3461				bus_dmamap_destroy(txr->txtag,
3462				    tx_buffer->map);
3463				tx_buffer->map = NULL;
3464			}
3465		} else if (tx_buffer->map != NULL) {
3466			bus_dmamap_unload(txr->txtag,
3467			    tx_buffer->map);
3468			bus_dmamap_destroy(txr->txtag,
3469			    tx_buffer->map);
3470			tx_buffer->map = NULL;
3471		}
3472	}
3473#if __FreeBSD_version >= 800000
3474	if (txr->br != NULL)
3475		buf_ring_free(txr->br, M_DEVBUF);
3476#endif
3477	if (txr->tx_buffers != NULL) {
3478		free(txr->tx_buffers, M_DEVBUF);
3479		txr->tx_buffers = NULL;
3480	}
3481	if (txr->txtag != NULL) {
3482		bus_dma_tag_destroy(txr->txtag);
3483		txr->txtag = NULL;
3484	}
3485	return;
3486}
3487
3488/**********************************************************************
3489 *
3490 *  Setup work for hardware segmentation offload (TSO)
3491 *
3492 **********************************************************************/
3493static bool
3494igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3495	struct ip *ip, struct tcphdr *th)
3496{
3497	struct adapter *adapter = txr->adapter;
3498	struct e1000_adv_tx_context_desc *TXD;
3499	struct igb_tx_buffer        *tx_buffer;
3500	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3501	u32 mss_l4len_idx = 0;
3502	u16 vtag = 0;
3503	int ctxd, ip_hlen, tcp_hlen;
3504
3505	ctxd = txr->next_avail_desc;
3506	tx_buffer = &txr->tx_buffers[ctxd];
3507	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3508
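	/*
	 * Zero the IP checksum; the hardware recomputes it for each
	 * TSO segment (IXSM is set by igb_xmit for TSO packets).
	 */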
3509	ip->ip_sum = 0;
3510	ip_hlen = ip->ip_hl << 2;
3511	tcp_hlen = th->th_off << 2;
3512
3513	/* VLAN MACLEN IPLEN */
3514	if (mp->m_flags & M_VLANTAG) {
3515		vtag = htole16(mp->m_pkthdr.ether_vtag);
3516		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3517	}
3518
3519	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3520	vlan_macip_lens |= ip_hlen;
3521	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3522
3523	/* ADV DTYPE TUCMD */
3524	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3525	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3526	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3527	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3528
3529	/* MSS L4LEN IDX */
3530	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3531	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3532	/* 82575 needs the queue index added */
3533	if (adapter->hw.mac.type == e1000_82575)
3534		mss_l4len_idx |= txr->me << 4;
3535	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3536
3537	TXD->seqnum_seed = htole32(0);
3538	tx_buffer->m_head = NULL;
3539	tx_buffer->next_eop = -1;
3540
3541	if (++ctxd == adapter->num_tx_desc)
3542		ctxd = 0;
3543
3544	txr->tx_avail--;
3545	txr->next_avail_desc = ctxd;
3546	return TRUE;
3547}
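
/*
 * A concrete example of the packing above: for a TCP stream over a
 * 1500-byte MTU with no IP or TCP options, ehdrlen is 14, ip_hlen and
 * tcp_hlen are both 20, and tso_segsz is typically 1460; those values
 * are shifted into vlan_macip_lens and mss_l4len_idx as shown, and the
 * hardware replicates the headers onto every segment it carves from
 * the payload.
 */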
3548
3549
3550/*********************************************************************
3551 *
3552 *  Context Descriptor setup for VLAN or CSUM
3553 *
3554 **********************************************************************/
3555
3556static bool
3557igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3558{
3559	struct adapter *adapter = txr->adapter;
3560	struct e1000_adv_tx_context_desc *TXD;
3561	struct igb_tx_buffer        *tx_buffer;
3562	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3563	struct ether_vlan_header *eh;
3564	struct ip *ip = NULL;
3565	struct ip6_hdr *ip6;
3566	int  ehdrlen, ctxd, ip_hlen = 0;
3567	u16	etype, vtag = 0;
3568	u8	ipproto = 0;
3569	bool	offload = TRUE;
3570
3571	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3572		offload = FALSE;
3573
3574	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3575	ctxd = txr->next_avail_desc;
3576	tx_buffer = &txr->tx_buffers[ctxd];
3577	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3578
3579	/*
3580	** In advanced descriptors the vlan tag must
3581	** be placed into the context descriptor, so we
3582	** may need a context descriptor just for that setup.
3583	*/
3584	if (mp->m_flags & M_VLANTAG) {
3585		vtag = htole16(mp->m_pkthdr.ether_vtag);
3586		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3587	} else if (offload == FALSE)
3588		return FALSE;
3589
3590	/*
3591	 * Determine where frame payload starts.
3592	 * Jump over vlan headers if already present,
3593	 * helpful for QinQ too.
3594	 */
3595	eh = mtod(mp, struct ether_vlan_header *);
3596	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3597		etype = ntohs(eh->evl_proto);
3598		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3599	} else {
3600		etype = ntohs(eh->evl_encap_proto);
3601		ehdrlen = ETHER_HDR_LEN;
3602	}
3603
3604	/* Set the ether header length */
3605	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3606
3607	switch (etype) {
3608		case ETHERTYPE_IP:
3609			ip = (struct ip *)(mp->m_data + ehdrlen);
3610			ip_hlen = ip->ip_hl << 2;
3611			if (mp->m_len < ehdrlen + ip_hlen) {
3612				offload = FALSE;
3613				break;
3614			}
3615			ipproto = ip->ip_p;
3616			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3617			break;
3618		case ETHERTYPE_IPV6:
3619			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3620			ip_hlen = sizeof(struct ip6_hdr);
3621			ipproto = ip6->ip6_nxt;
3622			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3623			break;
3624		default:
3625			offload = FALSE;
3626			break;
3627	}
3628
3629	vlan_macip_lens |= ip_hlen;
3630	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3631
3632	switch (ipproto) {
3633		case IPPROTO_TCP:
3634			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3635				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3636			break;
3637		case IPPROTO_UDP:
3638			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3639				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3640			break;
3641#if __FreeBSD_version >= 800000
3642		case IPPROTO_SCTP:
3643			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3644				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3645			break;
3646#endif
3647		default:
3648			offload = FALSE;
3649			break;
3650	}
3651
3652	/* 82575 needs the queue index added */
3653	if (adapter->hw.mac.type == e1000_82575)
3654		mss_l4len_idx = txr->me << 4;
3655
3656	/* Now copy bits into descriptor */
3657	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3658	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3659	TXD->seqnum_seed = htole32(0);
3660	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3661
3662	tx_buffer->m_head = NULL;
3663	tx_buffer->next_eop = -1;
3664
3665	/* We've consumed the first desc, adjust counters */
3666	if (++ctxd == adapter->num_tx_desc)
3667		ctxd = 0;
3668	txr->next_avail_desc = ctxd;
3669	--txr->tx_avail;
3670
3671        return (offload);
3672}
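
/*
 * Note that once igb_tx_ctx_setup() gets past its early return it
 * always consumes one context descriptor, even when it ends up
 * returning FALSE (for example on an unrecognized ethertype); the
 * return value only tells the caller whether the hardware will
 * actually perform the requested checksum offload.
 */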
3673
3674
3675/**********************************************************************
3676 *
3677 *  Examine each tx_buffer in the used queue. If the hardware is done
3678 *  processing the packet then free associated resources. The
3679 *  tx_buffer is put back on the free queue.
3680 *
3681 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3682 **********************************************************************/
3683static bool
3684igb_txeof(struct tx_ring *txr)
3685{
3686	struct adapter	*adapter = txr->adapter;
3687        int first, last, done, processed;
3688        struct igb_tx_buffer *tx_buffer;
3689        struct e1000_tx_desc   *tx_desc, *eop_desc;
3690	struct ifnet   *ifp = adapter->ifp;
3691
3692	IGB_TX_LOCK_ASSERT(txr);
3693
3694#ifdef DEV_NETMAP
3695	if (ifp->if_capenable & IFCAP_NETMAP) {
3696		struct netmap_adapter *na = NA(ifp);
3697
3698		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3699		IGB_TX_UNLOCK(txr);
3700		IGB_CORE_LOCK(adapter);
3701		selwakeuppri(&na->tx_rings[na->num_queues + 1].si, PI_NET);
3702		IGB_CORE_UNLOCK(adapter);
3703		IGB_TX_LOCK(txr);
3704		return FALSE;
3705	}
3706#endif /* DEV_NETMAP */
3707        if (txr->tx_avail == adapter->num_tx_desc) {
3708		txr->queue_status = IGB_QUEUE_IDLE;
3709                return FALSE;
3710	}
3711
3712	processed = 0;
3713        first = txr->next_to_clean;
3714        tx_desc = &txr->tx_base[first];
3715        tx_buffer = &txr->tx_buffers[first];
3716	last = tx_buffer->next_eop;
3717        eop_desc = &txr->tx_base[last];
3718
3719	/*
3720	 * Get the index of the first descriptor AFTER
3721	 * the EOP of the first packet; that way we can
3722	 * do a simple comparison in the inner while
3723	 * loop below.
3724	 */
3725	if (++last == adapter->num_tx_desc)
3726 		last = 0;
3727	done = last;
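	/*
	 * For example, with a ring of (say) 1024 descriptors and the
	 * first packet's EOP sitting in slot 1023, 'last' wraps to 0
	 * here and the inner loop below keeps cleaning until 'first'
	 * itself wraps past slot 1023.
	 */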
3728
3729        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3730            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3731
3732        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3733		/* We clean the range of the packet */
3734		while (first != done) {
3735                	tx_desc->upper.data = 0;
3736                	tx_desc->lower.data = 0;
3737                	tx_desc->buffer_addr = 0;
3738                	++txr->tx_avail;
3739			++processed;
3740
3741			if (tx_buffer->m_head) {
3742				txr->bytes +=
3743				    tx_buffer->m_head->m_pkthdr.len;
3744				bus_dmamap_sync(txr->txtag,
3745				    tx_buffer->map,
3746				    BUS_DMASYNC_POSTWRITE);
3747				bus_dmamap_unload(txr->txtag,
3748				    tx_buffer->map);
3749
3750                        	m_freem(tx_buffer->m_head);
3751                        	tx_buffer->m_head = NULL;
3752                	}
3753			tx_buffer->next_eop = -1;
3754			txr->watchdog_time = ticks;
3755
3756	                if (++first == adapter->num_tx_desc)
3757				first = 0;
3758
3759	                tx_buffer = &txr->tx_buffers[first];
3760			tx_desc = &txr->tx_base[first];
3761		}
3762		++txr->packets;
3763		++ifp->if_opackets;
3764		/* See if we can continue to the next packet */
3765		last = tx_buffer->next_eop;
3766		if (last != -1) {
3767        		eop_desc = &txr->tx_base[last];
3768			/* Get new done point */
3769			if (++last == adapter->num_tx_desc) last = 0;
3770			done = last;
3771		} else
3772			break;
3773        }
3774        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3775            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3776
3777        txr->next_to_clean = first;
3778
3779	/*
3780	** Watchdog calculation: we know there is work
3781	** outstanding or the first return would have
3782	** been taken, so nothing processed for too
3783	** long indicates a hang.
3784	*/
3785	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3786		txr->queue_status |= IGB_QUEUE_HUNG;
3787        /*
3788         * If we have a minimum free,
3789         * clear depleted state bit
3790         */
3791        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3792                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3793
3794	/* All clean, turn off the watchdog */
3795	if (txr->tx_avail == adapter->num_tx_desc) {
3796		txr->queue_status = IGB_QUEUE_IDLE;
3797		return (FALSE);
3798        }
3799
3800	return (TRUE);
3801}
3802
3803/*********************************************************************
3804 *
3805 *  Refresh mbuf buffers for RX descriptor rings
3806 *   - now keeps its own state, so discards due to resource
3807 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3808 *     the routine simply returns, keeping its placeholder, and
3809 *     can be called again later to retry.
3810 *
3811 **********************************************************************/
3812static void
3813igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3814{
3815	struct adapter		*adapter = rxr->adapter;
3816	bus_dma_segment_t	hseg[1];
3817	bus_dma_segment_t	pseg[1];
3818	struct igb_rx_buf	*rxbuf;
3819	struct mbuf		*mh, *mp;
3820	int			i, j, nsegs, error;
3821	bool			refreshed = FALSE;
3822
3823	i = j = rxr->next_to_refresh;
3824	/*
3825	** Get one descriptor beyond
3826	** our work mark to control
3827	** the loop.
3828        */
3829	if (++j == adapter->num_rx_desc)
3830		j = 0;
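	/*
	 * 'limit' is the index igb_rxeof() will examine next; the loop
	 * below stops before reaching it, so a descriptor the clean loop
	 * has not finished with is never refreshed out from under it.
	 * If an mbuf allocation fails we bail out via 'update' and rely
	 * on being called again later with the same state.
	 */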
3831
3832	while (j != limit) {
3833		rxbuf = &rxr->rx_buffers[i];
3834		/* No hdr mbuf used with header split off */
3835		if (rxr->hdr_split == FALSE)
3836			goto no_split;
3837		if (rxbuf->m_head == NULL) {
3838			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3839			if (mh == NULL)
3840				goto update;
3841		} else
3842			mh = rxbuf->m_head;
3843
3844		mh->m_pkthdr.len = mh->m_len = MHLEN;
3845		mh->m_len = MHLEN;
3846		mh->m_flags |= M_PKTHDR;
3847		/* Get the memory mapping */
3848		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3849		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3850		if (error != 0) {
3851			printf("Refresh mbufs: hdr dmamap load"
3852			    " failure - %d\n", error);
3853			m_free(mh);
3854			rxbuf->m_head = NULL;
3855			goto update;
3856		}
3857		rxbuf->m_head = mh;
3858		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3859		    BUS_DMASYNC_PREREAD);
3860		rxr->rx_base[i].read.hdr_addr =
3861		    htole64(hseg[0].ds_addr);
3862no_split:
3863		if (rxbuf->m_pack == NULL) {
3864			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3865			    M_PKTHDR, adapter->rx_mbuf_sz);
3866			if (mp == NULL)
3867				goto update;
3868		} else
3869			mp = rxbuf->m_pack;
3870
3871		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3872		/* Get the memory mapping */
3873		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3874		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3875		if (error != 0) {
3876			printf("Refresh mbufs: payload dmamap load"
3877			    " failure - %d\n", error);
3878			m_free(mp);
3879			rxbuf->m_pack = NULL;
3880			goto update;
3881		}
3882		rxbuf->m_pack = mp;
3883		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3884		    BUS_DMASYNC_PREREAD);
3885		rxr->rx_base[i].read.pkt_addr =
3886		    htole64(pseg[0].ds_addr);
3887		refreshed = TRUE; /* I feel wefreshed :) */
3888
3889		i = j; /* our next is precalculated */
3890		rxr->next_to_refresh = i;
3891		if (++j == adapter->num_rx_desc)
3892			j = 0;
3893	}
3894update:
3895	if (refreshed) /* update tail */
3896		E1000_WRITE_REG(&adapter->hw,
3897		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3898	return;
3899}
3900
3901
3902/*********************************************************************
3903 *
3904 *  Allocate memory for rx_buffer structures. Since we use one
3905 *  rx_buffer per received packet, the maximum number of rx_buffer's
3906 *  that we'll need is equal to the number of receive descriptors
3907 *  that we've allocated.
3908 *
3909 **********************************************************************/
3910static int
3911igb_allocate_receive_buffers(struct rx_ring *rxr)
3912{
3913	struct	adapter 	*adapter = rxr->adapter;
3914	device_t 		dev = adapter->dev;
3915	struct igb_rx_buf	*rxbuf;
3916	int             	i, bsize, error;
3917
3918	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3919	if (!(rxr->rx_buffers =
3920	    (struct igb_rx_buf *) malloc(bsize,
3921	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3922		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3923		error = ENOMEM;
3924		goto fail;
3925	}
3926
3927	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3928				   1, 0,		/* alignment, bounds */
3929				   BUS_SPACE_MAXADDR,	/* lowaddr */
3930				   BUS_SPACE_MAXADDR,	/* highaddr */
3931				   NULL, NULL,		/* filter, filterarg */
3932				   MSIZE,		/* maxsize */
3933				   1,			/* nsegments */
3934				   MSIZE,		/* maxsegsize */
3935				   0,			/* flags */
3936				   NULL,		/* lockfunc */
3937				   NULL,		/* lockfuncarg */
3938				   &rxr->htag))) {
3939		device_printf(dev, "Unable to create RX DMA tag\n");
3940		goto fail;
3941	}
3942
3943	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3944				   1, 0,		/* alignment, bounds */
3945				   BUS_SPACE_MAXADDR,	/* lowaddr */
3946				   BUS_SPACE_MAXADDR,	/* highaddr */
3947				   NULL, NULL,		/* filter, filterarg */
3948				   MJUM9BYTES,		/* maxsize */
3949				   1,			/* nsegments */
3950				   MJUM9BYTES,		/* maxsegsize */
3951				   0,			/* flags */
3952				   NULL,		/* lockfunc */
3953				   NULL,		/* lockfuncarg */
3954				   &rxr->ptag))) {
3955		device_printf(dev, "Unable to create RX payload DMA tag\n");
3956		goto fail;
3957	}
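	/*
	 * Two tags are used because of header split: rxr->htag maps the
	 * small header mbufs (the tag is sized to MSIZE, a single plain
	 * mbuf), while rxr->ptag maps the payload clusters and is sized
	 * for up to 9KB jumbo clusters (MJUM9BYTES) regardless of the
	 * rx_mbuf_sz actually in use.
	 */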
3958
3959	for (i = 0; i < adapter->num_rx_desc; i++) {
3960		rxbuf = &rxr->rx_buffers[i];
3961		error = bus_dmamap_create(rxr->htag,
3962		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3963		if (error) {
3964			device_printf(dev,
3965			    "Unable to create RX head DMA maps\n");
3966			goto fail;
3967		}
3968		error = bus_dmamap_create(rxr->ptag,
3969		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3970		if (error) {
3971			device_printf(dev,
3972			    "Unable to create RX packet DMA maps\n");
3973			goto fail;
3974		}
3975	}
3976
3977	return (0);
3978
3979fail:
3980	/* Frees all, but can handle partial completion */
3981	igb_free_receive_structures(adapter);
3982	return (error);
3983}
3984
3985
3986static void
3987igb_free_receive_ring(struct rx_ring *rxr)
3988{
3989	struct	adapter		*adapter = rxr->adapter;
3990	struct igb_rx_buf	*rxbuf;
3991
3992
3993	for (int i = 0; i < adapter->num_rx_desc; i++) {
3994		rxbuf = &rxr->rx_buffers[i];
3995		if (rxbuf->m_head != NULL) {
3996			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3997			    BUS_DMASYNC_POSTREAD);
3998			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3999			rxbuf->m_head->m_flags |= M_PKTHDR;
4000			m_freem(rxbuf->m_head);
4001		}
4002		if (rxbuf->m_pack != NULL) {
4003			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4004			    BUS_DMASYNC_POSTREAD);
4005			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4006			rxbuf->m_pack->m_flags |= M_PKTHDR;
4007			m_freem(rxbuf->m_pack);
4008		}
4009		rxbuf->m_head = NULL;
4010		rxbuf->m_pack = NULL;
4011	}
4012}
4013
4014
4015/*********************************************************************
4016 *
4017 *  Initialize a receive ring and its buffers.
4018 *
4019 **********************************************************************/
4020static int
4021igb_setup_receive_ring(struct rx_ring *rxr)
4022{
4023	struct	adapter		*adapter;
4024	struct  ifnet		*ifp;
4025	device_t		dev;
4026	struct igb_rx_buf	*rxbuf;
4027	bus_dma_segment_t	pseg[1], hseg[1];
4028	struct lro_ctrl		*lro = &rxr->lro;
4029	int			rsize, nsegs, error = 0;
4030#ifdef DEV_NETMAP
4031	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4032	struct netmap_slot *slot;
4033#endif /* DEV_NETMAP */
4034
4035	adapter = rxr->adapter;
4036	dev = adapter->dev;
4037	ifp = adapter->ifp;
4038
4039	/* Clear the ring contents */
4040	IGB_RX_LOCK(rxr);
4041#ifdef DEV_NETMAP
4042	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4043#endif /* DEV_NETMAP */
4044	rsize = roundup2(adapter->num_rx_desc *
4045	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4046	bzero((void *)rxr->rx_base, rsize);
4047
4048	/*
4049	** Free current RX buffer structures and their mbufs
4050	*/
4051	igb_free_receive_ring(rxr);
4052
4053	/* Configure for header split? */
4054	if (igb_header_split)
4055		rxr->hdr_split = TRUE;
4056
4057        /* Now replenish the ring mbufs */
4058	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4059		struct mbuf	*mh, *mp;
4060
4061		rxbuf = &rxr->rx_buffers[j];
4062#ifdef DEV_NETMAP
4063		if (slot) {
4064			/* slot sj is mapped to the i-th NIC-ring entry */
4065			int sj = j + na->rx_rings[rxr->me].nkr_hwofs;
4066			void *addr;
4067
4068			if (sj < 0)
4069				sj += na->num_rx_desc;
4070			addr = NMB(slot + sj);
4071			netmap_load_map(rxr->ptag,
4072			    rxbuf->pmap, addr, na->buff_size);
4073			/* Update descriptor */
4074			rxr->rx_base[j].read.pkt_addr = htole64(vtophys(addr));
4075			continue;
4076		}
4077#endif /* DEV_NETMAP */
4078		if (rxr->hdr_split == FALSE)
4079			goto skip_head;
4080
4081		/* First the header */
4082		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4083		if (rxbuf->m_head == NULL) {
4084			error = ENOBUFS;
4085                        goto fail;
4086		}
4087		m_adj(rxbuf->m_head, ETHER_ALIGN);
4088		mh = rxbuf->m_head;
4089		mh->m_len = mh->m_pkthdr.len = MHLEN;
4090		mh->m_flags |= M_PKTHDR;
4091		/* Get the memory mapping */
4092		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4093		    rxbuf->hmap, rxbuf->m_head, hseg,
4094		    &nsegs, BUS_DMA_NOWAIT);
4095		if (error != 0) /* Nothing elegant to do here */
4096                        goto fail;
4097		bus_dmamap_sync(rxr->htag,
4098		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4099		/* Update descriptor */
4100		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4101
4102skip_head:
4103		/* Now the payload cluster */
4104		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4105		    M_PKTHDR, adapter->rx_mbuf_sz);
4106		if (rxbuf->m_pack == NULL) {
4107			error = ENOBUFS;
4108                        goto fail;
4109		}
4110		mp = rxbuf->m_pack;
4111		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4112		/* Get the memory mapping */
4113		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4114		    rxbuf->pmap, mp, pseg,
4115		    &nsegs, BUS_DMA_NOWAIT);
4116		if (error != 0)
4117                        goto fail;
4118		bus_dmamap_sync(rxr->ptag,
4119		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4120		/* Update descriptor */
4121		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4122        }
4123
4124	/* Setup our descriptor indices */
4125	rxr->next_to_check = 0;
4126	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4127	rxr->lro_enabled = FALSE;
4128	rxr->rx_split_packets = 0;
4129	rxr->rx_bytes = 0;
4130
4131	rxr->fmp = NULL;
4132	rxr->lmp = NULL;
4133	rxr->discard = FALSE;
4134
4135	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4136	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4137
4138	/*
4139	** Now set up the LRO interface; we also only
4140	** do header split when LRO is enabled, since
4141	** the two are so often undesirable in the
4142	** same setups.
4143	*/
4144	if (ifp->if_capenable & IFCAP_LRO) {
4145		error = tcp_lro_init(lro);
4146		if (error) {
4147			device_printf(dev, "LRO Initialization failed!\n");
4148			goto fail;
4149		}
4150		INIT_DEBUGOUT("RX LRO Initialized\n");
4151		rxr->lro_enabled = TRUE;
4152		lro->ifp = adapter->ifp;
4153	}
4154
4155	IGB_RX_UNLOCK(rxr);
4156	return (0);
4157
4158fail:
4159	igb_free_receive_ring(rxr);
4160	IGB_RX_UNLOCK(rxr);
4161	return (error);
4162}
4163
4164
4165/*********************************************************************
4166 *
4167 *  Initialize all receive rings.
4168 *
4169 **********************************************************************/
4170static int
4171igb_setup_receive_structures(struct adapter *adapter)
4172{
4173	struct rx_ring *rxr = adapter->rx_rings;
4174	int i;
4175
4176	for (i = 0; i < adapter->num_queues; i++, rxr++)
4177		if (igb_setup_receive_ring(rxr))
4178			goto fail;
4179
4180	return (0);
4181fail:
4182	/*
4183	 * Free RX buffers allocated so far; we will only handle
4184	 * the rings that completed, since the failing case will
4185	 * have cleaned up after itself. 'i' is the endpoint.
4186	 */
4187	for (int j = 0; j < i; ++j) {
4188		rxr = &adapter->rx_rings[j];
4189		IGB_RX_LOCK(rxr);
4190		igb_free_receive_ring(rxr);
4191		IGB_RX_UNLOCK(rxr);
4192	}
4193
4194	return (ENOBUFS);
4195}
4196
4197/*********************************************************************
4198 *
4199 *  Enable receive unit.
4200 *
4201 **********************************************************************/
4202static void
4203igb_initialize_receive_units(struct adapter *adapter)
4204{
4205	struct rx_ring	*rxr = adapter->rx_rings;
4206	struct ifnet	*ifp = adapter->ifp;
4207	struct e1000_hw *hw = &adapter->hw;
4208	u32		rctl, rxcsum, psize, srrctl = 0;
4209
4210	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4211
4212	/*
4213	 * Make sure receives are disabled while setting
4214	 * up the descriptor ring
4215	 */
4216	rctl = E1000_READ_REG(hw, E1000_RCTL);
4217	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4218
4219	/*
4220	** Set up for header split
4221	*/
4222	if (igb_header_split) {
4223		/* Use a standard mbuf for the header */
4224		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4225		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4226	} else
4227		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4228
4229	/*
4230	** Set up for jumbo frames
4231	*/
4232	if (ifp->if_mtu > ETHERMTU) {
4233		rctl |= E1000_RCTL_LPE;
4234		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4235			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4236			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4237		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4238			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4239			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4240		}
4241		/* Set maximum packet len */
4242		psize = adapter->max_frame_size;
4243		/* are we on a vlan? */
4244		if (adapter->ifp->if_vlantrunk != NULL)
4245			psize += VLAN_TAG_SIZE;
4246		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4247	} else {
4248		rctl &= ~E1000_RCTL_LPE;
4249		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4250		rctl |= E1000_RCTL_SZ_2048;
4251	}
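	/*
	 * The BSIZEPKT field of SRRCTL is expressed in 1KB units
	 * (assuming the usual E1000_SRRCTL_BSIZEPKT_SHIFT of 10), so
	 * 2048 >> shift programs a 2KB buffer, 4096 >> shift a 4KB one
	 * and 8192 >> shift an 8KB one; the RCTL_SZ_x and BSEX bits
	 * above describe the same choice to the legacy receive logic.
	 */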
4252
4253	/* Setup the Base and Length of the Rx Descriptor Rings */
4254	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4255		u64 bus_addr = rxr->rxdma.dma_paddr;
4256		u32 rxdctl;
4257
4258		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4259		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4260		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4261		    (uint32_t)(bus_addr >> 32));
4262		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4263		    (uint32_t)bus_addr);
4264		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4265		/* Enable this Queue */
4266		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4267		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4268		rxdctl &= 0xFFF00000;
4269		rxdctl |= IGB_RX_PTHRESH;
4270		rxdctl |= IGB_RX_HTHRESH << 8;
4271		rxdctl |= IGB_RX_WTHRESH << 16;
4272		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4273	}
4274
4275	/*
4276	** Setup for RX MultiQueue
4277	*/
4278	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4279	if (adapter->num_queues > 1) {
4280		u32 random[10], mrqc, shift = 0;
4281		union igb_reta {
4282			u32 dword;
4283			u8  bytes[4];
4284		} reta;
4285
4286		arc4rand(&random, sizeof(random), 0);
4287		if (adapter->hw.mac.type == e1000_82575)
4288			shift = 6;
4289		/* Warning FM follows */
4290		for (int i = 0; i < 128; i++) {
4291			reta.bytes[i & 3] =
4292			    (i % adapter->num_queues) << shift;
4293			if ((i & 3) == 3)
4294				E1000_WRITE_REG(hw,
4295				    E1000_RETA(i >> 2), reta.dword);
4296		}
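		/*
		 * The redirection table has 128 byte-wide entries packed
		 * four per 32-bit register, so the loop above writes
		 * E1000_RETA(0) through E1000_RETA(31).  With four queues
		 * the pattern is simply 0,1,2,3,0,1,2,3,...; on the 82575
		 * the entry is shifted left by 6 to land in the bits that
		 * part expects.
		 */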
4297		/* Now fill in hash table */
4298		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4299		for (int i = 0; i < 10; i++)
4300			E1000_WRITE_REG_ARRAY(hw,
4301			    E1000_RSSRK(0), i, random[i]);
4302
4303		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4304		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4305		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4306		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4307		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4308		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4309		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4310		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4311
4312		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4313
4314		/*
4315		** NOTE: Receive Full-Packet Checksum Offload
4316		** is mutually exclusive with Multiqueue.  However,
4317		** this is not the same as the TCP/IP checksums,
4318		** which still work.
4319		*/
4320		rxcsum |= E1000_RXCSUM_PCSD;
4321#if __FreeBSD_version >= 800000
4322		/* For SCTP Offload */
4323		if ((hw->mac.type == e1000_82576)
4324		    && (ifp->if_capenable & IFCAP_RXCSUM))
4325			rxcsum |= E1000_RXCSUM_CRCOFL;
4326#endif
4327	} else {
4328		/* Non RSS setup */
4329		if (ifp->if_capenable & IFCAP_RXCSUM) {
4330			rxcsum |= E1000_RXCSUM_IPPCSE;
4331#if __FreeBSD_version >= 800000
4332			if (adapter->hw.mac.type == e1000_82576)
4333				rxcsum |= E1000_RXCSUM_CRCOFL;
4334#endif
4335		} else
4336			rxcsum &= ~E1000_RXCSUM_TUOFL;
4337	}
4338	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4339
4340	/* Setup the Receive Control Register */
4341	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4342	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4343		   E1000_RCTL_RDMTS_HALF |
4344		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4345	/* Strip CRC bytes. */
4346	rctl |= E1000_RCTL_SECRC;
4347	/* Make sure VLAN Filters are off */
4348	rctl &= ~E1000_RCTL_VFE;
4349	/* Don't store bad packets */
4350	rctl &= ~E1000_RCTL_SBP;
4351
4352	/* Enable Receives */
4353	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4354
4355	/*
4356	 * Setup the HW Rx Head and Tail Descriptor Pointers
4357	 *   - needs to be after enable
4358	 */
4359	for (int i = 0; i < adapter->num_queues; i++) {
4360		rxr = &adapter->rx_rings[i];
4361		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4362#ifdef DEV_NETMAP
4363		/*
4364		 * An init() while a netmap client is active must
4365		 * preserve the rx buffers passed to userspace.
4366		 * In this driver it means we adjust RDT to
4367		 * something different from next_to_refresh
4368		 * (which is not used in netmap mode).
4369		 */
4370		if (ifp->if_capenable & IFCAP_NETMAP) {
4371			struct netmap_adapter *na = NA(adapter->ifp);
4372			struct netmap_kring *kring = &na->rx_rings[i];
4373			int t = rxr->next_to_refresh - kring->nr_hwavail;
4374
4375			if (t >= adapter->num_rx_desc)
4376				t -= adapter->num_rx_desc;
4377			else if (t < 0)
4378				t += adapter->num_rx_desc;
4379			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4380		} else
4381#endif /* DEV_NETMAP */
4382		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4383	}
4384	return;
4385}
4386
4387/*********************************************************************
4388 *
4389 *  Free receive rings.
4390 *
4391 **********************************************************************/
4392static void
4393igb_free_receive_structures(struct adapter *adapter)
4394{
4395	struct rx_ring *rxr = adapter->rx_rings;
4396
4397	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4398		struct lro_ctrl	*lro = &rxr->lro;
4399		igb_free_receive_buffers(rxr);
4400		tcp_lro_free(lro);
4401		igb_dma_free(adapter, &rxr->rxdma);
4402	}
4403
4404	free(adapter->rx_rings, M_DEVBUF);
4405}
4406
4407/*********************************************************************
4408 *
4409 *  Free receive ring data structures.
4410 *
4411 **********************************************************************/
4412static void
4413igb_free_receive_buffers(struct rx_ring *rxr)
4414{
4415	struct adapter		*adapter = rxr->adapter;
4416	struct igb_rx_buf	*rxbuf;
4417	int i;
4418
4419	INIT_DEBUGOUT("free_receive_structures: begin");
4420
4421	/* Cleanup any existing buffers */
4422	if (rxr->rx_buffers != NULL) {
4423		for (i = 0; i < adapter->num_rx_desc; i++) {
4424			rxbuf = &rxr->rx_buffers[i];
4425			if (rxbuf->m_head != NULL) {
4426				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4427				    BUS_DMASYNC_POSTREAD);
4428				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4429				rxbuf->m_head->m_flags |= M_PKTHDR;
4430				m_freem(rxbuf->m_head);
4431			}
4432			if (rxbuf->m_pack != NULL) {
4433				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4434				    BUS_DMASYNC_POSTREAD);
4435				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4436				rxbuf->m_pack->m_flags |= M_PKTHDR;
4437				m_freem(rxbuf->m_pack);
4438			}
4439			rxbuf->m_head = NULL;
4440			rxbuf->m_pack = NULL;
4441			if (rxbuf->hmap != NULL) {
4442				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4443				rxbuf->hmap = NULL;
4444			}
4445			if (rxbuf->pmap != NULL) {
4446				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4447				rxbuf->pmap = NULL;
4448			}
4449		}
4450		if (rxr->rx_buffers != NULL) {
4451			free(rxr->rx_buffers, M_DEVBUF);
4452			rxr->rx_buffers = NULL;
4453		}
4454	}
4455
4456	if (rxr->htag != NULL) {
4457		bus_dma_tag_destroy(rxr->htag);
4458		rxr->htag = NULL;
4459	}
4460	if (rxr->ptag != NULL) {
4461		bus_dma_tag_destroy(rxr->ptag);
4462		rxr->ptag = NULL;
4463	}
4464}
4465
4466static __inline void
4467igb_rx_discard(struct rx_ring *rxr, int i)
4468{
4469	struct igb_rx_buf	*rbuf;
4470
4471	rbuf = &rxr->rx_buffers[i];
4472
4473	/* Partially received? Free the chain */
4474	if (rxr->fmp != NULL) {
4475		rxr->fmp->m_flags |= M_PKTHDR;
4476		m_freem(rxr->fmp);
4477		rxr->fmp = NULL;
4478		rxr->lmp = NULL;
4479	}
4480
4481	/*
4482	** With advanced descriptors the writeback
4483	** clobbers the buffer addresses, so it's easier
4484	** to just free the existing mbufs and take
4485	** the normal refresh path to get new buffers
4486	** and mappings.
4487	*/
4488	if (rbuf->m_head) {
4489		m_free(rbuf->m_head);
4490		rbuf->m_head = NULL;
4491	}
4492
4493	if (rbuf->m_pack) {
4494		m_free(rbuf->m_pack);
4495		rbuf->m_pack = NULL;
4496	}
4497
4498	return;
4499}
4500
4501static __inline void
4502igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4503{
4504
4505	/*
4506	 * At the moment LRO is only for IPv4/TCP packets whose TCP
4507	 * checksum has been verified by hardware, and the packet must not
4508	 * carry a VLAN tag in its Ethernet header.
4509	 */
4510	if (rxr->lro_enabled &&
4511	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4512	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4513	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4514	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4515	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4516	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4517		/*
4518		 * Send to the stack if:
4519		 *  - LRO not enabled, or
4520		 *  - no LRO resources, or
4521		 *  - LRO enqueue fails
4522		 */
4523		if (rxr->lro.lro_cnt != 0)
4524			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4525				return;
4526	}
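	/*
	 * Hand the frame to the stack without the RX lock held:
	 * if_input() can do a fair amount of work and may call back
	 * into the driver, so drop the ring lock around it and retake
	 * it afterwards.
	 */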
4527	IGB_RX_UNLOCK(rxr);
4528	(*ifp->if_input)(ifp, m);
4529	IGB_RX_LOCK(rxr);
4530}
4531
4532/*********************************************************************
4533 *
4534 *  This routine executes in interrupt context. It replenishes
4535 *  the mbufs in the descriptor and sends data which has been
4536 *  dma'ed into host memory to upper layer.
4537 *
4538 *  We loop at most count times if count is > 0, or until done if
4539 *  count < 0.
4540 *
4541 *  Return TRUE if more to clean, FALSE otherwise
4542 *********************************************************************/
4543static bool
4544igb_rxeof(struct igb_queue *que, int count, int *done)
4545{
4546	struct adapter		*adapter = que->adapter;
4547	struct rx_ring		*rxr = que->rxr;
4548	struct ifnet		*ifp = adapter->ifp;
4549	struct lro_ctrl		*lro = &rxr->lro;
4550	struct lro_entry	*queued;
4551	int			i, processed = 0, rxdone = 0;
4552	u32			ptype, staterr = 0;
4553	union e1000_adv_rx_desc	*cur;
4554
4555	IGB_RX_LOCK(rxr);
4556	/* Sync the ring. */
4557	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4558	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4559
4560#ifdef DEV_NETMAP
4561	if (ifp->if_capenable & IFCAP_NETMAP) {
4562		struct netmap_adapter *na = NA(ifp);
4563
4564		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4565		IGB_RX_UNLOCK(rxr);
4566		IGB_CORE_LOCK(adapter);
4567		selwakeuppri(&na->rx_rings[na->num_queues + 1].si, PI_NET);
4568		IGB_CORE_UNLOCK(adapter);
4569		return (0);
4570	}
4571#endif /* DEV_NETMAP */
4572
4573	/* Main clean loop */
4574	for (i = rxr->next_to_check; count != 0;) {
4575		struct mbuf		*sendmp, *mh, *mp;
4576		struct igb_rx_buf	*rxbuf;
4577		u16			hlen, plen, hdr, vtag;
4578		bool			eop = FALSE;
4579
4580		cur = &rxr->rx_base[i];
4581		staterr = le32toh(cur->wb.upper.status_error);
4582		if ((staterr & E1000_RXD_STAT_DD) == 0)
4583			break;
4584		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4585			break;
4586		count--;
4587		sendmp = mh = mp = NULL;
4588		cur->wb.upper.status_error = 0;
4589		rxbuf = &rxr->rx_buffers[i];
4590		plen = le16toh(cur->wb.upper.length);
4591		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4592		if ((adapter->hw.mac.type == e1000_i350) &&
4593		    (staterr & E1000_RXDEXT_STATERR_LB))
4594			vtag = be16toh(cur->wb.upper.vlan);
4595		else
4596			vtag = le16toh(cur->wb.upper.vlan);
4597		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4598		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4599
4600		/* Make sure all segments of a bad packet are discarded */
4601		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4602		    (rxr->discard)) {
4603			ifp->if_ierrors++;
4604			++rxr->rx_discarded;
4605			if (!eop) /* Catch subsequent segs */
4606				rxr->discard = TRUE;
4607			else
4608				rxr->discard = FALSE;
4609			igb_rx_discard(rxr, i);
4610			goto next_desc;
4611		}
4612
4613		/*
4614		** The way the hardware is configured to
4615		** split, it will ONLY use the header buffer
4616		** when header split is enabled; otherwise we
4617		** get normal behavior, i.e., both header and
4618		** payload are DMA'd into the payload buffer.
4619		**
4620		** The fmp test catches the case where a
4621		** packet spans multiple descriptors; in that
4622		** case only the first header is valid.
4623		*/
4624		if (rxr->hdr_split && rxr->fmp == NULL) {
4625			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4626			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4627			if (hlen > IGB_HDR_BUF)
4628				hlen = IGB_HDR_BUF;
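			/*
			 * The header length reported by the hardware can
			 * exceed the buffer we actually posted; only
			 * IGB_HDR_BUF bytes were DMA'd, so clamp it.
			 */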
4629			mh = rxr->rx_buffers[i].m_head;
4630			mh->m_len = hlen;
4631			/* clear buf pointer for refresh */
4632			rxbuf->m_head = NULL;
4633			/*
4634			** Get the payload length, this
4635			** Get the payload length; this
4636			** could be zero if it's a small
4637			** packet.
4638			if (plen > 0) {
4639				mp = rxr->rx_buffers[i].m_pack;
4640				mp->m_len = plen;
4641				mh->m_next = mp;
4642				/* clear buf pointer */
4643				rxbuf->m_pack = NULL;
4644				rxr->rx_split_packets++;
4645			}
4646		} else {
4647			/*
4648			** Either no header split, or a
4649			** secondary piece of a fragmented
4650			** split packet.
4651			*/
4652			mh = rxr->rx_buffers[i].m_pack;
4653			mh->m_len = plen;
4654			/* clear buf info for refresh */
4655			rxbuf->m_pack = NULL;
4656		}
4657
4658		++processed; /* So we know when to refresh */
4659
4660		/* Initial frame - setup */
4661		if (rxr->fmp == NULL) {
4662			mh->m_pkthdr.len = mh->m_len;
4663			/* Save the head of the chain */
4664			rxr->fmp = mh;
4665			rxr->lmp = mh;
4666			if (mp != NULL) {
4667				/* Add payload if split */
4668				mh->m_pkthdr.len += mp->m_len;
4669				rxr->lmp = mh->m_next;
4670			}
4671		} else {
4672			/* Chain mbuf's together */
4673			rxr->lmp->m_next = mh;
4674			rxr->lmp = rxr->lmp->m_next;
4675			rxr->fmp->m_pkthdr.len += mh->m_len;
4676		}
4677
4678		if (eop) {
4679			rxr->fmp->m_pkthdr.rcvif = ifp;
4680			ifp->if_ipackets++;
4681			rxr->rx_packets++;
4682			/* capture data for AIM */
4683			rxr->packets++;
4684			rxr->bytes += rxr->fmp->m_pkthdr.len;
4685			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4686
4687			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4688				igb_rx_checksum(staterr, rxr->fmp, ptype);
4689
4690			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4691			    (staterr & E1000_RXD_STAT_VP) != 0) {
4692				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4693				rxr->fmp->m_flags |= M_VLANTAG;
4694			}
4695#if __FreeBSD_version >= 800000
4696			rxr->fmp->m_pkthdr.flowid = que->msix;
4697			rxr->fmp->m_flags |= M_FLOWID;
4698#endif
4699			sendmp = rxr->fmp;
4700			/* Make sure to set M_PKTHDR. */
4701			sendmp->m_flags |= M_PKTHDR;
4702			rxr->fmp = NULL;
4703			rxr->lmp = NULL;
4704		}
4705
4706next_desc:
4707		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4708		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4709
4710		/* Advance our pointers to the next descriptor. */
4711		if (++i == adapter->num_rx_desc)
4712			i = 0;
4713		/*
4714		** Send to the stack or LRO
4715		*/
4716		if (sendmp != NULL) {
4717			rxr->next_to_check = i;
4718			igb_rx_input(rxr, ifp, sendmp, ptype);
4719			i = rxr->next_to_check;
4720			rxdone++;
4721		}
4722
4723		/* Every 8 descriptors we go to refresh mbufs */
4724		if (processed == 8) {
4725                        igb_refresh_mbufs(rxr, i);
4726                        processed = 0;
4727		}
4728	}
4729
4730	/* Catch any remainders */
4731	if (igb_rx_unrefreshed(rxr))
4732		igb_refresh_mbufs(rxr, i);
4733
4734	rxr->next_to_check = i;
4735
4736	/*
4737	 * Flush any outstanding LRO work
4738	 */
4739	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4740		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4741		tcp_lro_flush(lro, queued);
4742	}
4743
4744	if (done != NULL)
4745		*done = rxdone;
4746
4747	IGB_RX_UNLOCK(rxr);
4748	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4749}
4750
4751/*********************************************************************
4752 *
4753 *  Verify that the hardware indicated that the checksum is valid.
4754 *  Inform the stack about the status of checksum so that stack
4755 *  doesn't spend time verifying the checksum.
4756 *
4757 *********************************************************************/
4758static void
4759igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4760{
4761	u16 status = (u16)staterr;
4762	u8  errors = (u8) (staterr >> 24);
4763	int sctp;
4764
4765	/* Ignore Checksum bit is set */
4766	if (status & E1000_RXD_STAT_IXSM) {
4767		mp->m_pkthdr.csum_flags = 0;
4768		return;
4769	}
4770
4771	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4772	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4773		sctp = 1;
4774	else
4775		sctp = 0;
4776	if (status & E1000_RXD_STAT_IPCS) {
4777		/* Did it pass? */
4778		if (!(errors & E1000_RXD_ERR_IPE)) {
4779			/* IP Checksum Good */
4780			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4781			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4782		} else
4783			mp->m_pkthdr.csum_flags = 0;
4784	}
4785
4786	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4787		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4788#if __FreeBSD_version >= 800000
4789		if (sctp) /* reassign */
4790			type = CSUM_SCTP_VALID;
4791#endif
4792		/* Did it pass? */
4793		if (!(errors & E1000_RXD_ERR_TCPE)) {
4794			mp->m_pkthdr.csum_flags |= type;
4795			if (sctp == 0)
4796				mp->m_pkthdr.csum_data = htons(0xffff);
4797		}
4798	}
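	/*
	 * A csum_data of 0xffff together with CSUM_DATA_VALID and
	 * CSUM_PSEUDO_HDR is the stack's convention for a checksum that
	 * was fully verified in hardware; the SCTP case uses
	 * CSUM_SCTP_VALID instead and carries no partial checksum value.
	 */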
4799	return;
4800}
4801
4802/*
4803	 * This routine is run via a vlan
4804	 * config EVENT.
4805 */
4806static void
4807igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4808{
4809	struct adapter	*adapter = ifp->if_softc;
4810	u32		index, bit;
4811
4812	if (ifp->if_softc !=  arg)   /* Not our event */
4813		return;
4814
4815	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4816                return;
4817
4818	IGB_CORE_LOCK(adapter);
4819	index = (vtag >> 5) & 0x7F;
4820	bit = vtag & 0x1F;
4821	adapter->shadow_vfta[index] |= (1 << bit);
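	/*
	 * The VFTA is an array of 128 32-bit words covering VLAN IDs
	 * 0-4095: for example, vtag 1000 lands in word (1000 >> 5) = 31,
	 * bit (1000 & 0x1F) = 8.
	 */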
4822	++adapter->num_vlans;
4823	/* Change hw filter setting */
4824	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4825		igb_setup_vlan_hw_support(adapter);
4826	IGB_CORE_UNLOCK(adapter);
4827}
4828
4829/*
4830	 * This routine is run via a vlan
4831	 * unconfig EVENT.
4832 */
4833static void
4834igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4835{
4836	struct adapter	*adapter = ifp->if_softc;
4837	u32		index, bit;
4838
4839	if (ifp->if_softc !=  arg)
4840		return;
4841
4842	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4843                return;
4844
4845	IGB_CORE_LOCK(adapter);
4846	index = (vtag >> 5) & 0x7F;
4847	bit = vtag & 0x1F;
4848	adapter->shadow_vfta[index] &= ~(1 << bit);
4849	--adapter->num_vlans;
4850	/* Change hw filter setting */
4851	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4852		igb_setup_vlan_hw_support(adapter);
4853	IGB_CORE_UNLOCK(adapter);
4854}
4855
4856static void
4857igb_setup_vlan_hw_support(struct adapter *adapter)
4858{
4859	struct e1000_hw *hw = &adapter->hw;
4860	struct ifnet	*ifp = adapter->ifp;
4861	u32             reg;
4862
4863	if (adapter->vf_ifp) {
4864		e1000_rlpml_set_vf(hw,
4865		    adapter->max_frame_size + VLAN_TAG_SIZE);
4866		return;
4867	}
4868
4869	reg = E1000_READ_REG(hw, E1000_CTRL);
4870	reg |= E1000_CTRL_VME;
4871	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4872
4873	/* Enable the Filter Table */
4874	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4875		reg = E1000_READ_REG(hw, E1000_RCTL);
4876		reg &= ~E1000_RCTL_CFIEN;
4877		reg |= E1000_RCTL_VFE;
4878		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4879	}
4880
4881	/* Update the frame size */
4882	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4883	    adapter->max_frame_size + VLAN_TAG_SIZE);
4884
4885	/* Don't bother with table if no vlans */
4886	if ((adapter->num_vlans == 0) ||
4887	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4888                return;
4889	/*
4890	** A soft reset zeroes out the VFTA, so
4891	** we need to repopulate it now.
4892	*/
4893	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4894                if (adapter->shadow_vfta[i] != 0) {
4895			if (adapter->vf_ifp)
4896				e1000_vfta_set_vf(hw,
4897				    adapter->shadow_vfta[i], TRUE);
4898			else
4899				e1000_write_vfta(hw,
4900				    i, adapter->shadow_vfta[i]);
4901		}
4902}
4903
4904static void
4905igb_enable_intr(struct adapter *adapter)
4906{
4907	/* With RSS set up what to auto clear */
4908	if (adapter->msix_mem) {
4909		u32 mask = (adapter->que_mask | adapter->link_mask);
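		/*
		 * Roughly: EIAC selects which extended causes auto-clear
		 * when their MSI-X vector fires, EIAM allows them to be
		 * auto-masked, and EIMS actually unmasks them; the legacy
		 * IMS register is still used here for link-status changes.
		 */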
4910		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4911		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4912		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4913		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4914		    E1000_IMS_LSC);
4915	} else {
4916		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4917		    IMS_ENABLE_MASK);
4918	}
4919	E1000_WRITE_FLUSH(&adapter->hw);
4920
4921	return;
4922}
4923
4924static void
4925igb_disable_intr(struct adapter *adapter)
4926{
4927	if (adapter->msix_mem) {
4928		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4929		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4930	}
4931	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4932	E1000_WRITE_FLUSH(&adapter->hw);
4933	return;
4934}
4935
4936/*
4937 * Bit of a misnomer: what this really means is
4938 * to enable OS management of the system, i.e.,
4939 * to disable special hardware management features.
4940 */
4941static void
4942igb_init_manageability(struct adapter *adapter)
4943{
4944	if (adapter->has_manage) {
4945		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4946		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4947
4948		/* disable hardware interception of ARP */
4949		manc &= ~(E1000_MANC_ARP_EN);
4950
4951                /* enable receiving management packets to the host */
4952		manc |= E1000_MANC_EN_MNG2HOST;
4953		manc2h |= 1 << 5;  /* Mng Port 623 */
4954		manc2h |= 1 << 6;  /* Mng Port 664 */
4955		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4956		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4957	}
4958}
4959
4960/*
4961 * Give control back to hardware management
4962 * controller if there is one.
4963 */
4964static void
4965igb_release_manageability(struct adapter *adapter)
4966{
4967	if (adapter->has_manage) {
4968		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4969
4970		/* re-enable hardware interception of ARP */
4971		manc |= E1000_MANC_ARP_EN;
4972		manc &= ~E1000_MANC_EN_MNG2HOST;
4973
4974		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4975	}
4976}
4977
4978/*
4979 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4980 * For ASF and Pass Through versions of f/w this means that
4981 * the driver is loaded.
4982 *
4983 */
4984static void
4985igb_get_hw_control(struct adapter *adapter)
4986{
4987	u32 ctrl_ext;
4988
4989	if (adapter->vf_ifp)
4990		return;
4991
4992	/* Let firmware know the driver has taken over */
4993	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4994	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4995	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4996}
4997
4998/*
4999 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5000 * For ASF and Pass Through versions of f/w this means that the
5001 * driver is no longer loaded.
5002 *
5003 */
5004static void
5005igb_release_hw_control(struct adapter *adapter)
5006{
5007	u32 ctrl_ext;
5008
5009	if (adapter->vf_ifp)
5010		return;
5011
5012	/* Let firmware take over control of h/w */
5013	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5014	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5015	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5016}
5017
5018static int
5019igb_is_valid_ether_addr(uint8_t *addr)
5020{
5021	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5022
5023	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5024		return (FALSE);
5025	}
5026
5027	return (TRUE);
5028}
5029
5030
5031/*
5032 * Enable PCI Wake On Lan capability
5033 */
5034static void
5035igb_enable_wakeup(device_t dev)
5036{
5037	u16     cap, status;
5038	u8      id;
5039
5040	/* First find the capabilities pointer */
5041	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5042	/* Read the PM Capabilities */
5043	id = pci_read_config(dev, cap, 1);
5044	if (id != PCIY_PMG)     /* Something wrong */
5045		return;
5046	/* OK, we have the power capabilities, so
5047	   now get the status register */
5048	cap += PCIR_POWER_STATUS;
5049	status = pci_read_config(dev, cap, 2);
5050	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5051	pci_write_config(dev, cap, status, 2);
5052	return;
5053}
5054
5055static void
5056igb_led_func(void *arg, int onoff)
5057{
5058	struct adapter	*adapter = arg;
5059
5060	IGB_CORE_LOCK(adapter);
5061	if (onoff) {
5062		e1000_setup_led(&adapter->hw);
5063		e1000_led_on(&adapter->hw);
5064	} else {
5065		e1000_led_off(&adapter->hw);
5066		e1000_cleanup_led(&adapter->hw);
5067	}
5068	IGB_CORE_UNLOCK(adapter);
5069}
5070
5071/**********************************************************************
5072 *
5073 *  Update the board statistics counters.
5074 *
5075 **********************************************************************/
5076static void
5077igb_update_stats_counters(struct adapter *adapter)
5078{
5079	struct ifnet		*ifp;
5080        struct e1000_hw		*hw = &adapter->hw;
5081	struct e1000_hw_stats	*stats;
5082
5083	/*
5084	** The virtual function adapter has only a
5085	** small controlled set of stats, so update only
5086	** those and return.
5087	*/
5088	if (adapter->vf_ifp) {
5089		igb_update_vf_stats_counters(adapter);
5090		return;
5091	}
5092
5093	stats = (struct e1000_hw_stats	*)adapter->stats;
5094
5095	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
5096	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5097		stats->symerrs +=
5098		    E1000_READ_REG(hw,E1000_SYMERRS);
5099		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5100	}
5101
5102	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5103	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5104	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5105	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5106
5107	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5108	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5109	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5110	stats->dc += E1000_READ_REG(hw, E1000_DC);
5111	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5112	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5113	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5114	/*
5115	** For watchdog management we need to know if we have been
5116	** paused during the last interval, so capture that here.
5117	*/
5118        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5119        stats->xoffrxc += adapter->pause_frames;
5120	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5121	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5122	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5123	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5124	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5125	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5126	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5127	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5128	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5129	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5130	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5131	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5132
5133	/* For the 64-bit byte counters the low dword must be read first. */
5134	/* Both registers clear on the read of the high dword */
5135
5136	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5137	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5138	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5139	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5140
5141	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5142	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5143	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5144	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5145	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5146
5147	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5148	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5149
5150	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5151	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5152	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5153	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5154	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5155	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5156	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5157	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5158	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5159	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5160
5161	/* Interrupt Counts */
5162
5163	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5164	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5165	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5166	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5167	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5168	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5169	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5170	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5171	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5172
5173	/* Host to Card Statistics */
5174
5175	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5176	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5177	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5178	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5179	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5180	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5181	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5182	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5183	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5184	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5185	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5186	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5187	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5188	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5189
5190	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5191	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5192	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5193	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5194	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5195	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5196
5197	ifp = adapter->ifp;
5198	ifp->if_collisions = stats->colc;
5199
5200	/* Rx Errors */
5201	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5202	    stats->crcerrs + stats->algnerrc +
5203	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5204
5205	/* Tx Errors */
5206	ifp->if_oerrors = stats->ecol +
5207	    stats->latecol + adapter->watchdog_events;
5208
5209	/* Driver specific counters */
5210	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5211	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5212	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5213	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5214	adapter->packet_buf_alloc_tx =
5215	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5216	adapter->packet_buf_alloc_rx =
5217	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5218}
5219
5220
5221/**********************************************************************
5222 *
5223 *  Initialize the VF board statistics counters.
5224 *
5225 **********************************************************************/
5226static void
5227igb_vf_init_stats(struct adapter *adapter)
5228{
5229        struct e1000_hw *hw = &adapter->hw;
5230	struct e1000_vf_stats	*stats;
5231
5232	stats = (struct e1000_vf_stats	*)adapter->stats;
5233	if (stats == NULL)
5234		return;
5235        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5236        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5237        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5238        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5239        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5240}
5241
5242/**********************************************************************
5243 *
5244 *  Update the VF board statistics counters.
5245 *
5246 **********************************************************************/
5247static void
5248igb_update_vf_stats_counters(struct adapter *adapter)
5249{
5250	struct e1000_hw *hw = &adapter->hw;
5251	struct e1000_vf_stats	*stats;
5252
5253	if (adapter->link_speed == 0)
5254		return;
5255
5256	stats = (struct e1000_vf_stats	*)adapter->stats;
5257
5258	UPDATE_VF_REG(E1000_VFGPRC,
5259	    stats->last_gprc, stats->gprc);
5260	UPDATE_VF_REG(E1000_VFGORC,
5261	    stats->last_gorc, stats->gorc);
5262	UPDATE_VF_REG(E1000_VFGPTC,
5263	    stats->last_gptc, stats->gptc);
5264	UPDATE_VF_REG(E1000_VFGOTC,
5265	    stats->last_gotc, stats->gotc);
5266	UPDATE_VF_REG(E1000_VFMPRC,
5267	    stats->last_mprc, stats->mprc);
5268}
5269
5270/* Export a single 32-bit register via a read-only sysctl. */
5271static int
5272igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5273{
5274	struct adapter *adapter;
5275	u_int val;
5276
5277	adapter = oidp->oid_arg1;
5278	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5279	return (sysctl_handle_int(oidp, &val, 0, req));
5280}
5281
5282/*
5283**  Tuneable interrupt rate handler
5284*/
5285static int
5286igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5287{
5288	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5289	int			error;
5290	u32			reg, usec, rate;
5291
5292	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5293	usec = ((reg & 0x7FFC) >> 2);
5294	if (usec > 0)
5295		rate = 1000000 / usec;
5296	else
5297		rate = 0;
5298	error = sysctl_handle_int(oidp, &rate, 0, req);
5299	if (error || !req->newptr)
5300		return error;
5301	return 0;
5302}
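
/*
 * With the interval extracted as above, an EITR value of 125 usec is
 * reported as 8000 interrupts per second.  Note that a value written to
 * this sysctl is accepted by sysctl_handle_int() but never pushed back
 * into EITR, so in this revision the knob is effectively read-only.
 */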
5303
5304/*
5305 * Add sysctl variables, one per statistic, to the system.
5306 */
5307static void
5308igb_add_hw_stats(struct adapter *adapter)
5309{
5310	device_t dev = adapter->dev;
5311
5312	struct tx_ring *txr = adapter->tx_rings;
5313	struct rx_ring *rxr = adapter->rx_rings;
5314
5315	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5316	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5317	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5318	struct e1000_hw_stats *stats = adapter->stats;
5319
5320	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5321	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5322
5323#define QUEUE_NAME_LEN 32
5324	char namebuf[QUEUE_NAME_LEN];
5325
5326	/* Driver Statistics */
5327	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5328			CTLFLAG_RD, &adapter->link_irq, 0,
5329			"Link MSIX IRQ Handled");
5330	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5331			CTLFLAG_RD, &adapter->dropped_pkts,
5332			"Driver dropped packets");
5333	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5334			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5335			"Driver tx dma failure in xmit");
5336	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5337			CTLFLAG_RD, &adapter->rx_overruns,
5338			"RX overruns");
5339	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5340			CTLFLAG_RD, &adapter->watchdog_events,
5341			"Watchdog timeouts");
5342
5343	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5344			CTLFLAG_RD, &adapter->device_control,
5345			"Device Control Register");
5346	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5347			CTLFLAG_RD, &adapter->rx_control,
5348			"Receiver Control Register");
5349	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5350			CTLFLAG_RD, &adapter->int_mask,
5351			"Interrupt Mask");
5352	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5353			CTLFLAG_RD, &adapter->eint_mask,
5354			"Extended Interrupt Mask");
5355	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5356			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5357			"Transmit Buffer Packet Allocation");
5358	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5359			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5360			"Receive Buffer Packet Allocation");
5361	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5362			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5363			"Flow Control High Watermark");
5364	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5365			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5366			"Flow Control Low Watermark");
5367
5368	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5369		struct lro_ctrl *lro = &rxr->lro;
5370
5371		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5372		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5373					    CTLFLAG_RD, NULL, "Queue Name");
5374		queue_list = SYSCTL_CHILDREN(queue_node);
5375
5376		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5377				CTLFLAG_RD, &adapter->queues[i],
5378				sizeof(&adapter->queues[i]),
5379				igb_sysctl_interrupt_rate_handler,
5380				"IU", "Interrupt Rate");
5381
5382		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5383				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5384				igb_sysctl_reg_handler, "IU",
5385 				"Transmit Descriptor Head");
5386		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5387				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5388				igb_sysctl_reg_handler, "IU",
5389 				"Transmit Descriptor Tail");
5390		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5391				CTLFLAG_RD, &txr->no_desc_avail,
5392				"Queue No Descriptor Available");
5393		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5394				CTLFLAG_RD, &txr->tx_packets,
5395				"Queue Packets Transmitted");
5396
5397		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5398				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5399				igb_sysctl_reg_handler, "IU",
5400				"Receive Descriptor Head");
5401		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5402				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5403				igb_sysctl_reg_handler, "IU",
5404				"Receive Descriptor Tail");
5405		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5406				CTLFLAG_RD, &rxr->rx_packets,
5407				"Queue Packets Received");
5408		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5409				CTLFLAG_RD, &rxr->rx_bytes,
5410				"Queue Bytes Received");
5411		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5412				CTLFLAG_RD, &lro->lro_queued, 0,
5413				"LRO Queued");
5414		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5415				CTLFLAG_RD, &lro->lro_flushed, 0,
5416				"LRO Flushed");
5417	}
5418
5419	/* MAC stats get their own sub node */
5420
5421	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5422				    CTLFLAG_RD, NULL, "MAC Statistics");
5423	stat_list = SYSCTL_CHILDREN(stat_node);
5424
5425	/*
5426	** A VF interface exposes only a very limited set of
5427	** stats, since it is not managing the physical hardware.
5428	*/
5429	if (adapter->vf_ifp) {
5430		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5431				CTLFLAG_RD, &stats->gprc,
5432				"Good Packets Received");
5433		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5434				CTLFLAG_RD, &stats->gptc,
5435				"Good Packets Transmitted");
5436		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5437				CTLFLAG_RD, &stats->gorc,
5438				"Good Octets Received");
5439		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5440				CTLFLAG_RD, &stats->gotc,
5441				"Good Octets Transmitted");
5442		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5443				CTLFLAG_RD, &stats->mprc,
5444				"Multicast Packets Received");
5445		return;
5446	}
5447
5448	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5449			CTLFLAG_RD, &stats->ecol,
5450			"Excessive collisions");
5451	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5452			CTLFLAG_RD, &stats->scc,
5453			"Single collisions");
5454	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5455			CTLFLAG_RD, &stats->mcc,
5456			"Multiple collisions");
5457	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5458			CTLFLAG_RD, &stats->latecol,
5459			"Late collisions");
5460	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5461			CTLFLAG_RD, &stats->colc,
5462			"Collision Count");
5463	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5464			CTLFLAG_RD, &stats->symerrs,
5465			"Symbol Errors");
5466	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5467			CTLFLAG_RD, &stats->sec,
5468			"Sequence Errors");
5469	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5470			CTLFLAG_RD, &stats->dc,
5471			"Defer Count");
5472	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5473			CTLFLAG_RD, &stats->mpc,
5474			"Missed Packets");
5475	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5476			CTLFLAG_RD, &stats->rnbc,
5477			"Receive No Buffers");
5478	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5479			CTLFLAG_RD, &stats->ruc,
5480			"Receive Undersize");
5481	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5482			CTLFLAG_RD, &stats->rfc,
5483			"Fragmented Packets Received");
5484	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5485			CTLFLAG_RD, &stats->roc,
5486			"Oversized Packets Received");
5487	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5488			CTLFLAG_RD, &stats->rjc,
5489			"Received Jabber");
5490	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5491			CTLFLAG_RD, &stats->rxerrc,
5492			"Receive Errors");
5493	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5494			CTLFLAG_RD, &stats->crcerrs,
5495			"CRC errors");
5496	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5497			CTLFLAG_RD, &stats->algnerrc,
5498			"Alignment Errors");
5499	/* On 82575 these are collision counts */
5500	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5501			CTLFLAG_RD, &stats->cexterr,
5502			"Collision/Carrier extension errors");
5503	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5504			CTLFLAG_RD, &stats->xonrxc,
5505			"XON Received");
5506	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5507			CTLFLAG_RD, &stats->xontxc,
5508			"XON Transmitted");
5509	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5510			CTLFLAG_RD, &stats->xoffrxc,
5511			"XOFF Received");
5512	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5513			CTLFLAG_RD, &stats->xofftxc,
5514			"XOFF Transmitted");
5515	/* Packet Reception Stats */
5516	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5517			CTLFLAG_RD, &stats->tpr,
5518			"Total Packets Received");
5519	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5520			CTLFLAG_RD, &stats->gprc,
5521			"Good Packets Received");
5522	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5523			CTLFLAG_RD, &stats->bprc,
5524			"Broadcast Packets Received");
5525	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5526			CTLFLAG_RD, &stats->mprc,
5527			"Multicast Packets Received");
5528	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5529			CTLFLAG_RD, &stats->prc64,
5530			"64 byte frames received");
5531	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5532			CTLFLAG_RD, &stats->prc127,
5533			"65-127 byte frames received");
5534	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5535			CTLFLAG_RD, &stats->prc255,
5536			"128-255 byte frames received");
5537	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5538			CTLFLAG_RD, &stats->prc511,
5539			"256-511 byte frames received");
5540	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5541			CTLFLAG_RD, &stats->prc1023,
5542			"512-1023 byte frames received");
5543	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5544			CTLFLAG_RD, &stats->prc1522,
5545			"1024-1522 byte frames received");
5546	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5547			CTLFLAG_RD, &stats->gorc,
5548			"Good Octets Received");
5549
5550	/* Packet Transmission Stats */
5551	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5552			CTLFLAG_RD, &stats->gotc,
5553			"Good Octets Transmitted");
5554	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5555			CTLFLAG_RD, &stats->tpt,
5556			"Total Packets Transmitted");
5557	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5558			CTLFLAG_RD, &stats->gptc,
5559			"Good Packets Transmitted");
5560	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5561			CTLFLAG_RD, &stats->bptc,
5562			"Broadcast Packets Transmitted");
5563	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5564			CTLFLAG_RD, &stats->mptc,
5565			"Multicast Packets Transmitted");
5566	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5567			CTLFLAG_RD, &stats->ptc64,
5568			"64 byte frames transmitted");
5569	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5570			CTLFLAG_RD, &stats->ptc127,
5571			"65-127 byte frames transmitted");
5572	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5573			CTLFLAG_RD, &stats->ptc255,
5574			"128-255 byte frames transmitted");
5575	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5576			CTLFLAG_RD, &stats->ptc511,
5577			"256-511 byte frames transmitted");
5578	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5579			CTLFLAG_RD, &stats->ptc1023,
5580			"512-1023 byte frames transmitted");
5581	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5582			CTLFLAG_RD, &stats->ptc1522,
5583			"1024-1522 byte frames transmitted");
5584	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5585			CTLFLAG_RD, &stats->tsctc,
5586			"TSO Contexts Transmitted");
5587	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5588			CTLFLAG_RD, &stats->tsctfc,
5589			"TSO Contexts Failed");
5590
5591
5592	/* Interrupt Stats */
5593
5594	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5595				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5596	int_list = SYSCTL_CHILDREN(int_node);
5597
5598	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5599			CTLFLAG_RD, &stats->iac,
5600			"Interrupt Assertion Count");
5601
5602	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5603			CTLFLAG_RD, &stats->icrxptc,
5604			"Interrupt Cause Rx Pkt Timer Expire Count");
5605
5606	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5607			CTLFLAG_RD, &stats->icrxatc,
5608			"Interrupt Cause Rx Abs Timer Expire Count");
5609
5610	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5611			CTLFLAG_RD, &stats->ictxptc,
5612			"Interrupt Cause Tx Pkt Timer Expire Count");
5613
5614	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5615			CTLFLAG_RD, &stats->ictxatc,
5616			"Interrupt Cause Tx Abs Timer Expire Count");
5617
5618	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5619			CTLFLAG_RD, &stats->ictxqec,
5620			"Interrupt Cause Tx Queue Empty Count");
5621
5622	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5623			CTLFLAG_RD, &stats->ictxqmtc,
5624			"Interrupt Cause Tx Queue Min Thresh Count");
5625
5626	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5627			CTLFLAG_RD, &stats->icrxdmtc,
5628			"Interrupt Cause Rx Desc Min Thresh Count");
5629
5630	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5631			CTLFLAG_RD, &stats->icrxoc,
5632			"Interrupt Cause Receiver Overrun Count");
5633
5634	/* Host to Card Stats */
5635
5636	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5637				    CTLFLAG_RD, NULL,
5638				    "Host to Card Statistics");
5639
5640	host_list = SYSCTL_CHILDREN(host_node);
5641
5642	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5643			CTLFLAG_RD, &stats->cbtmpc,
5644			"Circuit Breaker Tx Packet Count");
5645
5646	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5647			CTLFLAG_RD, &stats->htdpmc,
5648			"Host Transmit Discarded Packets");
5649
5650	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5651			CTLFLAG_RD, &stats->rpthc,
5652			"Rx Packets To Host");
5653
5654	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5655			CTLFLAG_RD, &stats->cbrmpc,
5656			"Circuit Breaker Rx Packet Count");
5657
5658	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5659			CTLFLAG_RD, &stats->cbrdpc,
5660			"Circuit Breaker Rx Dropped Count");
5661
5662	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5663			CTLFLAG_RD, &stats->hgptc,
5664			"Host Good Packets Tx Count");
5665
5666	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5667			CTLFLAG_RD, &stats->htcbdpc,
5668			"Host Tx Circuit Breaker Dropped Count");
5669
5670	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5671			CTLFLAG_RD, &stats->hgorc,
5672			"Host Good Octets Received Count");
5673
5674	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5675			CTLFLAG_RD, &stats->hgotc,
5676			"Host Good Octets Transmit Count");
5677
5678	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5679			CTLFLAG_RD, &stats->lenerrs,
5680			"Length Errors");
5681
5682	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5683			CTLFLAG_RD, &stats->scvpc,
5684			"SerDes/SGMII Code Violation Pkt Count");
5685
5686	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5687			CTLFLAG_RD, &stats->hrmpc,
5688			"Header Redirection Missed Packet Count");
5689}
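/*
 * The nodes added above hang off the device's sysctl tree, so (assuming
 * unit 0) they would typically be reachable as, for example:
 *
 *	sysctl dev.igb.0.queue0.interrupt_rate
 *	sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	sysctl dev.igb.0.interrupts.asserts
 *	sysctl dev.igb.0.host.rx_pkt
 */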
5690
5691
5692/**********************************************************************
5693 *
5694 *  This routine provides a way to dump out the adapter EEPROM,
5695 *  often a useful debug/service tool. Only the first 32 words
5696 *  are dumped; the data that matters lies within that range.
5697 *
5698 **********************************************************************/
5699static int
5700igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5701{
5702	struct adapter *adapter;
5703	int error;
5704	int result;
5705
5706	result = -1;
5707	error = sysctl_handle_int(oidp, &result, 0, req);
5708
5709	if (error || !req->newptr)
5710		return (error);
5711
5712	/*
5713	 * This value will cause a hex dump of the
5714	 * first 32 16-bit words of the EEPROM to
5715	 * the screen.
5716	 */
5717	if (result == 1) {
5718		adapter = (struct adapter *)arg1;
5719		igb_print_nvm_info(adapter);
5720	}
5721
5722	return (error);
5723}
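/*
 * Usage sketch: this handler is attached to a read/write integer OID
 * elsewhere in the driver (the exact OID name is not shown here); writing
 * the value 1 to it, e.g. something like "sysctl dev.igb.0.nvm=1",
 * calls igb_print_nvm_info() and dumps the first 32 EEPROM words to the
 * console.
 */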
5724
5725static void
5726igb_print_nvm_info(struct adapter *adapter)
5727{
5728	u16	eeprom_data;
5729	int	i, j, row = 0;
5730
5731	/* It's a bit crude, but it gets the job done */
5732	printf("\nInterface EEPROM Dump:\n");
5733	printf("Offset\n0x0000  ");
5734	for (i = 0, j = 0; i < 32; i++, j++) {
5735		if (j == 8) { /* Make the offset block */
5736			j = 0; ++row;
5737			printf("\n0x00%x0  ", row);
5738		}
5739		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5740		printf("%04x ", eeprom_data);
5741	}
5742	printf("\n");
5743}
5744
5745static void
5746igb_set_sysctl_value(struct adapter *adapter, const char *name,
5747	const char *description, int *limit, int value)
5748{
5749	*limit = value;
5750	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5751	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5752	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5753}
5754
5755/*
5756** Set flow control using sysctl:
5757** Flow control values:
5758** 	0 - off
5759**	1 - rx pause
5760**	2 - tx pause
5761**	3 - full
5762*/
5763static int
5764igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5765{
5766	int		error;
5767	static int	input = 3; /* default is full */
5768	struct adapter	*adapter = (struct adapter *) arg1;
5769
5770	error = sysctl_handle_int(oidp, &input, 0, req);
5771
5772	if ((error) || (req->newptr == NULL))
5773		return (error);
5774
5775	switch (input) {
5776		case e1000_fc_rx_pause:
5777		case e1000_fc_tx_pause:
5778		case e1000_fc_full:
5779		case e1000_fc_none:
5780			adapter->hw.fc.requested_mode = input;
5781			adapter->fc = input;
5782			break;
5783		default:
5784			/* Do nothing */
5785			return (error);
5786	}
5787
5788	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5789	e1000_force_mac_fc(&adapter->hw);
5790	return (error);
5791}
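/*
 * Usage sketch (the OID name and unit number are illustrative; the
 * handler is registered elsewhere): setting the flow-control sysctl to 3,
 * e.g. "sysctl dev.igb.0.fc=3", requests e1000_fc_full and then forces
 * the MAC to the new mode via e1000_force_mac_fc().
 */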
5792
5793/*
5794** Manage DMA Coalesce:
5795** Control values:
5796** 	0/1 - off/on
5797**	Legal timer values are 250, 500, and
5798**	1000-10000 in increments of 1000
5799*/
5800static int
5801igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5802{
5803	struct adapter *adapter = (struct adapter *) arg1;
5804	int		error;
5805
5806	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5807
5808	if ((error) || (req->newptr == NULL))
5809		return (error);
5810
5811	switch (adapter->dmac) {
5812		case 0:
5813			/* Disabling */
5814			break;
5815		case 1: /* Just enable and use default */
5816			adapter->dmac = 1000;
5817			break;
5818		case 250:
5819		case 500:
5820		case 1000:
5821		case 2000:
5822		case 3000:
5823		case 4000:
5824		case 5000:
5825		case 6000:
5826		case 7000:
5827		case 8000:
5828		case 9000:
5829		case 10000:
5830			/* Legal values - allow */
5831			break;
5832		default:
5833			/* Illegal value, turn DMA coalescing off */
5834			adapter->dmac = 0;
5835			return (error);
5836	}
5837	/* Reinit the interface */
5838	igb_init(adapter);
5839	return (error);
5840}
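/*
 * Usage sketch (the OID name and unit number are illustrative): writing 1,
 * e.g. "sysctl dev.igb.0.dmac=1", enables DMA coalescing with the default
 * timer of 1000; writing 250, 500, or 1000-10000 selects that timer value;
 * any other value is rejected and leaves coalescing disabled.  Every
 * accepted value re-runs igb_init() so the hardware is reprogrammed.
 */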
5841