1/******************************************************************************
2
3  Copyright (c) 2001-2011, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 234154 2012-04-11 21:33:45Z jhb $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#include "opt_inet6.h"
40#include "opt_altq.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#if __FreeBSD_version >= 800000
46#include <sys/buf_ring.h>
47#endif
48#include <sys/bus.h>
49#include <sys/endian.h>
50#include <sys/kernel.h>
51#include <sys/kthread.h>
52#include <sys/malloc.h>
53#include <sys/mbuf.h>
54#include <sys/module.h>
55#include <sys/rman.h>
56#include <sys/socket.h>
57#include <sys/sockio.h>
58#include <sys/sysctl.h>
59#include <sys/taskqueue.h>
60#include <sys/eventhandler.h>
61#include <sys/pcpu.h>
62#include <sys/smp.h>
63#include <machine/smp.h>
64#include <machine/bus.h>
65#include <machine/resource.h>
66
67#include <net/bpf.h>
68#include <net/ethernet.h>
69#include <net/if.h>
70#include <net/if_arp.h>
71#include <net/if_dl.h>
72#include <net/if_media.h>
73
74#include <net/if_types.h>
75#include <net/if_vlan_var.h>
76
77#include <netinet/in_systm.h>
78#include <netinet/in.h>
79#include <netinet/if_ether.h>
80#include <netinet/ip.h>
81#include <netinet/ip6.h>
82#include <netinet/tcp.h>
83#include <netinet/tcp_lro.h>
84#include <netinet/udp.h>
85
86#include <machine/in_cksum.h>
87#include <dev/led/led.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 2.3.1";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109  *  Used by probe to select the devices to attach to.
110  *  The last field stores an index into igb_strings.
111  *  The last entry must be all zeros.
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
133						PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
138	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
139	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
140						PCI_ANY_ID, PCI_ANY_ID, 0},
141	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
142						PCI_ANY_ID, PCI_ANY_ID, 0},
143	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
144	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
145	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
146	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
147						PCI_ANY_ID, PCI_ANY_ID, 0},
148	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
149	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
150	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
151	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
152	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
153	/* required last entry */
154	{ 0, 0, 0, 0, 0}
155};
156
157/*********************************************************************
158 *  Table of branding strings for all supported NICs.
159 *********************************************************************/
160
161static char *igb_strings[] = {
162	"Intel(R) PRO/1000 Network Connection"
163};
164
165/*********************************************************************
166 *  Function prototypes
167 *********************************************************************/
168static int	igb_probe(device_t);
169static int	igb_attach(device_t);
170static int	igb_detach(device_t);
171static int	igb_shutdown(device_t);
172static int	igb_suspend(device_t);
173static int	igb_resume(device_t);
174#if __FreeBSD_version >= 800000
175static int	igb_mq_start(struct ifnet *, struct mbuf *);
176static int	igb_mq_start_locked(struct ifnet *,
177		    struct tx_ring *, struct mbuf *);
178static void	igb_qflush(struct ifnet *);
179static void	igb_deferred_mq_start(void *, int);
180#else
181static void	igb_start(struct ifnet *);
182static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
183#endif
184static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
185static void	igb_init(void *);
186static void	igb_init_locked(struct adapter *);
187static void	igb_stop(void *);
188static void	igb_media_status(struct ifnet *, struct ifmediareq *);
189static int	igb_media_change(struct ifnet *);
190static void	igb_identify_hardware(struct adapter *);
191static int	igb_allocate_pci_resources(struct adapter *);
192static int	igb_allocate_msix(struct adapter *);
193static int	igb_allocate_legacy(struct adapter *);
194static int	igb_setup_msix(struct adapter *);
195static void	igb_free_pci_resources(struct adapter *);
196static void	igb_local_timer(void *);
197static void	igb_reset(struct adapter *);
198static int	igb_setup_interface(device_t, struct adapter *);
199static int	igb_allocate_queues(struct adapter *);
200static void	igb_configure_queues(struct adapter *);
201
202static int	igb_allocate_transmit_buffers(struct tx_ring *);
203static void	igb_setup_transmit_structures(struct adapter *);
204static void	igb_setup_transmit_ring(struct tx_ring *);
205static void	igb_initialize_transmit_units(struct adapter *);
206static void	igb_free_transmit_structures(struct adapter *);
207static void	igb_free_transmit_buffers(struct tx_ring *);
208
209static int	igb_allocate_receive_buffers(struct rx_ring *);
210static int	igb_setup_receive_structures(struct adapter *);
211static int	igb_setup_receive_ring(struct rx_ring *);
212static void	igb_initialize_receive_units(struct adapter *);
213static void	igb_free_receive_structures(struct adapter *);
214static void	igb_free_receive_buffers(struct rx_ring *);
215static void	igb_free_receive_ring(struct rx_ring *);
216
217static void	igb_enable_intr(struct adapter *);
218static void	igb_disable_intr(struct adapter *);
219static void	igb_update_stats_counters(struct adapter *);
220static bool	igb_txeof(struct tx_ring *);
221
222static __inline	void igb_rx_discard(struct rx_ring *, int);
223static __inline void igb_rx_input(struct rx_ring *,
224		    struct ifnet *, struct mbuf *, u32);
225
226static bool	igb_rxeof(struct igb_queue *, int, int *);
227static void	igb_rx_checksum(u32, struct mbuf *, u32);
228static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
229static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
230		    struct ip *, struct tcphdr *);
231static void	igb_set_promisc(struct adapter *);
232static void	igb_disable_promisc(struct adapter *);
233static void	igb_set_multi(struct adapter *);
234static void	igb_update_link_status(struct adapter *);
235static void	igb_refresh_mbufs(struct rx_ring *, int);
236
237static void	igb_register_vlan(void *, struct ifnet *, u16);
238static void	igb_unregister_vlan(void *, struct ifnet *, u16);
239static void	igb_setup_vlan_hw_support(struct adapter *);
240
241static int	igb_xmit(struct tx_ring *, struct mbuf **);
242static int	igb_dma_malloc(struct adapter *, bus_size_t,
243		    struct igb_dma_alloc *, int);
244static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
245static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
246static void	igb_print_nvm_info(struct adapter *);
247static int 	igb_is_valid_ether_addr(u8 *);
248static void     igb_add_hw_stats(struct adapter *);
249
250static void	igb_vf_init_stats(struct adapter *);
251static void	igb_update_vf_stats_counters(struct adapter *);
252
253/* Management and WOL Support */
254static void	igb_init_manageability(struct adapter *);
255static void	igb_release_manageability(struct adapter *);
256static void     igb_get_hw_control(struct adapter *);
257static void     igb_release_hw_control(struct adapter *);
258static void     igb_enable_wakeup(device_t);
259static void     igb_led_func(void *, int);
260
261static int	igb_irq_fast(void *);
262static void	igb_msix_que(void *);
263static void	igb_msix_link(void *);
264static void	igb_handle_que(void *context, int pending);
265static void	igb_handle_link(void *context, int pending);
266static void	igb_handle_link_locked(struct adapter *);
267
268static void	igb_set_sysctl_value(struct adapter *, const char *,
269		    const char *, int *, int);
270static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
271static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
272
273#ifdef DEVICE_POLLING
274static poll_handler_t igb_poll;
275#endif /* DEVICE_POLLING */
276
277/*********************************************************************
278 *  FreeBSD Device Interface Entry Points
279 *********************************************************************/
280
281static device_method_t igb_methods[] = {
282	/* Device interface */
283	DEVMETHOD(device_probe, igb_probe),
284	DEVMETHOD(device_attach, igb_attach),
285	DEVMETHOD(device_detach, igb_detach),
286	DEVMETHOD(device_shutdown, igb_shutdown),
287	DEVMETHOD(device_suspend, igb_suspend),
288	DEVMETHOD(device_resume, igb_resume),
289	{0, 0}
290};
291
292static driver_t igb_driver = {
293	"igb", igb_methods, sizeof(struct adapter),
294};
295
296static devclass_t igb_devclass;
297DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
298MODULE_DEPEND(igb, pci, 1, 1, 1);
299MODULE_DEPEND(igb, ether, 1, 1, 1);
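/*
 * Example usage (illustrative, not part of the driver itself): the module
 * registered above can be loaded at boot from /boot/loader.conf or at
 * runtime with kldload(8), e.g.:
 *
 *	if_igb_load="YES"	(in /boot/loader.conf)
 *	kldload if_igb		(on a running system)
 */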
300
301/*********************************************************************
302 *  Tunable default values.
303 *********************************************************************/
304
305static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
306
307/* Descriptor defaults */
308static int igb_rxd = IGB_DEFAULT_RXD;
309static int igb_txd = IGB_DEFAULT_TXD;
310TUNABLE_INT("hw.igb.rxd", &igb_rxd);
311TUNABLE_INT("hw.igb.txd", &igb_txd);
312SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
313    "Number of receive descriptors per queue");
314SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
315    "Number of transmit descriptors per queue");
316
317/*
318** AIM: Adaptive Interrupt Moderation
319** which means that the interrupt rate
320** is varied over time based on the
321** traffic for that interrupt vector
322*/
323static int igb_enable_aim = TRUE;
324TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
325SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RW, &igb_enable_aim, 0,
326    "Enable adaptive interrupt moderation");
327
328/*
329 * MSIX should be the default for best performance,
330 * but this allows it to be forced off for testing.
331 */
332static int igb_enable_msix = 1;
333TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
334SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
335    "Enable MSI-X interrupts");
336
337/*
338** Tuneable Interrupt rate
339*/
340static int igb_max_interrupt_rate = 8000;
341TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);
342SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
343    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
344
345/*
346** Header split causes the packet header to
347** be DMA'd to a separate mbuf from the payload.
348** This can have memory alignment benefits. An
349** additional plus is that small packets often fit
350** into the header and thus use no cluster. It is
351** a very workload-dependent feature.
352*/
353static int igb_header_split = FALSE;
354TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
355SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
356    "Enable receive mbuf header split");
357
358/*
359** This will autoconfigure based on
360** the number of CPUs if left at 0.
361*/
362static int igb_num_queues = 0;
363TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
364SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
365    "Number of queues to configure, 0 indicates autoconfigure");
366
367/* How many packets rxeof tries to clean at a time */
368static int igb_rx_process_limit = 100;
369TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
370SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
371    &igb_rx_process_limit, 0,
372    "Maximum number of received packets to process at a time, -1 means unlimited");
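/*
 * Example (illustrative) settings for the tunables above; the CTLFLAG_RDTUN
 * ones are read from /boot/loader.conf at driver load time, e.g.:
 *
 *	hw.igb.rxd=2048
 *	hw.igb.txd=2048
 *	hw.igb.enable_msix=1
 *	hw.igb.max_interrupt_rate=8000
 *	hw.igb.num_queues=0		(0 = autoconfigure from CPU count)
 *	hw.igb.rx_process_limit=100
 *
 * hw.igb.enable_aim is CTLFLAG_RW and may also be changed at runtime
 * via sysctl(8).
 */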
373
374#ifdef DEV_NETMAP	/* see ixgbe.c for details */
375#include <dev/netmap/if_igb_netmap.h>
376#endif /* DEV_NETMAP */
377/*********************************************************************
378 *  Device identification routine
379 *
380 *  igb_probe determines if the driver should be loaded on an
381 *  adapter based on the PCI vendor/device ID of the adapter.
382 *
383 *  return BUS_PROBE_DEFAULT on success, positive on failure
384 *********************************************************************/
385
386static int
387igb_probe(device_t dev)
388{
389	char		adapter_name[60];
390	uint16_t	pci_vendor_id = 0;
391	uint16_t	pci_device_id = 0;
392	uint16_t	pci_subvendor_id = 0;
393	uint16_t	pci_subdevice_id = 0;
394	igb_vendor_info_t *ent;
395
396	INIT_DEBUGOUT("igb_probe: begin");
397
398	pci_vendor_id = pci_get_vendor(dev);
399	if (pci_vendor_id != IGB_VENDOR_ID)
400		return (ENXIO);
401
402	pci_device_id = pci_get_device(dev);
403	pci_subvendor_id = pci_get_subvendor(dev);
404	pci_subdevice_id = pci_get_subdevice(dev);
405
406	ent = igb_vendor_info_array;
407	while (ent->vendor_id != 0) {
408		if ((pci_vendor_id == ent->vendor_id) &&
409		    (pci_device_id == ent->device_id) &&
410
411		    ((pci_subvendor_id == ent->subvendor_id) ||
412		    (ent->subvendor_id == PCI_ANY_ID)) &&
413
414		    ((pci_subdevice_id == ent->subdevice_id) ||
415		    (ent->subdevice_id == PCI_ANY_ID))) {
416			sprintf(adapter_name, "%s %s",
417				igb_strings[ent->index],
418				igb_driver_version);
419			device_set_desc_copy(dev, adapter_name);
420			return (BUS_PROBE_DEFAULT);
421		}
422		ent++;
423	}
424
425	return (ENXIO);
426}
427
428/*********************************************************************
429 *  Device initialization routine
430 *
431 *  The attach entry point is called when the driver is being loaded.
432 *  This routine identifies the type of hardware, allocates all resources
433 *  and initializes the hardware.
434 *
435 *  return 0 on success, positive on failure
436 *********************************************************************/
437
438static int
439igb_attach(device_t dev)
440{
441	struct adapter	*adapter;
442	int		error = 0;
443	u16		eeprom_data;
444
445	INIT_DEBUGOUT("igb_attach: begin");
446
447	if (resource_disabled("igb", device_get_unit(dev))) {
448		device_printf(dev, "Disabled by device hint\n");
449		return (ENXIO);
450	}
451
452	adapter = device_get_softc(dev);
453	adapter->dev = adapter->osdep.dev = dev;
454	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
455
456	/* SYSCTL stuff */
457	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
458	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
459	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
460	    igb_sysctl_nvm_info, "I", "NVM Information");
461
462	igb_set_sysctl_value(adapter, "enable_aim",
463	    "Interrupt Moderation", &adapter->enable_aim,
464	    igb_enable_aim);
465
466	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
467	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
468	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
469	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");
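	/*
	 * Illustrative example: flow control can be adjusted at runtime with
	 * sysctl(8), e.g. "sysctl dev.igb.0.fc=3".  The value is interpreted
	 * by igb_set_flowcntl(); assuming the usual e1000_fc_mode mapping,
	 * 0 = none, 1 = RX pause, 2 = TX pause, 3 = full flow control.
	 */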
470
471	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
472
473	/* Determine hardware and mac info */
474	igb_identify_hardware(adapter);
475
476	/* Setup PCI resources */
477	if (igb_allocate_pci_resources(adapter)) {
478		device_printf(dev, "Allocation of PCI resources failed\n");
479		error = ENXIO;
480		goto err_pci;
481	}
482
483	/* Do Shared Code initialization */
484	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
485		device_printf(dev, "Setup of Shared code failed\n");
486		error = ENXIO;
487		goto err_pci;
488	}
489
490	e1000_get_bus_info(&adapter->hw);
491
492	/* Sysctl for limiting the amount of work done in the taskqueue */
493	igb_set_sysctl_value(adapter, "rx_processing_limit",
494	    "max number of rx packets to process",
495	    &adapter->rx_process_limit, igb_rx_process_limit);
496
497	/*
498	 * Validate the number of transmit and receive descriptors. They
499	 * must not exceed the hardware maximum and must be a multiple
500	 * of IGB_DBA_ALIGN.
501	 */
502	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
503	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
504		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
505		    IGB_DEFAULT_TXD, igb_txd);
506		adapter->num_tx_desc = IGB_DEFAULT_TXD;
507	} else
508		adapter->num_tx_desc = igb_txd;
509	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
510	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
511		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
512		    IGB_DEFAULT_RXD, igb_rxd);
513		adapter->num_rx_desc = IGB_DEFAULT_RXD;
514	} else
515		adapter->num_rx_desc = igb_rxd;
516
517	adapter->hw.mac.autoneg = DO_AUTO_NEG;
518	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
519	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
520
521	/* Copper options */
522	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
523		adapter->hw.phy.mdix = AUTO_ALL_MODES;
524		adapter->hw.phy.disable_polarity_correction = FALSE;
525		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
526	}
527
528	/*
529	 * Set the frame limits assuming
530	 * standard Ethernet-sized frames.
531	 */
532	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
533	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
534
535	/*
536	** Allocate and Setup Queues
537	*/
538	if (igb_allocate_queues(adapter)) {
539		error = ENOMEM;
540		goto err_pci;
541	}
542
543	/* Allocate the appropriate stats memory */
544	if (adapter->vf_ifp) {
545		adapter->stats =
546		    (struct e1000_vf_stats *)malloc(sizeof
547		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
548		igb_vf_init_stats(adapter);
549	} else
550		adapter->stats =
551		    (struct e1000_hw_stats *)malloc(sizeof
552		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
553	if (adapter->stats == NULL) {
554		device_printf(dev, "Can not allocate stats memory\n");
555		error = ENOMEM;
556		goto err_late;
557	}
558
559	/* Allocate multicast array memory. */
560	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
561	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
562	if (adapter->mta == NULL) {
563		device_printf(dev, "Can not allocate multicast setup array\n");
564		error = ENOMEM;
565		goto err_late;
566	}
567
568	/* Some adapter-specific advanced features */
569	if (adapter->hw.mac.type >= e1000_i350) {
570		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
571		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
572		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
573		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
574		igb_set_sysctl_value(adapter, "eee_disabled",
575		    "Disable Energy Efficient Ethernet",
576		    &adapter->hw.dev_spec._82575.eee_disable,
577		    TRUE);
578		e1000_set_eee_i350(&adapter->hw);
579	}
580
581	/*
582	** Start from a known state; this is
583	** important when reading the NVM and
584	** MAC address from it.
585	*/
586	e1000_reset_hw(&adapter->hw);
587
588	/* Make sure we have a good EEPROM before we read from it */
589	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
590		/*
591		** Some PCI-E parts fail the first check due to
592		** the link being in a sleep state; call it again.
593		** If it fails a second time, it is a real issue.
594		*/
595		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
596			device_printf(dev,
597			    "The EEPROM Checksum Is Not Valid\n");
598			error = EIO;
599			goto err_late;
600		}
601	}
602
603	/*
604	** Copy the permanent MAC address out of the EEPROM
605	*/
606	if (e1000_read_mac_addr(&adapter->hw) < 0) {
607		device_printf(dev, "EEPROM read error while reading MAC"
608		    " address\n");
609		error = EIO;
610		goto err_late;
611	}
612	/* Check its sanity */
613	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
614		device_printf(dev, "Invalid MAC address\n");
615		error = EIO;
616		goto err_late;
617	}
618
619	/* Setup OS specific network interface */
620	if (igb_setup_interface(dev, adapter) != 0)
621		goto err_late;
622
623	/* Now get a good starting state */
624	igb_reset(adapter);
625
626	/* Initialize statistics */
627	igb_update_stats_counters(adapter);
628
629	adapter->hw.mac.get_link_status = 1;
630	igb_update_link_status(adapter);
631
632	/* Indicate SOL/IDER usage */
633	if (e1000_check_reset_block(&adapter->hw))
634		device_printf(dev,
635		    "PHY reset is blocked due to SOL/IDER session.\n");
636
637	/* Determine if we have to control management hardware */
638	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
639
640	/*
641	 * Setup Wake-on-Lan
642	 */
643	/* APME bit in EEPROM is mapped to WUC.APME */
644	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
645	if (eeprom_data)
646		adapter->wol = E1000_WUFC_MAG;
647
648	/* Register for VLAN events */
649	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
650	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
651	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
652	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
653
654	igb_add_hw_stats(adapter);
655
656	/* Tell the stack that the interface is not active */
657	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
658	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;
659
660	adapter->led_dev = led_create(igb_led_func, adapter,
661	    device_get_nameunit(dev));
662
663	/*
664	** Configure Interrupts
665	*/
666	if ((adapter->msix > 1) && (igb_enable_msix))
667		error = igb_allocate_msix(adapter);
668	else /* MSI or Legacy */
669		error = igb_allocate_legacy(adapter);
670	if (error)
671		goto err_late;
672
673#ifdef DEV_NETMAP
674	igb_netmap_attach(adapter);
675#endif /* DEV_NETMAP */
676	INIT_DEBUGOUT("igb_attach: end");
677
678	return (0);
679
680err_late:
681	igb_detach(dev);
682	igb_free_transmit_structures(adapter);
683	igb_free_receive_structures(adapter);
684	igb_release_hw_control(adapter);
685err_pci:
686	igb_free_pci_resources(adapter);
687	if (adapter->ifp != NULL)
688		if_free(adapter->ifp);
689	free(adapter->mta, M_DEVBUF);
690	IGB_CORE_LOCK_DESTROY(adapter);
691
692	return (error);
693}
694
695/*********************************************************************
696 *  Device removal routine
697 *
698 *  The detach entry point is called when the driver is being removed.
699 *  This routine stops the adapter and deallocates all the resources
700 *  that were allocated for driver operation.
701 *
702 *  return 0 on success, positive on failure
703 *********************************************************************/
704
705static int
706igb_detach(device_t dev)
707{
708	struct adapter	*adapter = device_get_softc(dev);
709	struct ifnet	*ifp = adapter->ifp;
710
711	INIT_DEBUGOUT("igb_detach: begin");
712
713	/* Make sure VLANs are not using the driver */
714	if (adapter->ifp->if_vlantrunk != NULL) {
715		device_printf(dev,"Vlan in use, detach first\n");
716		return (EBUSY);
717	}
718
719	ether_ifdetach(adapter->ifp);
720
721	if (adapter->led_dev != NULL)
722		led_destroy(adapter->led_dev);
723
724#ifdef DEVICE_POLLING
725	if (ifp->if_capenable & IFCAP_POLLING)
726		ether_poll_deregister(ifp);
727#endif
728
729	IGB_CORE_LOCK(adapter);
730	adapter->in_detach = 1;
731	igb_stop(adapter);
732	IGB_CORE_UNLOCK(adapter);
733
734	e1000_phy_hw_reset(&adapter->hw);
735
736	/* Give control back to firmware */
737	igb_release_manageability(adapter);
738	igb_release_hw_control(adapter);
739
740	if (adapter->wol) {
741		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
742		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
743		igb_enable_wakeup(dev);
744	}
745
746	/* Unregister VLAN events */
747	if (adapter->vlan_attach != NULL)
748		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
749	if (adapter->vlan_detach != NULL)
750		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
751
752	callout_drain(&adapter->timer);
753
754#ifdef DEV_NETMAP
755	netmap_detach(adapter->ifp);
756#endif /* DEV_NETMAP */
757	igb_free_pci_resources(adapter);
758	bus_generic_detach(dev);
759	if_free(ifp);
760
761	igb_free_transmit_structures(adapter);
762	igb_free_receive_structures(adapter);
763	if (adapter->mta != NULL)
764		free(adapter->mta, M_DEVBUF);
765
766	IGB_CORE_LOCK_DESTROY(adapter);
767
768	return (0);
769}
770
771/*********************************************************************
772 *
773 *  Shutdown entry point
774 *
775 **********************************************************************/
776
777static int
778igb_shutdown(device_t dev)
779{
780	return igb_suspend(dev);
781}
782
783/*
784 * Suspend/resume device methods.
785 */
786static int
787igb_suspend(device_t dev)
788{
789	struct adapter *adapter = device_get_softc(dev);
790
791	IGB_CORE_LOCK(adapter);
792
793	igb_stop(adapter);
794
795        igb_release_manageability(adapter);
796	igb_release_hw_control(adapter);
797
798        if (adapter->wol) {
799                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
800                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
801                igb_enable_wakeup(dev);
802        }
803
804	IGB_CORE_UNLOCK(adapter);
805
806	return bus_generic_suspend(dev);
807}
808
809static int
810igb_resume(device_t dev)
811{
812	struct adapter *adapter = device_get_softc(dev);
813	struct tx_ring	*txr = adapter->tx_rings;
814	struct ifnet *ifp = adapter->ifp;
815
816	IGB_CORE_LOCK(adapter);
817	igb_init_locked(adapter);
818	igb_init_manageability(adapter);
819
820	if ((ifp->if_flags & IFF_UP) &&
821	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
822		for (int i = 0; i < adapter->num_queues; i++, txr++) {
823			IGB_TX_LOCK(txr);
824#if __FreeBSD_version >= 800000
825			/* Process the stack queue only if not depleted */
826			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
827			    !drbr_empty(ifp, txr->br))
828				igb_mq_start_locked(ifp, txr, NULL);
829#else
830			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
831				igb_start_locked(txr, ifp);
832#endif
833			IGB_TX_UNLOCK(txr);
834		}
835	}
836	IGB_CORE_UNLOCK(adapter);
837
838	return bus_generic_resume(dev);
839}
840
841
842/*********************************************************************
843 *  Transmit entry point
844 *
845 *  igb_start is called by the stack to initiate a transmit.
846 *  The driver will remain in this routine as long as there are
847 *  packets to transmit and transmit resources are available.
848 *  If resources are not available, the stack is notified and
849 *  the packet is requeued.
850 **********************************************************************/
851
852static void
853igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
854{
855	struct adapter	*adapter = ifp->if_softc;
856	struct mbuf	*m_head;
857
858	IGB_TX_LOCK_ASSERT(txr);
859
860	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
861	    IFF_DRV_RUNNING)
862		return;
863	if (!adapter->link_active)
864		return;
865
866	/* Call cleanup if number of TX descriptors low */
867	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
868		igb_txeof(txr);
869
870	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
871		if (txr->tx_avail <= IGB_MAX_SCATTER) {
872			txr->queue_status |= IGB_QUEUE_DEPLETED;
873			break;
874		}
875		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
876		if (m_head == NULL)
877			break;
878		/*
879		 *  Encapsulation can modify our pointer, and/or make it
880		 *  NULL on failure.  In that event, we can't requeue.
881		 */
882		if (igb_xmit(txr, &m_head)) {
883			if (m_head != NULL)
884				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
885			if (txr->tx_avail <= IGB_MAX_SCATTER)
886				txr->queue_status |= IGB_QUEUE_DEPLETED;
887			break;
888		}
889
890		/* Send a copy of the frame to the BPF listener */
891		ETHER_BPF_MTAP(ifp, m_head);
892
893		/* Set watchdog on */
894		txr->watchdog_time = ticks;
895		txr->queue_status |= IGB_QUEUE_WORKING;
896	}
897}
898
899/*
900 * Legacy TX driver routine, called from the
901 * stack, always uses tx[0], and spins for it.
902 * It should not be used with multiqueue TX.
903 */
904static void
905igb_start(struct ifnet *ifp)
906{
907	struct adapter	*adapter = ifp->if_softc;
908	struct tx_ring	*txr = adapter->tx_rings;
909
910	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
911		IGB_TX_LOCK(txr);
912		igb_start_locked(txr, ifp);
913		IGB_TX_UNLOCK(txr);
914	}
915	return;
916}
917
918#if __FreeBSD_version >= 800000
919/*
920** Multiqueue Transmit driver
921**
922*/
923static int
924igb_mq_start(struct ifnet *ifp, struct mbuf *m)
925{
926	struct adapter		*adapter = ifp->if_softc;
927	struct igb_queue	*que;
928	struct tx_ring		*txr;
929	int 			i, err = 0;
930	bool			moveable = TRUE;
931
932	/* Which queue to use */
933	if ((m->m_flags & M_FLOWID) != 0) {
934		i = m->m_pkthdr.flowid % adapter->num_queues;
935		moveable = FALSE;
936	} else
937		i = curcpu % adapter->num_queues;
938
939	txr = &adapter->tx_rings[i];
940	que = &adapter->queues[i];
941	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
942	    IGB_TX_TRYLOCK(txr)) {
943		err = igb_mq_start_locked(ifp, txr, m);
944		IGB_TX_UNLOCK(txr);
945	} else {
946		err = drbr_enqueue(ifp, txr->br, m);
947		taskqueue_enqueue(que->tq, &txr->txq_task);
948	}
949
950	return (err);
951}
952
953static int
954igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
955{
956	struct adapter  *adapter = txr->adapter;
957        struct mbuf     *next;
958        int             err = 0, enq;
959
960	IGB_TX_LOCK_ASSERT(txr);
961
962	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
963	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
964	    adapter->link_active == 0) {
965		if (m != NULL)
966			err = drbr_enqueue(ifp, txr->br, m);
967		return (err);
968	}
969
970	enq = 0;
971	if (m == NULL) {
972		next = drbr_dequeue(ifp, txr->br);
973	} else if (drbr_needs_enqueue(ifp, txr->br)) {
974		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
975			return (err);
976		next = drbr_dequeue(ifp, txr->br);
977	} else
978		next = m;
979
980	/* Process the queue */
981	while (next != NULL) {
982		if ((err = igb_xmit(txr, &next)) != 0) {
983			if (next != NULL)
984				err = drbr_enqueue(ifp, txr->br, next);
985			break;
986		}
987		enq++;
988		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
989		ETHER_BPF_MTAP(ifp, next);
990		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
991			break;
992		next = drbr_dequeue(ifp, txr->br);
993	}
994	if (enq > 0) {
995		/* Set the watchdog */
996		txr->queue_status |= IGB_QUEUE_WORKING;
997		txr->watchdog_time = ticks;
998	}
999	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1000		igb_txeof(txr);
1001	if (txr->tx_avail <= IGB_MAX_SCATTER)
1002		txr->queue_status |= IGB_QUEUE_DEPLETED;
1003	return (err);
1004}
1005
1006/*
1007 * Called from a taskqueue to drain queued transmit packets.
1008 */
1009static void
1010igb_deferred_mq_start(void *arg, int pending)
1011{
1012	struct tx_ring *txr = arg;
1013	struct adapter *adapter = txr->adapter;
1014	struct ifnet *ifp = adapter->ifp;
1015
1016	IGB_TX_LOCK(txr);
1017	if (!drbr_empty(ifp, txr->br))
1018		igb_mq_start_locked(ifp, txr, NULL);
1019	IGB_TX_UNLOCK(txr);
1020}
1021
1022/*
1023** Flush all ring buffers
1024*/
1025static void
1026igb_qflush(struct ifnet *ifp)
1027{
1028	struct adapter	*adapter = ifp->if_softc;
1029	struct tx_ring	*txr = adapter->tx_rings;
1030	struct mbuf	*m;
1031
1032	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1033		IGB_TX_LOCK(txr);
1034		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1035			m_freem(m);
1036		IGB_TX_UNLOCK(txr);
1037	}
1038	if_qflush(ifp);
1039}
1040#endif /* __FreeBSD_version >= 800000 */
1041
1042/*********************************************************************
1043 *  Ioctl entry point
1044 *
1045 *  igb_ioctl is called when the user wants to configure the
1046 *  interface.
1047 *
1048 *  return 0 on success, positive on failure
1049 **********************************************************************/
1050
1051static int
1052igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1053{
1054	struct adapter	*adapter = ifp->if_softc;
1055	struct ifreq	*ifr = (struct ifreq *)data;
1056#if defined(INET) || defined(INET6)
1057	struct ifaddr	*ifa = (struct ifaddr *)data;
1058#endif
1059	bool		avoid_reset = FALSE;
1060	int		error = 0;
1061
1062	if (adapter->in_detach)
1063		return (error);
1064
1065	switch (command) {
1066	case SIOCSIFADDR:
1067#ifdef INET
1068		if (ifa->ifa_addr->sa_family == AF_INET)
1069			avoid_reset = TRUE;
1070#endif
1071#ifdef INET6
1072		if (ifa->ifa_addr->sa_family == AF_INET6)
1073			avoid_reset = TRUE;
1074#endif
1075		/*
1076		** Calling init results in link renegotiation,
1077		** so we avoid doing it when possible.
1078		*/
1079		if (avoid_reset) {
1080			ifp->if_flags |= IFF_UP;
1081			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1082				igb_init(adapter);
1083#ifdef INET
1084			if (!(ifp->if_flags & IFF_NOARP))
1085				arp_ifinit(ifp, ifa);
1086#endif
1087		} else
1088			error = ether_ioctl(ifp, command, data);
1089		break;
1090	case SIOCSIFMTU:
1091	    {
1092		int max_frame_size;
1093
1094		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1095
1096		IGB_CORE_LOCK(adapter);
1097		max_frame_size = 9234;
1098		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1099		    ETHER_CRC_LEN) {
1100			IGB_CORE_UNLOCK(adapter);
1101			error = EINVAL;
1102			break;
1103		}
1104
1105		ifp->if_mtu = ifr->ifr_mtu;
1106		adapter->max_frame_size =
1107		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1108		igb_init_locked(adapter);
1109		IGB_CORE_UNLOCK(adapter);
1110		break;
1111	    }
1112	case SIOCSIFFLAGS:
1113		IOCTL_DEBUGOUT("ioctl rcv'd: "
1114		    "SIOCSIFFLAGS (Set Interface Flags)");
1115		IGB_CORE_LOCK(adapter);
1116		if (ifp->if_flags & IFF_UP) {
1117			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1118				if ((ifp->if_flags ^ adapter->if_flags) &
1119				    (IFF_PROMISC | IFF_ALLMULTI)) {
1120					igb_disable_promisc(adapter);
1121					igb_set_promisc(adapter);
1122				}
1123			} else
1124				igb_init_locked(adapter);
1125		} else
1126			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1127				igb_stop(adapter);
1128		adapter->if_flags = ifp->if_flags;
1129		IGB_CORE_UNLOCK(adapter);
1130		break;
1131	case SIOCADDMULTI:
1132	case SIOCDELMULTI:
1133		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1134		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1135			IGB_CORE_LOCK(adapter);
1136			igb_disable_intr(adapter);
1137			igb_set_multi(adapter);
1138#ifdef DEVICE_POLLING
1139			if (!(ifp->if_capenable & IFCAP_POLLING))
1140#endif
1141				igb_enable_intr(adapter);
1142			IGB_CORE_UNLOCK(adapter);
1143		}
1144		break;
1145	case SIOCSIFMEDIA:
1146		/* Check SOL/IDER usage */
1147		IGB_CORE_LOCK(adapter);
1148		if (e1000_check_reset_block(&adapter->hw)) {
1149			IGB_CORE_UNLOCK(adapter);
1150			device_printf(adapter->dev, "Media change is"
1151			    " blocked due to SOL/IDER session.\n");
1152			break;
1153		}
1154		IGB_CORE_UNLOCK(adapter);
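		/* FALLTHROUGH to the shared SIOCGIFMEDIA handling below */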
1155	case SIOCGIFMEDIA:
1156		IOCTL_DEBUGOUT("ioctl rcv'd: "
1157		    "SIOCxIFMEDIA (Get/Set Interface Media)");
1158		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1159		break;
1160	case SIOCSIFCAP:
1161	    {
1162		int mask, reinit;
1163
1164		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1165		reinit = 0;
1166		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1167#ifdef DEVICE_POLLING
1168		if (mask & IFCAP_POLLING) {
1169			if (ifr->ifr_reqcap & IFCAP_POLLING) {
1170				error = ether_poll_register(igb_poll, ifp);
1171				if (error)
1172					return (error);
1173				IGB_CORE_LOCK(adapter);
1174				igb_disable_intr(adapter);
1175				ifp->if_capenable |= IFCAP_POLLING;
1176				IGB_CORE_UNLOCK(adapter);
1177			} else {
1178				error = ether_poll_deregister(ifp);
1179				/* Enable interrupt even in error case */
1180				IGB_CORE_LOCK(adapter);
1181				igb_enable_intr(adapter);
1182				ifp->if_capenable &= ~IFCAP_POLLING;
1183				IGB_CORE_UNLOCK(adapter);
1184			}
1185		}
1186#endif
1187		if (mask & IFCAP_HWCSUM) {
1188			ifp->if_capenable ^= IFCAP_HWCSUM;
1189			reinit = 1;
1190		}
1191		if (mask & IFCAP_TSO4) {
1192			ifp->if_capenable ^= IFCAP_TSO4;
1193			reinit = 1;
1194		}
1195		if (mask & IFCAP_VLAN_HWTAGGING) {
1196			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1197			reinit = 1;
1198		}
1199		if (mask & IFCAP_VLAN_HWFILTER) {
1200			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1201			reinit = 1;
1202		}
1203		if (mask & IFCAP_VLAN_HWTSO) {
1204			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1205			reinit = 1;
1206		}
1207		if (mask & IFCAP_LRO) {
1208			ifp->if_capenable ^= IFCAP_LRO;
1209			reinit = 1;
1210		}
1211		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1212			igb_init(adapter);
1213		VLAN_CAPABILITIES(ifp);
1214		break;
1215	    }
1216
1217	default:
1218		error = ether_ioctl(ifp, command, data);
1219		break;
1220	}
1221
1222	return (error);
1223}
1224
1225
1226/*********************************************************************
1227 *  Init entry point
1228 *
1229 *  This routine is used in two ways. It is used by the stack as
1230 *  the init entry point in the network interface structure. It is
1231 *  also used by the driver as a hw/sw initialization routine to
1232 *  get to a consistent state.
1233 *
1234 *  This function returns nothing; errors are reported via device_printf.
1235 **********************************************************************/
1236
1237static void
1238igb_init_locked(struct adapter *adapter)
1239{
1240	struct ifnet	*ifp = adapter->ifp;
1241	device_t	dev = adapter->dev;
1242
1243	INIT_DEBUGOUT("igb_init: begin");
1244
1245	IGB_CORE_LOCK_ASSERT(adapter);
1246
1247	igb_disable_intr(adapter);
1248	callout_stop(&adapter->timer);
1249
1250	/* Get the latest mac address, User can use a LAA */
1251        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1252              ETHER_ADDR_LEN);
1253
1254	/* Put the address into the Receive Address Array */
1255	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1256
1257	igb_reset(adapter);
1258	igb_update_link_status(adapter);
1259
1260	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1261
1262	/* Set hardware offload abilities */
1263	ifp->if_hwassist = 0;
1264	if (ifp->if_capenable & IFCAP_TXCSUM) {
1265		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1266#if __FreeBSD_version >= 800000
1267		if (adapter->hw.mac.type == e1000_82576)
1268			ifp->if_hwassist |= CSUM_SCTP;
1269#endif
1270	}
1271
1272	if (ifp->if_capenable & IFCAP_TSO4)
1273		ifp->if_hwassist |= CSUM_TSO;
1274
1275	/* Configure for OS presence */
1276	igb_init_manageability(adapter);
1277
1278	/* Prepare transmit descriptors and buffers */
1279	igb_setup_transmit_structures(adapter);
1280	igb_initialize_transmit_units(adapter);
1281
1282	/* Setup Multicast table */
1283	igb_set_multi(adapter);
1284
1285	/*
1286	** Figure out the desired mbuf pool
1287	** for doing jumbo/packetsplit
1288	*/
1289	if (adapter->max_frame_size <= 2048)
1290		adapter->rx_mbuf_sz = MCLBYTES;
1291	else if (adapter->max_frame_size <= 4096)
1292		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1293	else
1294		adapter->rx_mbuf_sz = MJUM9BYTES;
1295
1296	/* Prepare receive descriptors and buffers */
1297	if (igb_setup_receive_structures(adapter)) {
1298		device_printf(dev, "Could not setup receive structures\n");
1299		return;
1300	}
1301	igb_initialize_receive_units(adapter);
1302
1303        /* Enable VLAN support */
1304	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1305		igb_setup_vlan_hw_support(adapter);
1306
1307	/* Don't lose promiscuous settings */
1308	igb_set_promisc(adapter);
1309
1310	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1311	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1312
1313	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1314	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1315
1316	if (adapter->msix > 1) /* Set up queue routing */
1317		igb_configure_queues(adapter);
1318
1319	/* this clears any pending interrupts */
1320	E1000_READ_REG(&adapter->hw, E1000_ICR);
1321#ifdef DEVICE_POLLING
1322	/*
1323	 * Only enable interrupts if we are not polling, make sure
1324	 * they are off otherwise.
1325	 */
1326	if (ifp->if_capenable & IFCAP_POLLING)
1327		igb_disable_intr(adapter);
1328	else
1329#endif /* DEVICE_POLLING */
1330	{
1331		igb_enable_intr(adapter);
1332		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1333	}
1334
1335	/* Set Energy Efficient Ethernet */
1336
1337	e1000_set_eee_i350(&adapter->hw);
1338}
1339
1340static void
1341igb_init(void *arg)
1342{
1343	struct adapter *adapter = arg;
1344
1345	IGB_CORE_LOCK(adapter);
1346	igb_init_locked(adapter);
1347	IGB_CORE_UNLOCK(adapter);
1348}
1349
1350
1351static void
1352igb_handle_que(void *context, int pending)
1353{
1354	struct igb_queue *que = context;
1355	struct adapter *adapter = que->adapter;
1356	struct tx_ring *txr = que->txr;
1357	struct ifnet	*ifp = adapter->ifp;
1358
1359	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1360		bool	more;
1361
1362		more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1363
1364		IGB_TX_LOCK(txr);
1365		igb_txeof(txr);
1366#if __FreeBSD_version >= 800000
1367		/* Process the stack queue only if not depleted */
1368		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1369		    !drbr_empty(ifp, txr->br))
1370			igb_mq_start_locked(ifp, txr, NULL);
1371#else
1372		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1373			igb_start_locked(txr, ifp);
1374#endif
1375		IGB_TX_UNLOCK(txr);
1376		/* Do we need another? */
1377		if (more) {
1378			taskqueue_enqueue(que->tq, &que->que_task);
1379			return;
1380		}
1381	}
1382
1383#ifdef DEVICE_POLLING
1384	if (ifp->if_capenable & IFCAP_POLLING)
1385		return;
1386#endif
1387	/* Reenable this interrupt */
1388	if (que->eims)
1389		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1390	else
1391		igb_enable_intr(adapter);
1392}
1393
1394/* Deal with link in a sleepable context */
1395static void
1396igb_handle_link(void *context, int pending)
1397{
1398	struct adapter *adapter = context;
1399
1400	IGB_CORE_LOCK(adapter);
1401	igb_handle_link_locked(adapter);
1402	IGB_CORE_UNLOCK(adapter);
1403}
1404
1405static void
1406igb_handle_link_locked(struct adapter *adapter)
1407{
1408	struct tx_ring	*txr = adapter->tx_rings;
1409	struct ifnet *ifp = adapter->ifp;
1410
1411	IGB_CORE_LOCK_ASSERT(adapter);
1412	adapter->hw.mac.get_link_status = 1;
1413	igb_update_link_status(adapter);
1414	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1415		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1416			IGB_TX_LOCK(txr);
1417#if __FreeBSD_version >= 800000
1418			/* Process the stack queue only if not depleted */
1419			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1420			    !drbr_empty(ifp, txr->br))
1421				igb_mq_start_locked(ifp, txr, NULL);
1422#else
1423			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1424				igb_start_locked(txr, ifp);
1425#endif
1426			IGB_TX_UNLOCK(txr);
1427		}
1428	}
1429}
1430
1431/*********************************************************************
1432 *
1433 *  MSI/Legacy Deferred
1434 *  Interrupt Service routine
1435 *
1436 *********************************************************************/
1437static int
1438igb_irq_fast(void *arg)
1439{
1440	struct adapter		*adapter = arg;
1441	struct igb_queue	*que = adapter->queues;
1442	u32			reg_icr;
1443
1444
1445	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1446
1447	/* Hot eject?  */
1448	if (reg_icr == 0xffffffff)
1449		return FILTER_STRAY;
1450
1451	/* Definitely not our interrupt.  */
1452	if (reg_icr == 0x0)
1453		return FILTER_STRAY;
1454
1455	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1456		return FILTER_STRAY;
1457
1458	/*
1459	 * Mask interrupts until the taskqueue is finished running.  This is
1460	 * cheap, just assume that it is needed.  This also works around the
1461	 * MSI message reordering errata on certain systems.
1462	 */
1463	igb_disable_intr(adapter);
1464	taskqueue_enqueue(que->tq, &que->que_task);
1465
1466	/* Link status change */
1467	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1468		taskqueue_enqueue(que->tq, &adapter->link_task);
1469
1470	if (reg_icr & E1000_ICR_RXO)
1471		adapter->rx_overruns++;
1472	return FILTER_HANDLED;
1473}
1474
1475#ifdef DEVICE_POLLING
1476/*********************************************************************
1477 *
1478 *  Legacy polling routine : if using this code you MUST be sure that
1479 *  multiqueue is not defined, ie, set igb_num_queues to 1.
1480 *
1481 *********************************************************************/
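/*
 * Illustrative example: on a kernel built with "options DEVICE_POLLING",
 * force a single queue with hw.igb.num_queues=1 in /boot/loader.conf and
 * then enable polling per interface, e.g. "ifconfig igb0 polling".
 */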
1482#if __FreeBSD_version >= 800000
1483#define POLL_RETURN_COUNT(a) (a)
1484static int
1485#else
1486#define POLL_RETURN_COUNT(a)
1487static void
1488#endif
1489igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1490{
1491	struct adapter		*adapter = ifp->if_softc;
1492	struct igb_queue	*que = adapter->queues;
1493	struct tx_ring		*txr = adapter->tx_rings;
1494	u32			reg_icr, rx_done = 0;
1495	u32			loop = IGB_MAX_LOOP;
1496	bool			more;
1497
1498	IGB_CORE_LOCK(adapter);
1499	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1500		IGB_CORE_UNLOCK(adapter);
1501		return POLL_RETURN_COUNT(rx_done);
1502	}
1503
1504	if (cmd == POLL_AND_CHECK_STATUS) {
1505		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1506		/* Link status change */
1507		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1508			igb_handle_link_locked(adapter);
1509
1510		if (reg_icr & E1000_ICR_RXO)
1511			adapter->rx_overruns++;
1512	}
1513	IGB_CORE_UNLOCK(adapter);
1514
1515	igb_rxeof(que, count, &rx_done);
1516
1517	IGB_TX_LOCK(txr);
1518	do {
1519		more = igb_txeof(txr);
1520	} while (loop-- && more);
1521#if __FreeBSD_version >= 800000
1522	if (!drbr_empty(ifp, txr->br))
1523		igb_mq_start_locked(ifp, txr, NULL);
1524#else
1525	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1526		igb_start_locked(txr, ifp);
1527#endif
1528	IGB_TX_UNLOCK(txr);
1529	return POLL_RETURN_COUNT(rx_done);
1530}
1531#endif /* DEVICE_POLLING */
1532
1533/*********************************************************************
1534 *
1535 *  MSIX Que Interrupt Service routine
1536 *
1537 **********************************************************************/
1538static void
1539igb_msix_que(void *arg)
1540{
1541	struct igb_queue *que = arg;
1542	struct adapter *adapter = que->adapter;
1543	struct ifnet   *ifp = adapter->ifp;
1544	struct tx_ring *txr = que->txr;
1545	struct rx_ring *rxr = que->rxr;
1546	u32		newitr = 0;
1547	bool		more_rx;
1548
1549	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1550	++que->irqs;
1551
1552	IGB_TX_LOCK(txr);
1553	igb_txeof(txr);
1554#if __FreeBSD_version >= 800000
1555	/* Process the stack queue only if not depleted */
1556	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1557	    !drbr_empty(ifp, txr->br))
1558		igb_mq_start_locked(ifp, txr, NULL);
1559#else
1560	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1561		igb_start_locked(txr, ifp);
1562#endif
1563	IGB_TX_UNLOCK(txr);
1564
1565	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1566
1567	if (adapter->enable_aim == FALSE)
1568		goto no_calc;
1569	/*
1570	** Do Adaptive Interrupt Moderation:
1571        **  - Write out last calculated setting
1572	**  - Calculate based on average size over
1573	**    the last interval.
1574	*/
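	/*
	 * Illustrative example of the calculation below: with an average
	 * frame size of 1500 bytes, newitr = 1500 + 24 = 1524; that is
	 * above the 300-1200 "mid range", so it is halved to 762 and then
	 * masked with 0x7FFC, yielding a programmed EITR value of 760.
	 */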
1575        if (que->eitr_setting)
1576                E1000_WRITE_REG(&adapter->hw,
1577                    E1000_EITR(que->msix), que->eitr_setting);
1578
1579        que->eitr_setting = 0;
1580
1581        /* Idle, do nothing */
1582        if ((txr->bytes == 0) && (rxr->bytes == 0))
1583                goto no_calc;
1584
1585        /* Use half the default if the link is sub-gigabit */
1586        if (adapter->link_speed != 1000)
1587                newitr = IGB_DEFAULT_ITR / 2;
1588        else {
1589		if ((txr->bytes) && (txr->packets))
1590                	newitr = txr->bytes/txr->packets;
1591		if ((rxr->bytes) && (rxr->packets))
1592			newitr = max(newitr,
1593			    (rxr->bytes / rxr->packets));
1594                newitr += 24; /* account for hardware frame, crc */
1595		/* set an upper boundary */
1596		newitr = min(newitr, 3000);
1597		/* Be nice to the mid range */
1598                if ((newitr > 300) && (newitr < 1200))
1599                        newitr = (newitr / 3);
1600                else
1601                        newitr = (newitr / 2);
1602        }
1603        newitr &= 0x7FFC;  /* Mask invalid bits */
1604        if (adapter->hw.mac.type == e1000_82575)
1605                newitr |= newitr << 16;
1606        else
1607                newitr |= E1000_EITR_CNT_IGNR;
1608
1609        /* save for next interrupt */
1610        que->eitr_setting = newitr;
1611
1612        /* Reset state */
1613        txr->bytes = 0;
1614        txr->packets = 0;
1615        rxr->bytes = 0;
1616        rxr->packets = 0;
1617
1618no_calc:
1619	/* Schedule a clean task if needed */
1620	if (more_rx)
1621		taskqueue_enqueue(que->tq, &que->que_task);
1622	else
1623		/* Reenable this interrupt */
1624		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1625	return;
1626}
1627
1628
1629/*********************************************************************
1630 *
1631 *  MSIX Link Interrupt Service routine
1632 *
1633 **********************************************************************/
1634
1635static void
1636igb_msix_link(void *arg)
1637{
1638	struct adapter	*adapter = arg;
1639	u32       	icr;
1640
1641	++adapter->link_irq;
1642	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1643	if (!(icr & E1000_ICR_LSC))
1644		goto spurious;
1645	igb_handle_link(adapter, 0);
1646
1647spurious:
1648	/* Rearm */
1649	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1650	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1651	return;
1652}
1653
1654
1655/*********************************************************************
1656 *
1657 *  Media Ioctl callback
1658 *
1659 *  This routine is called whenever the user queries the status of
1660 *  the interface using ifconfig.
1661 *
1662 **********************************************************************/
1663static void
1664igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1665{
1666	struct adapter *adapter = ifp->if_softc;
1667	u_char fiber_type = IFM_1000_SX;
1668
1669	INIT_DEBUGOUT("igb_media_status: begin");
1670
1671	IGB_CORE_LOCK(adapter);
1672	igb_update_link_status(adapter);
1673
1674	ifmr->ifm_status = IFM_AVALID;
1675	ifmr->ifm_active = IFM_ETHER;
1676
1677	if (!adapter->link_active) {
1678		IGB_CORE_UNLOCK(adapter);
1679		return;
1680	}
1681
1682	ifmr->ifm_status |= IFM_ACTIVE;
1683
1684	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1685	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1686		ifmr->ifm_active |= fiber_type | IFM_FDX;
1687	else {
1688		switch (adapter->link_speed) {
1689		case 10:
1690			ifmr->ifm_active |= IFM_10_T;
1691			break;
1692		case 100:
1693			ifmr->ifm_active |= IFM_100_TX;
1694			break;
1695		case 1000:
1696			ifmr->ifm_active |= IFM_1000_T;
1697			break;
1698		}
1699		if (adapter->link_duplex == FULL_DUPLEX)
1700			ifmr->ifm_active |= IFM_FDX;
1701		else
1702			ifmr->ifm_active |= IFM_HDX;
1703	}
1704	IGB_CORE_UNLOCK(adapter);
1705}
1706
1707/*********************************************************************
1708 *
1709 *  Media Ioctl callback
1710 *
1711 *  This routine is called when the user changes speed/duplex using
1712 *  the media/mediaopt options with ifconfig.
1713 *
1714 **********************************************************************/
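/*
 * Illustrative example: "ifconfig igb0 media 100baseTX mediaopt full-duplex"
 * forces 100 Mb/s full duplex, while "ifconfig igb0 media autoselect"
 * restores autonegotiation.
 */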
1715static int
1716igb_media_change(struct ifnet *ifp)
1717{
1718	struct adapter *adapter = ifp->if_softc;
1719	struct ifmedia  *ifm = &adapter->media;
1720
1721	INIT_DEBUGOUT("igb_media_change: begin");
1722
1723	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1724		return (EINVAL);
1725
1726	IGB_CORE_LOCK(adapter);
1727	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1728	case IFM_AUTO:
1729		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1730		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1731		break;
1732	case IFM_1000_LX:
1733	case IFM_1000_SX:
1734	case IFM_1000_T:
1735		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1736		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1737		break;
1738	case IFM_100_TX:
1739		adapter->hw.mac.autoneg = FALSE;
1740		adapter->hw.phy.autoneg_advertised = 0;
1741		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1742			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1743		else
1744			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1745		break;
1746	case IFM_10_T:
1747		adapter->hw.mac.autoneg = FALSE;
1748		adapter->hw.phy.autoneg_advertised = 0;
1749		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1750			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1751		else
1752			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1753		break;
1754	default:
1755		device_printf(adapter->dev, "Unsupported media type\n");
1756	}
1757
1758	igb_init_locked(adapter);
1759	IGB_CORE_UNLOCK(adapter);
1760
1761	return (0);
1762}
1763
1764
1765/*********************************************************************
1766 *
1767 *  This routine maps the mbufs to Advanced TX descriptors.
1768 *
1769 **********************************************************************/
1770static int
1771igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1772{
1773	struct adapter		*adapter = txr->adapter;
1774	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1775	bus_dmamap_t		map;
1776	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1777	union e1000_adv_tx_desc	*txd = NULL;
1778	struct mbuf		*m_head = *m_headp;
1779	struct ether_vlan_header *eh = NULL;
1780	struct ip		*ip = NULL;
1781	struct tcphdr		*th = NULL;
1782	u32			hdrlen, cmd_type_len, olinfo_status = 0;
1783	int			ehdrlen, poff;
1784	int			nsegs, i, first, last = 0;
1785	int			error, do_tso, remap = 1;
1786
1787	/* Set basic descriptor constants */
1788	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
1789	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1790	if (m_head->m_flags & M_VLANTAG)
1791		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1792
1793retry:
1794	m_head = *m_headp;
1795	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
1796	hdrlen = ehdrlen = poff = 0;
1797
1798	/*
1799	 * Intel recommends entire IP/TCP header length reside in a single
1800	 * buffer. If multiple descriptors are used to describe the IP and
1801	 * TCP header, each descriptor should describe one or more
1802	 * complete headers; descriptors referencing only parts of headers
1803	 * are not supported. If all layer headers are not coalesced into
1804	 * a single buffer, each buffer should not cross a 4KB boundary,
1805	 * or be larger than the maximum read request size.
1806	 * The controller also requires modifying the IP/TCP header to make
1807	 * TSO work, so we first get a writable mbuf chain and then coalesce
1808	 * the ethernet/IP/TCP headers into a single buffer to meet the
1809	 * controller's requirement. This also simplifies IP/TCP/UDP checksum
1810	 * offloading, which has similar restrictions.
1811	 */
1812	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1813		if (do_tso || (m_head->m_next != NULL &&
1814		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1815			if (M_WRITABLE(*m_headp) == 0) {
1816				m_head = m_dup(*m_headp, M_DONTWAIT);
1817				m_freem(*m_headp);
1818				if (m_head == NULL) {
1819					*m_headp = NULL;
1820					return (ENOBUFS);
1821				}
1822				*m_headp = m_head;
1823			}
1824		}
1825		/*
1826		 * Assume IPv4, we don't have TSO/checksum offload support
1827		 * for IPv6 yet.
1828		 */
1829		ehdrlen = sizeof(struct ether_header);
1830		m_head = m_pullup(m_head, ehdrlen);
1831		if (m_head == NULL) {
1832			*m_headp = NULL;
1833			return (ENOBUFS);
1834		}
1835		eh = mtod(m_head, struct ether_vlan_header *);
1836		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1837			ehdrlen = sizeof(struct ether_vlan_header);
1838			m_head = m_pullup(m_head, ehdrlen);
1839			if (m_head == NULL) {
1840				*m_headp = NULL;
1841				return (ENOBUFS);
1842			}
1843		}
1844		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1845		if (m_head == NULL) {
1846			*m_headp = NULL;
1847			return (ENOBUFS);
1848		}
1849		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1850		poff = ehdrlen + (ip->ip_hl << 2);
1851		if (do_tso) {
1852			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1853			if (m_head == NULL) {
1854				*m_headp = NULL;
1855				return (ENOBUFS);
1856			}
1857			/*
1858			 * The TCP pseudo header checksum does not include the
1859			 * TCP payload length, so the driver must recompute it
1860			 * here to produce what the hardware expects, per
1861			 * Microsoft's Large Send specification.
1862			 */
1863			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1864			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1865			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1866			/* Keep track of the full header length */
1867			hdrlen = poff + (th->th_off << 2);
1868		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1869			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1870			if (m_head == NULL) {
1871				*m_headp = NULL;
1872				return (ENOBUFS);
1873			}
1874			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1875			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1876			if (m_head == NULL) {
1877				*m_headp = NULL;
1878				return (ENOBUFS);
1879			}
1880			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1881			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1882		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1883			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1884			if (m_head == NULL) {
1885				*m_headp = NULL;
1886				return (ENOBUFS);
1887			}
1888			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1889		}
1890		*m_headp = m_head;
1891	}
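	/*
	 * At this point, when an offload was requested, eh and ip point
	 * into the first (writable) mbuf, poff marks the start of the L4
	 * header, and for TSO hdrlen holds the full header length.
	 */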
1892
1893	/*
1894	 * Map the packet for DMA
1895	 *
1896	 * Capture the first descriptor index,
1897	 * this descriptor will have the index
1898	 * of the EOP which is the only one that
1899	 * now gets a DONE bit writeback.
1900	 */
1901	first = txr->next_avail_desc;
1902	tx_buffer = &txr->tx_buffers[first];
1903	tx_buffer_mapped = tx_buffer;
1904	map = tx_buffer->map;
1905
1906	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1907	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1908
1909	/*
1910	 * There are two types of errors we can (try) to handle:
1911	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1912	 *   out of segments.  Defragment the mbuf chain and try again.
1913	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1914	 *   at this point in time.  Defer sending and try again later.
1915	 * All other errors, in particular EINVAL, are fatal and prevent the
1916	 * mbuf chain from ever going through.  Drop it and report error.
1917	 */
1918	if (error == EFBIG && remap) {
1919		struct mbuf *m;
1920
1921		m = m_defrag(*m_headp, M_DONTWAIT);
1922		if (m == NULL) {
1923			adapter->mbuf_defrag_failed++;
1924			m_freem(*m_headp);
1925			*m_headp = NULL;
1926			return (ENOBUFS);
1927		}
1928		*m_headp = m;
1929
1930		/* Try it again, but only once */
1931		remap = 0;
1932		goto retry;
1933	} else if (error == ENOMEM) {
1934		adapter->no_tx_dma_setup++;
1935		return (error);
1936	} else if (error != 0) {
1937		adapter->no_tx_dma_setup++;
1938		m_freem(*m_headp);
1939		*m_headp = NULL;
1940		return (error);
1941	}
1942
1943	/*
1944	** Make sure we don't overrun the ring,
1945	** we need nsegs descriptors and one for
1946	** the context descriptor used for the
1947	** offloads.
1948	*/
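	/*
	** The extra "- 2" margin below is a conservative cushion so the
	** head and tail of the ring never fully meet (an interpretation
	** of the check, not documented elsewhere in the driver).
	*/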
1949        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1950                txr->no_desc_avail++;
1951		bus_dmamap_unload(txr->txtag, map);
1952		return (ENOBUFS);
1953        }
1954	m_head = *m_headp;
1955
1956	/* Do hardware assists:
1957	 * Set up the context descriptor, used
1958	 * when any hardware offload is done.
1959	 * This includes CSUM, VLAN, and TSO.
1960	 * It will use the first descriptor.
1961	 */
1962
1963	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1964		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1965			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1966			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1967			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1968		} else
1969			return (ENXIO);
1970	} else if (igb_tx_ctx_setup(txr, m_head))
1971			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1972
1973	/* Calculate payload length */
1974	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1975	    << E1000_ADVTXD_PAYLEN_SHIFT);
1976
1977	/* 82575 needs the queue index added */
1978	if (adapter->hw.mac.type == e1000_82575)
1979		olinfo_status |= txr->me << 4;
1980
1981	/* Set up our transmit descriptors */
1982	i = txr->next_avail_desc;
1983	for (int j = 0; j < nsegs; j++) {
1984		bus_size_t seg_len;
1985		bus_addr_t seg_addr;
1986
1987		tx_buffer = &txr->tx_buffers[i];
1988		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1989		seg_addr = segs[j].ds_addr;
1990		seg_len  = segs[j].ds_len;
1991
1992		txd->read.buffer_addr = htole64(seg_addr);
1993		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1994		txd->read.olinfo_status = htole32(olinfo_status);
1995		last = i;
1996		if (++i == adapter->num_tx_desc)
1997			i = 0;
1998		tx_buffer->m_head = NULL;
1999		tx_buffer->next_eop = -1;
2000	}
2001
2002	txr->next_avail_desc = i;
2003	txr->tx_avail -= nsegs;
2004        tx_buffer->m_head = m_head;
2005
2006	/*
2007	** Here we swap the map so the last descriptor,
2008	** which gets the completion interrupt has the
2009	** real map, and the first descriptor gets the
2010	** unused map from this descriptor.
2011	*/
2012	tx_buffer_mapped->map = tx_buffer->map;
2013	tx_buffer->map = map;
2014        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
2015
2016        /*
2017         * Last Descriptor of Packet
2018	 * needs End Of Packet (EOP)
2019	 * and Report Status (RS)
2020         */
2021        txd->read.cmd_type_len |=
2022	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
2023	/*
2024	 * Keep track in the first buffer which
2025	 * descriptor will be written back
2026	 */
2027	tx_buffer = &txr->tx_buffers[first];
2028	tx_buffer->next_eop = last;
2029	/* Update the watchdog time early and often */
2030	txr->watchdog_time = ticks;
2031
2032	/*
2033	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
2034	 * that this frame is available to transmit.
2035	 */
2036	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2037	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2038	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
2039	++txr->tx_packets;
2040
2041	return (0);
2042}
2043static void
2044igb_set_promisc(struct adapter *adapter)
2045{
2046	struct ifnet	*ifp = adapter->ifp;
2047	struct e1000_hw *hw = &adapter->hw;
2048	u32		reg;
2049
2050	if (adapter->vf_ifp) {
2051		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
2052		return;
2053	}
2054
2055	reg = E1000_READ_REG(hw, E1000_RCTL);
2056	if (ifp->if_flags & IFF_PROMISC) {
2057		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2058		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2059	} else if (ifp->if_flags & IFF_ALLMULTI) {
2060		reg |= E1000_RCTL_MPE;
2061		reg &= ~E1000_RCTL_UPE;
2062		E1000_WRITE_REG(hw, E1000_RCTL, reg);
2063	}
2064}
2065
2066static void
2067igb_disable_promisc(struct adapter *adapter)
2068{
2069	struct e1000_hw *hw = &adapter->hw;
2070	u32		reg;
2071
2072	if (adapter->vf_ifp) {
2073		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
2074		return;
2075	}
2076	reg = E1000_READ_REG(hw, E1000_RCTL);
2077	reg &=  (~E1000_RCTL_UPE);
2078	reg &=  (~E1000_RCTL_MPE);
2079	E1000_WRITE_REG(hw, E1000_RCTL, reg);
2080}
2081
2082
2083/*********************************************************************
2084 *  Multicast Update
2085 *
2086 *  This routine is called whenever the multicast address list is updated.
2087 *
2088 **********************************************************************/
2089
2090static void
2091igb_set_multi(struct adapter *adapter)
2092{
2093	struct ifnet	*ifp = adapter->ifp;
2094	struct ifmultiaddr *ifma;
2095	u32 reg_rctl = 0;
2096	u8  *mta;
2097
2098	int mcnt = 0;
2099
2100	IOCTL_DEBUGOUT("igb_set_multi: begin");
2101
2102	mta = adapter->mta;
2103	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2104	    MAX_NUM_MULTICAST_ADDRESSES);
2105
2106#if __FreeBSD_version < 800000
2107	IF_ADDR_LOCK(ifp);
2108#else
2109	if_maddr_rlock(ifp);
2110#endif
2111	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2112		if (ifma->ifma_addr->sa_family != AF_LINK)
2113			continue;
2114
2115		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2116			break;
2117
2118		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2119		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2120		mcnt++;
2121	}
2122#if __FreeBSD_version < 800000
2123	IF_ADDR_UNLOCK(ifp);
2124#else
2125	if_maddr_runlock(ifp);
2126#endif
2127
2128	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2129		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2130		reg_rctl |= E1000_RCTL_MPE;
2131		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2132	} else
2133		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2134}
2135
2136
2137/*********************************************************************
2138 *  Timer routine:
2139 *  	This routine checks for link status,
2140 *	updates statistics, and does the watchdog.
2141 *
2142 **********************************************************************/
2143
2144static void
2145igb_local_timer(void *arg)
2146{
2147	struct adapter		*adapter = arg;
2148	device_t		dev = adapter->dev;
2149	struct ifnet		*ifp = adapter->ifp;
2150	struct tx_ring		*txr = adapter->tx_rings;
2151	struct igb_queue	*que = adapter->queues;
2152	int			hung = 0, busy = 0;
2153
2154
2155	IGB_CORE_LOCK_ASSERT(adapter);
2156
2157	igb_update_link_status(adapter);
2158	igb_update_stats_counters(adapter);
2159
2160        /*
2161        ** Check the TX queues status
2162	**	- central locked handling of OACTIVE
2163	**	- watchdog only if all queues show hung
2164        */
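        /*
        ** A queue stalled only by incoming PAUSE frames is not counted
        ** as hung; pause_frames is sampled here and cleared again below
        ** before the callout is rescheduled.
        */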
2165	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2166		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2167		    (adapter->pause_frames == 0))
2168			++hung;
2169		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2170			++busy;
2171		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2172			taskqueue_enqueue(que->tq, &que->que_task);
2173	}
2174	if (hung == adapter->num_queues)
2175		goto timeout;
2176	if (busy == adapter->num_queues)
2177		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2178	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2179	    (busy < adapter->num_queues))
2180		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2181
2182	adapter->pause_frames = 0;
2183	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2184#ifndef DEVICE_POLLING
2185	/* Schedule all queue interrupts - deadlock protection */
2186	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2187#endif
2188	return;
2189
2190timeout:
2191	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2192	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2193            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2194            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2195	device_printf(dev, "TX(%d) desc avail = %d, "
2196            "Next TX to Clean = %d\n",
2197            txr->me, txr->tx_avail, txr->next_to_clean);
2198	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2199	adapter->watchdog_events++;
2200	igb_init_locked(adapter);
2201}
2202
2203static void
2204igb_update_link_status(struct adapter *adapter)
2205{
2206	struct e1000_hw *hw = &adapter->hw;
2207	struct ifnet *ifp = adapter->ifp;
2208	device_t dev = adapter->dev;
2209	struct tx_ring *txr = adapter->tx_rings;
2210	u32 link_check, thstat, ctrl;
2211
2212	link_check = thstat = ctrl = 0;
2213
2214	/* Get the cached link value or read for real */
2215        switch (hw->phy.media_type) {
2216        case e1000_media_type_copper:
2217                if (hw->mac.get_link_status) {
2218			/* Do the work to read phy */
2219                        e1000_check_for_link(hw);
2220                        link_check = !hw->mac.get_link_status;
2221                } else
2222                        link_check = TRUE;
2223                break;
2224        case e1000_media_type_fiber:
2225                e1000_check_for_link(hw);
2226                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2227                                 E1000_STATUS_LU);
2228                break;
2229        case e1000_media_type_internal_serdes:
2230                e1000_check_for_link(hw);
2231                link_check = adapter->hw.mac.serdes_has_link;
2232                break;
2233	/* VF device is type_unknown */
2234        case e1000_media_type_unknown:
2235                e1000_check_for_link(hw);
2236		link_check = !hw->mac.get_link_status;
2237		/* Fall thru */
2238        default:
2239                break;
2240        }
2241
2242	/* Check for thermal downshift or shutdown */
2243	if (hw->mac.type == e1000_i350) {
2244		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2245		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2246	}
2247
2248	/* Now we check if a transition has happened */
2249	if (link_check && (adapter->link_active == 0)) {
2250		e1000_get_speed_and_duplex(&adapter->hw,
2251		    &adapter->link_speed, &adapter->link_duplex);
2252		if (bootverbose)
2253			device_printf(dev, "Link is up %d Mbps %s\n",
2254			    adapter->link_speed,
2255			    ((adapter->link_duplex == FULL_DUPLEX) ?
2256			    "Full Duplex" : "Half Duplex"));
2257		adapter->link_active = 1;
2258		ifp->if_baudrate = adapter->link_speed * 1000000;
2259		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2260		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2261			device_printf(dev, "Link: thermal downshift\n");
2262		/* This can sleep */
2263		if_link_state_change(ifp, LINK_STATE_UP);
2264	} else if (!link_check && (adapter->link_active == 1)) {
2265		ifp->if_baudrate = adapter->link_speed = 0;
2266		adapter->link_duplex = 0;
2267		if (bootverbose)
2268			device_printf(dev, "Link is Down\n");
2269		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2270		    (thstat & E1000_THSTAT_PWR_DOWN))
2271			device_printf(dev, "Link: thermal shutdown\n");
2272		adapter->link_active = 0;
2273		/* This can sleep */
2274		if_link_state_change(ifp, LINK_STATE_DOWN);
2275		/* Reset queue state */
2276		for (int i = 0; i < adapter->num_queues; i++, txr++)
2277			txr->queue_status = IGB_QUEUE_IDLE;
2278	}
2279}
2280
2281/*********************************************************************
2282 *
2283 *  This routine disables all traffic on the adapter by issuing a
2284 *  global reset on the MAC and deallocates TX/RX buffers.
2285 *
2286 **********************************************************************/
2287
2288static void
2289igb_stop(void *arg)
2290{
2291	struct adapter	*adapter = arg;
2292	struct ifnet	*ifp = adapter->ifp;
2293	struct tx_ring *txr = adapter->tx_rings;
2294
2295	IGB_CORE_LOCK_ASSERT(adapter);
2296
2297	INIT_DEBUGOUT("igb_stop: begin");
2298
2299	igb_disable_intr(adapter);
2300
2301	callout_stop(&adapter->timer);
2302
2303	/* Tell the stack that the interface is no longer active */
2304	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2305	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2306
2307	/* Disarm watchdog timer. */
2308	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2309		IGB_TX_LOCK(txr);
2310		txr->queue_status = IGB_QUEUE_IDLE;
2311		IGB_TX_UNLOCK(txr);
2312	}
2313
2314	e1000_reset_hw(&adapter->hw);
2315	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2316
2317	e1000_led_off(&adapter->hw);
2318	e1000_cleanup_led(&adapter->hw);
2319}
2320
2321
2322/*********************************************************************
2323 *
2324 *  Determine hardware revision.
2325 *
2326 **********************************************************************/
2327static void
2328igb_identify_hardware(struct adapter *adapter)
2329{
2330	device_t dev = adapter->dev;
2331
2332	/* Make sure our PCI config space has the necessary stuff set */
2333	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2334	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2335	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2336		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2337		    "bits were not set!\n");
2338		adapter->hw.bus.pci_cmd_word |=
2339		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2340		pci_write_config(dev, PCIR_COMMAND,
2341		    adapter->hw.bus.pci_cmd_word, 2);
2342	}
2343
2344	/* Save off the information about this board */
2345	adapter->hw.vendor_id = pci_get_vendor(dev);
2346	adapter->hw.device_id = pci_get_device(dev);
2347	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2348	adapter->hw.subsystem_vendor_id =
2349	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2350	adapter->hw.subsystem_device_id =
2351	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2352
2353	/* Set MAC type early for PCI setup */
2354	e1000_set_mac_type(&adapter->hw);
2355
2356	/* Are we a VF device? */
2357	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2358	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2359		adapter->vf_ifp = 1;
2360	else
2361		adapter->vf_ifp = 0;
2362}
2363
2364static int
2365igb_allocate_pci_resources(struct adapter *adapter)
2366{
2367	device_t	dev = adapter->dev;
2368	int		rid;
2369
2370	rid = PCIR_BAR(0);
2371	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2372	    &rid, RF_ACTIVE);
2373	if (adapter->pci_mem == NULL) {
2374		device_printf(dev, "Unable to allocate bus resource: memory\n");
2375		return (ENXIO);
2376	}
2377	adapter->osdep.mem_bus_space_tag =
2378	    rman_get_bustag(adapter->pci_mem);
2379	adapter->osdep.mem_bus_space_handle =
2380	    rman_get_bushandle(adapter->pci_mem);
2381	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2382
2383	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2384
2385	/* This will setup either MSI/X or MSI */
2386	adapter->msix = igb_setup_msix(adapter);
2387	adapter->hw.back = &adapter->osdep;
2388
2389	return (0);
2390}
2391
2392/*********************************************************************
2393 *
2394 *  Setup the Legacy or MSI Interrupt handler
2395 *
2396 **********************************************************************/
2397static int
2398igb_allocate_legacy(struct adapter *adapter)
2399{
2400	device_t		dev = adapter->dev;
2401	struct igb_queue	*que = adapter->queues;
2402	struct tx_ring		*txr = adapter->tx_rings;
2403	int			error, rid = 0;
2404
2405	/* Turn off all interrupts */
2406	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2407
2408	/* MSI RID is 1 */
2409	if (adapter->msix == 1)
2410		rid = 1;
2411
2412	/* We allocate a single interrupt resource */
2413	adapter->res = bus_alloc_resource_any(dev,
2414	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2415	if (adapter->res == NULL) {
2416		device_printf(dev, "Unable to allocate bus resource: "
2417		    "interrupt\n");
2418		return (ENXIO);
2419	}
2420
2421#if __FreeBSD_version >= 800000
2422	TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2423#endif
2424
2425	/*
2426	 * Try allocating a fast interrupt and the associated deferred
2427	 * processing contexts.
2428	 */
2429	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2430	/* Make tasklet for deferred link handling */
2431	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2432	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2433	    taskqueue_thread_enqueue, &que->tq);
2434	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2435	    device_get_nameunit(adapter->dev));
2436	if ((error = bus_setup_intr(dev, adapter->res,
2437	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2438	    adapter, &adapter->tag)) != 0) {
2439		device_printf(dev, "Failed to register fast interrupt "
2440			    "handler: %d\n", error);
2441		taskqueue_free(que->tq);
2442		que->tq = NULL;
2443		return (error);
2444	}
2445
2446	return (0);
2447}
2448
2449
2450/*********************************************************************
2451 *
2452 *  Setup the MSIX Queue Interrupt handlers:
2453 *
2454 **********************************************************************/
2455static int
2456igb_allocate_msix(struct adapter *adapter)
2457{
2458	device_t		dev = adapter->dev;
2459	struct igb_queue	*que = adapter->queues;
2460	int			error, rid, vector = 0;
2461
2462	/* Be sure to start with all interrupts disabled */
2463	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2464	E1000_WRITE_FLUSH(&adapter->hw);
2465
2466	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2467		rid = vector + 1;
2468		que->res = bus_alloc_resource_any(dev,
2469		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2470		if (que->res == NULL) {
2471			device_printf(dev,
2472			    "Unable to allocate bus resource: "
2473			    "MSIX Queue Interrupt\n");
2474			return (ENXIO);
2475		}
2476		error = bus_setup_intr(dev, que->res,
2477	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2478		    igb_msix_que, que, &que->tag);
2479		if (error) {
2480			que->res = NULL;
2481			device_printf(dev, "Failed to register Queue handler");
2482			return (error);
2483		}
2484#if __FreeBSD_version >= 800504
2485		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2486#endif
2487		que->msix = vector;
2488		if (adapter->hw.mac.type == e1000_82575)
2489			que->eims = E1000_EICR_TX_QUEUE0 << i;
2490		else
2491			que->eims = 1 << vector;
2492		/*
2493		** Bind the msix vector, and thus the
2494		** rings to the corresponding cpu.
2495		*/
2496		if (adapter->num_queues > 1)
2497			bus_bind_intr(dev, que->res, i);
2498#if __FreeBSD_version >= 800000
2499		TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2500		    que->txr);
2501#endif
2502		/* Make tasklet for deferred handling */
2503		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2504		que->tq = taskqueue_create("igb_que", M_NOWAIT,
2505		    taskqueue_thread_enqueue, &que->tq);
2506		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2507		    device_get_nameunit(adapter->dev));
2508	}
2509
2510	/* And Link */
2511	rid = vector + 1;
2512	adapter->res = bus_alloc_resource_any(dev,
2513	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2514	if (adapter->res == NULL) {
2515		device_printf(dev,
2516		    "Unable to allocate bus resource: "
2517		    "MSIX Link Interrupt\n");
2518		return (ENXIO);
2519	}
2520	if ((error = bus_setup_intr(dev, adapter->res,
2521	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2522	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2523		device_printf(dev, "Failed to register Link handler");
2524		return (error);
2525	}
2526#if __FreeBSD_version >= 800504
2527	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2528#endif
2529	adapter->linkvec = vector;
2530
2531	return (0);
2532}
2533
2534
2535static void
2536igb_configure_queues(struct adapter *adapter)
2537{
2538	struct	e1000_hw	*hw = &adapter->hw;
2539	struct	igb_queue	*que;
2540	u32			tmp, ivar = 0, newitr = 0;
2541
2542	/* First turn on RSS capability */
2543	if (adapter->hw.mac.type != e1000_82575)
2544		E1000_WRITE_REG(hw, E1000_GPIE,
2545		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2546		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2547
2548	/* Turn on MSIX */
2549	switch (adapter->hw.mac.type) {
2550	case e1000_82580:
2551	case e1000_i350:
2552	case e1000_vfadapt:
2553	case e1000_vfadapt_i350:
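		/*
		** Each 32-bit IVAR register covers two queues: RX causes
		** map to bits 7:0 and 23:16, TX causes to bits 15:8 and
		** 31:24, which is what the masks below preserve (layout
		** inferred from the masks; see the data sheet for details).
		*/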
2554		/* RX entries */
2555		for (int i = 0; i < adapter->num_queues; i++) {
2556			u32 index = i >> 1;
2557			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2558			que = &adapter->queues[i];
2559			if (i & 1) {
2560				ivar &= 0xFF00FFFF;
2561				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2562			} else {
2563				ivar &= 0xFFFFFF00;
2564				ivar |= que->msix | E1000_IVAR_VALID;
2565			}
2566			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2567		}
2568		/* TX entries */
2569		for (int i = 0; i < adapter->num_queues; i++) {
2570			u32 index = i >> 1;
2571			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2572			que = &adapter->queues[i];
2573			if (i & 1) {
2574				ivar &= 0x00FFFFFF;
2575				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2576			} else {
2577				ivar &= 0xFFFF00FF;
2578				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2579			}
2580			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2581			adapter->que_mask |= que->eims;
2582		}
2583
2584		/* And for the link interrupt */
2585		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2586		adapter->link_mask = 1 << adapter->linkvec;
2587		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2588		break;
2589	case e1000_82576:
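		/*
		** On the 82576 the low byte lanes of each IVAR serve
		** queues 0-7 and the high byte lanes serve queues 8-15,
		** hence the i < 8 tests below (inferred from the masks).
		*/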
2590		/* RX entries */
2591		for (int i = 0; i < adapter->num_queues; i++) {
2592			u32 index = i & 0x7; /* Each IVAR has two entries */
2593			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2594			que = &adapter->queues[i];
2595			if (i < 8) {
2596				ivar &= 0xFFFFFF00;
2597				ivar |= que->msix | E1000_IVAR_VALID;
2598			} else {
2599				ivar &= 0xFF00FFFF;
2600				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2601			}
2602			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2603			adapter->que_mask |= que->eims;
2604		}
2605		/* TX entries */
2606		for (int i = 0; i < adapter->num_queues; i++) {
2607			u32 index = i & 0x7; /* Each IVAR has two entries */
2608			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2609			que = &adapter->queues[i];
2610			if (i < 8) {
2611				ivar &= 0xFFFF00FF;
2612				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2613			} else {
2614				ivar &= 0x00FFFFFF;
2615				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2616			}
2617			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2618			adapter->que_mask |= que->eims;
2619		}
2620
2621		/* And for the link interrupt */
2622		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2623		adapter->link_mask = 1 << adapter->linkvec;
2624		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2625		break;
2626
2627	case e1000_82575:
2628                /* enable MSI-X support */
2629		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2630                tmp |= E1000_CTRL_EXT_PBA_CLR;
2631                /* Auto-Mask interrupts upon ICR read. */
2632                tmp |= E1000_CTRL_EXT_EIAME;
2633                tmp |= E1000_CTRL_EXT_IRCA;
2634                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2635
2636		/* Queues */
2637		for (int i = 0; i < adapter->num_queues; i++) {
2638			que = &adapter->queues[i];
2639			tmp = E1000_EICR_RX_QUEUE0 << i;
2640			tmp |= E1000_EICR_TX_QUEUE0 << i;
2641			que->eims = tmp;
2642			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2643			    i, que->eims);
2644			adapter->que_mask |= que->eims;
2645		}
2646
2647		/* Link */
2648		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2649		    E1000_EIMS_OTHER);
2650		adapter->link_mask |= E1000_EIMS_OTHER;
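		/* FALLTHROUGH - the default case below only breaks */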
2651	default:
2652		break;
2653	}
2654
2655	/* Set the starting interrupt rate */
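	/*
	** newitr holds the EITR interval value: 4000000 divided by the
	** requested interrupts/sec gives the interval in the register's
	** units, and the 0x7FFC mask keeps it within the writable field
	** (an interpretation inferred from the constants used here).
	*/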
2656	if (igb_max_interrupt_rate > 0)
2657		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2658
2659        if (hw->mac.type == e1000_82575)
2660                newitr |= newitr << 16;
2661        else
2662                newitr |= E1000_EITR_CNT_IGNR;
2663
2664	for (int i = 0; i < adapter->num_queues; i++) {
2665		que = &adapter->queues[i];
2666		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2667	}
2668
2669	return;
2670}
2671
2672
2673static void
2674igb_free_pci_resources(struct adapter *adapter)
2675{
2676	struct		igb_queue *que = adapter->queues;
2677	device_t	dev = adapter->dev;
2678	int		rid;
2679
2680	/*
2681	** There is a slight possibility of a failure mode
2682	** in attach that will result in entering this function
2683	** before interrupt resources have been initialized, and
2684	** in that case we do not want to execute the loops below.
2685	** We can detect this reliably by the state of the adapter
2686	** res pointer.
2687	*/
2688	if (adapter->res == NULL)
2689		goto mem;
2690
2691	/*
2692	 * First release all the interrupt resources:
2693	 */
2694	for (int i = 0; i < adapter->num_queues; i++, que++) {
2695		rid = que->msix + 1;
2696		if (que->tag != NULL) {
2697			bus_teardown_intr(dev, que->res, que->tag);
2698			que->tag = NULL;
2699		}
2700		if (que->res != NULL)
2701			bus_release_resource(dev,
2702			    SYS_RES_IRQ, rid, que->res);
2703	}
2704
2705	/* Clean the Legacy or Link interrupt last */
2706	if (adapter->linkvec) /* we are doing MSIX */
2707		rid = adapter->linkvec + 1;
2708	else
2709		rid = (adapter->msix != 0) ? 1 : 0;
2710
2711	que = adapter->queues;
2712	if (adapter->tag != NULL) {
2713		taskqueue_drain(que->tq, &adapter->link_task);
2714		bus_teardown_intr(dev, adapter->res, adapter->tag);
2715		adapter->tag = NULL;
2716	}
2717	if (adapter->res != NULL)
2718		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2719
2720	for (int i = 0; i < adapter->num_queues; i++, que++) {
2721		if (que->tq != NULL) {
2722#if __FreeBSD_version >= 800000
2723			taskqueue_drain(que->tq, &que->txr->txq_task);
2724#endif
2725			taskqueue_drain(que->tq, &que->que_task);
2726			taskqueue_free(que->tq);
2727		}
2728	}
2729mem:
2730	if (adapter->msix)
2731		pci_release_msi(dev);
2732
2733	if (adapter->msix_mem != NULL)
2734		bus_release_resource(dev, SYS_RES_MEMORY,
2735		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2736
2737	if (adapter->pci_mem != NULL)
2738		bus_release_resource(dev, SYS_RES_MEMORY,
2739		    PCIR_BAR(0), adapter->pci_mem);
2740
2741}
2742
2743/*
2744 * Setup Either MSI/X or MSI
2745 */
2746static int
2747igb_setup_msix(struct adapter *adapter)
2748{
2749	device_t dev = adapter->dev;
2750	int rid, want, queues, msgs;
2751
2752	/* tuneable override */
2753	if (igb_enable_msix == 0)
2754		goto msi;
2755
2756	/* First try MSI/X */
2757	rid = PCIR_BAR(IGB_MSIX_BAR);
2758	adapter->msix_mem = bus_alloc_resource_any(dev,
2759	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2760       	if (!adapter->msix_mem) {
2761		/* May not be enabled */
2762		device_printf(adapter->dev,
2763		    "Unable to map MSIX table\n");
2764		goto msi;
2765	}
2766
2767	msgs = pci_msix_count(dev);
2768	if (msgs == 0) { /* system has msix disabled */
2769		bus_release_resource(dev, SYS_RES_MEMORY,
2770		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2771		adapter->msix_mem = NULL;
2772		goto msi;
2773	}
2774
2775	/* Figure out a reasonable auto config value */
2776	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
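	/*
	** i.e. one queue per CPU, but never more than msgs - 1 so that
	** one MSIX message is left over for the link vector below.
	*/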
2777
2778	/* Manual override */
2779	if (igb_num_queues != 0)
2780		queues = igb_num_queues;
2781	if (queues > 8)  /* max queues */
2782		queues = 8;
2783
2784	/* Can have max of 4 queues on 82575 */
2785	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2786		queues = 4;
2787
2788	/* Limit the VF devices to one queue */
2789	if (adapter->vf_ifp)
2790		queues = 1;
2791
2792	/*
2793	** One vector (RX/TX pair) per queue
2794	** plus an additional one for the link interrupt
2795	*/
2796	want = queues + 1;
2797	if (msgs >= want)
2798		msgs = want;
2799	else {
2800               	device_printf(adapter->dev,
2801		    "MSIX Configuration Problem, "
2802		    "%d vectors configured, but %d queues wanted!\n",
2803		    msgs, want);
2804		return (0);
2805	}
2806	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2807               	device_printf(adapter->dev,
2808		    "Using MSIX interrupts with %d vectors\n", msgs);
2809		adapter->num_queues = queues;
2810		return (msgs);
2811	}
2812msi:
2813       	msgs = pci_msi_count(dev);
2814	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0) {
2815		device_printf(adapter->dev, "Using MSI interrupt\n");
2816		return (msgs);
2817	}
2818	return (0);
2819}
2820
2821/*********************************************************************
2822 *
2823 *  Set up a fresh starting state
2824 *
2825 **********************************************************************/
2826static void
2827igb_reset(struct adapter *adapter)
2828{
2829	device_t	dev = adapter->dev;
2830	struct e1000_hw *hw = &adapter->hw;
2831	struct e1000_fc_info *fc = &hw->fc;
2832	struct ifnet	*ifp = adapter->ifp;
2833	u32		pba = 0;
2834	u16		hwm;
2835
2836	INIT_DEBUGOUT("igb_reset: begin");
2837
2838	/* Let the firmware know the OS is in control */
2839	igb_get_hw_control(adapter);
2840
2841	/*
2842	 * Packet Buffer Allocation (PBA)
2843	 * Writing PBA sets the receive portion of the buffer
2844	 * the remainder is used for the transmit buffer.
2845	 */
2846	switch (hw->mac.type) {
2847	case e1000_82575:
2848		pba = E1000_PBA_32K;
2849		break;
2850	case e1000_82576:
2851	case e1000_vfadapt:
2852		pba = E1000_READ_REG(hw, E1000_RXPBS);
2853		pba &= E1000_RXPBS_SIZE_MASK_82576;
2854		break;
2855	case e1000_82580:
2856	case e1000_i350:
2857	case e1000_vfadapt_i350:
2858		pba = E1000_READ_REG(hw, E1000_RXPBS);
2859		pba = e1000_rxpbs_adjust_82580(pba);
2860		break;
2861	default:
2862		break;
2863	}
2864
2865	/* Special needs in case of Jumbo frames */
2866	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2867		u32 tx_space, min_tx, min_rx;
2868		pba = E1000_READ_REG(hw, E1000_PBA);
2869		tx_space = pba >> 16;
2870		pba &= 0xffff;
2871		min_tx = (adapter->max_frame_size +
2872		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2873		min_tx = roundup2(min_tx, 1024);
2874		min_tx >>= 10;
2875                min_rx = adapter->max_frame_size;
2876                min_rx = roundup2(min_rx, 1024);
2877                min_rx >>= 10;
2878		if (tx_space < min_tx &&
2879		    ((min_tx - tx_space) < pba)) {
2880			pba = pba - (min_tx - tx_space);
2881			/*
2882                         * if short on rx space, rx wins
2883                         * and must trump tx adjustment
2884			 */
2885                        if (pba < min_rx)
2886                                pba = min_rx;
2887		}
2888		E1000_WRITE_REG(hw, E1000_PBA, pba);
2889	}
2890
2891	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2892
2893	/*
2894	 * These parameters control the automatic generation (Tx) and
2895	 * response (Rx) to Ethernet PAUSE frames.
2896	 * - High water mark should allow for at least two frames to be
2897	 *   received after sending an XOFF.
2898	 * - Low water mark works best when it is very near the high water mark.
2899	 *   This allows the receiver to restart by sending XON when it has
2900	 *   drained a bit.
2901	 */
2902	hwm = min(((pba << 10) * 9 / 10),
2903	    ((pba << 10) - 2 * adapter->max_frame_size));
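	/*
	** Illustrative example (assumed values, not from the data sheet):
	** with pba = 32 (KB) and a 1518-byte max frame,
	** hwm = min(29491, 32768 - 3036) = 29491, which is then rounded
	** down to the register granularity below.
	*/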
2904
2905	if (hw->mac.type < e1000_82576) {
2906		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2907		fc->low_water = fc->high_water - 8;
2908	} else {
2909		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2910		fc->low_water = fc->high_water - 16;
2911	}
2912
2913	fc->pause_time = IGB_FC_PAUSE_TIME;
2914	fc->send_xon = TRUE;
2915	if (adapter->fc)
2916		fc->requested_mode = adapter->fc;
2917	else
2918		fc->requested_mode = e1000_fc_default;
2919
2920	/* Issue a global reset */
2921	e1000_reset_hw(hw);
2922	E1000_WRITE_REG(hw, E1000_WUC, 0);
2923
2924	if (e1000_init_hw(hw) < 0)
2925		device_printf(dev, "Hardware Initialization Failed\n");
2926
2927	/* Setup DMA Coalescing */
2928	if (hw->mac.type == e1000_i350) {
2929		u32 reg = ~E1000_DMACR_DMAC_EN;
2930
2931		if (adapter->dmac == 0) { /* Disabling it */
2932			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2933			goto reset_out;
2934		}
2935
2936		hwm = (pba - 4) << 10;
2937		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2938		    & E1000_DMACR_DMACTHR_MASK);
2939
2940		/* transition to L0s or L1 if available */
2941		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2942
2943		/* timer = value in adapter->dmac in 32usec intervals */
2944		reg |= (adapter->dmac >> 5);
2945		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2946
2947		/* No lower threshold */
2948		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2949
2950		/* set hwm to PBA -  2 * max frame size */
2951		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2952
2953		/* Set the interval before transition */
2954		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2955		reg |= 0x800000FF; /* 255 usec */
2956		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2957
2958		/* free space in tx packet buffer to wake from DMA coal */
2959		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2960		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2961
2962		/* make low power state decision controlled by DMA coal */
2963		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2964		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2965		    reg | E1000_PCIEMISC_LX_DECISION);
2966		device_printf(dev, "DMA Coalescing enabled\n");
2967	}
2968
2969reset_out:
2970	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2971	e1000_get_phy_info(hw);
2972	e1000_check_for_link(hw);
2973	return;
2974}
2975
2976/*********************************************************************
2977 *
2978 *  Setup networking device structure and register an interface.
2979 *
2980 **********************************************************************/
2981static int
2982igb_setup_interface(device_t dev, struct adapter *adapter)
2983{
2984	struct ifnet   *ifp;
2985
2986	INIT_DEBUGOUT("igb_setup_interface: begin");
2987
2988	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2989	if (ifp == NULL) {
2990		device_printf(dev, "can not allocate ifnet structure\n");
2991		return (-1);
2992	}
2993	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2994	ifp->if_init =  igb_init;
2995	ifp->if_softc = adapter;
2996	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2997	ifp->if_ioctl = igb_ioctl;
2998	ifp->if_start = igb_start;
2999#if __FreeBSD_version >= 800000
3000	ifp->if_transmit = igb_mq_start;
3001	ifp->if_qflush = igb_qflush;
3002#endif
3003	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3004	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3005	IFQ_SET_READY(&ifp->if_snd);
3006
3007	ether_ifattach(ifp, adapter->hw.mac.addr);
3008
3009	ifp->if_capabilities = ifp->if_capenable = 0;
3010
3011	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3012	ifp->if_capabilities |= IFCAP_TSO4;
3013	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3014	ifp->if_capenable = ifp->if_capabilities;
3015
3016	/* Advertise LRO capability, but leave it disabled by default */
3017	ifp->if_capabilities |= IFCAP_LRO;
3018
3019#ifdef DEVICE_POLLING
3020	ifp->if_capabilities |= IFCAP_POLLING;
3021#endif
3022
3023	/*
3024	 * Tell the upper layer(s) we
3025	 * support full VLAN capability.
3026	 */
3027	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
3028	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3029			     |  IFCAP_VLAN_HWTSO
3030			     |  IFCAP_VLAN_MTU;
3031	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3032			  |  IFCAP_VLAN_HWTSO
3033			  |  IFCAP_VLAN_MTU;
3034
3035	/*
3036	** Don't turn this on by default; if vlans are
3037	** created on another pseudo device (e.g. lagg)
3038	** then vlan events are not passed thru, breaking
3039	** operation, but with HW FILTER off it works. If
3040	** using vlans directly on the igb driver you can
3041	** enable this and get full hardware tag filtering.
3042	*/
3043	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3044
3045	/*
3046	 * Specify the media types supported by this adapter and register
3047	 * callbacks to update media and link information
3048	 */
3049	ifmedia_init(&adapter->media, IFM_IMASK,
3050	    igb_media_change, igb_media_status);
3051	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3052	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3053		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3054			    0, NULL);
3055		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3056	} else {
3057		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3058		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3059			    0, NULL);
3060		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3061			    0, NULL);
3062		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3063			    0, NULL);
3064		if (adapter->hw.phy.type != e1000_phy_ife) {
3065			ifmedia_add(&adapter->media,
3066				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3067			ifmedia_add(&adapter->media,
3068				IFM_ETHER | IFM_1000_T, 0, NULL);
3069		}
3070	}
3071	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3072	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3073	return (0);
3074}
3075
3076
3077/*
3078 * Manage DMA'able memory.
3079 */
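/*
 * igb_dmamap_cb() is the bus_dmamap_load() callback used below; the
 * DMA tags here are created with a single segment, so it just records
 * that segment's bus address for the caller.
 */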
3080static void
3081igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3082{
3083	if (error)
3084		return;
3085	*(bus_addr_t *) arg = segs[0].ds_addr;
3086}
3087
3088static int
3089igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3090        struct igb_dma_alloc *dma, int mapflags)
3091{
3092	int error;
3093
3094	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3095				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
3096				BUS_SPACE_MAXADDR,	/* lowaddr */
3097				BUS_SPACE_MAXADDR,	/* highaddr */
3098				NULL, NULL,		/* filter, filterarg */
3099				size,			/* maxsize */
3100				1,			/* nsegments */
3101				size,			/* maxsegsize */
3102				0,			/* flags */
3103				NULL,			/* lockfunc */
3104				NULL,			/* lockarg */
3105				&dma->dma_tag);
3106	if (error) {
3107		device_printf(adapter->dev,
3108		    "%s: bus_dma_tag_create failed: %d\n",
3109		    __func__, error);
3110		goto fail_0;
3111	}
3112
3113	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3114	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3115	if (error) {
3116		device_printf(adapter->dev,
3117		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3118		    __func__, (uintmax_t)size, error);
3119		goto fail_2;
3120	}
3121
3122	dma->dma_paddr = 0;
3123	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3124	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3125	if (error || dma->dma_paddr == 0) {
3126		device_printf(adapter->dev,
3127		    "%s: bus_dmamap_load failed: %d\n",
3128		    __func__, error);
3129		goto fail_3;
3130	}
3131
3132	return (0);
3133
3134fail_3:
3135	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3136fail_2:
3137	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3138	bus_dma_tag_destroy(dma->dma_tag);
3139fail_0:
3140	dma->dma_map = NULL;
3141	dma->dma_tag = NULL;
3142
3143	return (error);
3144}
3145
3146static void
3147igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3148{
3149	if (dma->dma_tag == NULL)
3150		return;
3151	if (dma->dma_map != NULL) {
3152		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3153		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3154		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3155		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3156		dma->dma_map = NULL;
3157	}
3158	bus_dma_tag_destroy(dma->dma_tag);
3159	dma->dma_tag = NULL;
3160}
3161
3162
3163/*********************************************************************
3164 *
3165 *  Allocate memory for the transmit and receive rings, and then
3166 *  the descriptors associated with each, called only once at attach.
3167 *
3168 **********************************************************************/
3169static int
3170igb_allocate_queues(struct adapter *adapter)
3171{
3172	device_t dev = adapter->dev;
3173	struct igb_queue	*que = NULL;
3174	struct tx_ring		*txr = NULL;
3175	struct rx_ring		*rxr = NULL;
3176	int rsize, tsize, error = E1000_SUCCESS;
3177	int txconf = 0, rxconf = 0;
3178
3179	/* First allocate the top level queue structs */
3180	if (!(adapter->queues =
3181	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3182	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183		device_printf(dev, "Unable to allocate queue memory\n");
3184		error = ENOMEM;
3185		goto fail;
3186	}
3187
3188	/* Next allocate the TX ring struct memory */
3189	if (!(adapter->tx_rings =
3190	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3191	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3192		device_printf(dev, "Unable to allocate TX ring memory\n");
3193		error = ENOMEM;
3194		goto tx_fail;
3195	}
3196
3197	/* Now allocate the RX */
3198	if (!(adapter->rx_rings =
3199	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3200	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3201		device_printf(dev, "Unable to allocate RX ring memory\n");
3202		error = ENOMEM;
3203		goto rx_fail;
3204	}
3205
3206	tsize = roundup2(adapter->num_tx_desc *
3207	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3208	/*
3209	 * Now set up the TX queues, txconf is needed to handle the
3210	 * possibility that things fail midcourse and we need to
3211	 * undo memory gracefully
3212	 */
3213	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3214		/* Set up some basics */
3215		txr = &adapter->tx_rings[i];
3216		txr->adapter = adapter;
3217		txr->me = i;
3218
3219		/* Initialize the TX lock */
3220		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3221		    device_get_nameunit(dev), txr->me);
3222		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3223
3224		if (igb_dma_malloc(adapter, tsize,
3225			&txr->txdma, BUS_DMA_NOWAIT)) {
3226			device_printf(dev,
3227			    "Unable to allocate TX Descriptor memory\n");
3228			error = ENOMEM;
3229			goto err_tx_desc;
3230		}
3231		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3232		bzero((void *)txr->tx_base, tsize);
3233
3234        	/* Now allocate transmit buffers for the ring */
3235        	if (igb_allocate_transmit_buffers(txr)) {
3236			device_printf(dev,
3237			    "Critical Failure setting up transmit buffers\n");
3238			error = ENOMEM;
3239			goto err_tx_desc;
3240        	}
3241#if __FreeBSD_version >= 800000
3242		/* Allocate a buf ring */
3243		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3244		    M_WAITOK, &txr->tx_mtx);
3245#endif
3246	}
3247
3248	/*
3249	 * Next the RX queues...
3250	 */
3251	rsize = roundup2(adapter->num_rx_desc *
3252	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3253	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3254		rxr = &adapter->rx_rings[i];
3255		rxr->adapter = adapter;
3256		rxr->me = i;
3257
3258		/* Initialize the RX lock */
3259		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3260		    device_get_nameunit(dev), rxr->me);
3261		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3262
3263		if (igb_dma_malloc(adapter, rsize,
3264			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3265			device_printf(dev,
3266			    "Unable to allocate RxDescriptor memory\n");
3267			error = ENOMEM;
3268			goto err_rx_desc;
3269		}
3270		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3271		bzero((void *)rxr->rx_base, rsize);
3272
3273        	/* Allocate receive buffers for the ring */
3274		if (igb_allocate_receive_buffers(rxr)) {
3275			device_printf(dev,
3276			    "Critical Failure setting up receive buffers\n");
3277			error = ENOMEM;
3278			goto err_rx_desc;
3279		}
3280	}
3281
3282	/*
3283	** Finally set up the queue holding structs
3284	*/
3285	for (int i = 0; i < adapter->num_queues; i++) {
3286		que = &adapter->queues[i];
3287		que->adapter = adapter;
3288		que->txr = &adapter->tx_rings[i];
3289		que->rxr = &adapter->rx_rings[i];
3290	}
3291
3292	return (0);
3293
3294err_rx_desc:
3295	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3296		igb_dma_free(adapter, &rxr->rxdma);
3297err_tx_desc:
3298	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3299		igb_dma_free(adapter, &txr->txdma);
3300	free(adapter->rx_rings, M_DEVBUF);
3301rx_fail:
3302#if __FreeBSD_version >= 800000
3303	buf_ring_free(txr->br, M_DEVBUF);
3304#endif
3305	free(adapter->tx_rings, M_DEVBUF);
3306tx_fail:
3307	free(adapter->queues, M_DEVBUF);
3308fail:
3309	return (error);
3310}
3311
3312/*********************************************************************
3313 *
3314 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3315 *  the information needed to transmit a packet on the wire. This is
3316 *  called only once at attach, setup is done every reset.
3317 *
3318 **********************************************************************/
3319static int
3320igb_allocate_transmit_buffers(struct tx_ring *txr)
3321{
3322	struct adapter *adapter = txr->adapter;
3323	device_t dev = adapter->dev;
3324	struct igb_tx_buffer *txbuf;
3325	int error, i;
3326
3327	/*
3328	 * Setup DMA descriptor areas.
3329	 */
3330	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3331			       1, 0,			/* alignment, bounds */
3332			       BUS_SPACE_MAXADDR,	/* lowaddr */
3333			       BUS_SPACE_MAXADDR,	/* highaddr */
3334			       NULL, NULL,		/* filter, filterarg */
3335			       IGB_TSO_SIZE,		/* maxsize */
3336			       IGB_MAX_SCATTER,		/* nsegments */
3337			       PAGE_SIZE,		/* maxsegsize */
3338			       0,			/* flags */
3339			       NULL,			/* lockfunc */
3340			       NULL,			/* lockfuncarg */
3341			       &txr->txtag))) {
3342		device_printf(dev,"Unable to allocate TX DMA tag\n");
3343		goto fail;
3344	}
3345
3346	if (!(txr->tx_buffers =
3347	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3348	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3349		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3350		error = ENOMEM;
3351		goto fail;
3352	}
3353
3354        /* Create the descriptor buffer dma maps */
3355	txbuf = txr->tx_buffers;
3356	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3357		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3358		if (error != 0) {
3359			device_printf(dev, "Unable to create TX DMA map\n");
3360			goto fail;
3361		}
3362	}
3363
3364	return 0;
3365fail:
3366	/* We free all, which handles the case where we failed in the middle */
3367	igb_free_transmit_structures(adapter);
3368	return (error);
3369}
3370
3371/*********************************************************************
3372 *
3373 *  Initialize a transmit ring.
3374 *
3375 **********************************************************************/
3376static void
3377igb_setup_transmit_ring(struct tx_ring *txr)
3378{
3379	struct adapter *adapter = txr->adapter;
3380	struct igb_tx_buffer *txbuf;
3381	int i;
3382#ifdef DEV_NETMAP
3383	struct netmap_adapter *na = NA(adapter->ifp);
3384	struct netmap_slot *slot;
3385#endif /* DEV_NETMAP */
3386
3387	/* Clear the old descriptor contents */
3388	IGB_TX_LOCK(txr);
3389#ifdef DEV_NETMAP
3390	slot = netmap_reset(na, NR_TX, txr->me, 0);
3391#endif /* DEV_NETMAP */
3392	bzero((void *)txr->tx_base,
3393	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3394	/* Reset indices */
3395	txr->next_avail_desc = 0;
3396	txr->next_to_clean = 0;
3397
3398	/* Free any existing tx buffers. */
3399        txbuf = txr->tx_buffers;
3400	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3401		if (txbuf->m_head != NULL) {
3402			bus_dmamap_sync(txr->txtag, txbuf->map,
3403			    BUS_DMASYNC_POSTWRITE);
3404			bus_dmamap_unload(txr->txtag, txbuf->map);
3405			m_freem(txbuf->m_head);
3406			txbuf->m_head = NULL;
3407		}
3408#ifdef DEV_NETMAP
3409		if (slot) {
3410			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3411			/* no need to set the address */
3412			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3413		}
3414#endif /* DEV_NETMAP */
3415		/* clear the watch index */
3416		txbuf->next_eop = -1;
3417        }
3418
3419	/* Set number of descriptors available */
3420	txr->tx_avail = adapter->num_tx_desc;
3421
3422	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3423	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3424	IGB_TX_UNLOCK(txr);
3425}
3426
3427/*********************************************************************
3428 *
3429 *  Initialize all transmit rings.
3430 *
3431 **********************************************************************/
3432static void
3433igb_setup_transmit_structures(struct adapter *adapter)
3434{
3435	struct tx_ring *txr = adapter->tx_rings;
3436
3437	for (int i = 0; i < adapter->num_queues; i++, txr++)
3438		igb_setup_transmit_ring(txr);
3439
3440	return;
3441}
3442
3443/*********************************************************************
3444 *
3445 *  Enable transmit unit.
3446 *
3447 **********************************************************************/
3448static void
3449igb_initialize_transmit_units(struct adapter *adapter)
3450{
3451	struct tx_ring	*txr = adapter->tx_rings;
3452	struct e1000_hw *hw = &adapter->hw;
3453	u32		tctl, txdctl;
3454
3455	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3456	tctl = txdctl = 0;
3457
3458	/* Setup the Tx Descriptor Rings */
3459	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3460		u64 bus_addr = txr->txdma.dma_paddr;
3461
3462		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3463		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3464		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3465		    (uint32_t)(bus_addr >> 32));
3466		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3467		    (uint32_t)bus_addr);
3468
3469		/* Setup the HW Tx Head and Tail descriptor pointers */
3470		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3471		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3472
3473		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3474		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3475		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3476
3477		txr->queue_status = IGB_QUEUE_IDLE;
3478
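		/*
		** TXDCTL packs the prefetch, host, and write-back
		** thresholds into successive byte fields, hence the
		** shifts by 8 and 16 below.
		*/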
3479		txdctl |= IGB_TX_PTHRESH;
3480		txdctl |= IGB_TX_HTHRESH << 8;
3481		txdctl |= IGB_TX_WTHRESH << 16;
3482		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3483		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3484	}
3485
3486	if (adapter->vf_ifp)
3487		return;
3488
3489	e1000_config_collision_dist(hw);
3490
3491	/* Program the Transmit Control Register */
3492	tctl = E1000_READ_REG(hw, E1000_TCTL);
3493	tctl &= ~E1000_TCTL_CT;
3494	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3495		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3496
3497	/* This write will effectively turn on the transmit unit. */
3498	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3499}
3500
3501/*********************************************************************
3502 *
3503 *  Free all transmit rings.
3504 *
3505 **********************************************************************/
3506static void
3507igb_free_transmit_structures(struct adapter *adapter)
3508{
3509	struct tx_ring *txr = adapter->tx_rings;
3510
3511	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3512		IGB_TX_LOCK(txr);
3513		igb_free_transmit_buffers(txr);
3514		igb_dma_free(adapter, &txr->txdma);
3515		IGB_TX_UNLOCK(txr);
3516		IGB_TX_LOCK_DESTROY(txr);
3517	}
3518	free(adapter->tx_rings, M_DEVBUF);
3519}
3520
3521/*********************************************************************
3522 *
3523 *  Free transmit ring related data structures.
3524 *
3525 **********************************************************************/
3526static void
3527igb_free_transmit_buffers(struct tx_ring *txr)
3528{
3529	struct adapter *adapter = txr->adapter;
3530	struct igb_tx_buffer *tx_buffer;
3531	int             i;
3532
3533	INIT_DEBUGOUT("free_transmit_ring: begin");
3534
3535	if (txr->tx_buffers == NULL)
3536		return;
3537
3538	tx_buffer = txr->tx_buffers;
3539	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3540		if (tx_buffer->m_head != NULL) {
3541			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3542			    BUS_DMASYNC_POSTWRITE);
3543			bus_dmamap_unload(txr->txtag,
3544			    tx_buffer->map);
3545			m_freem(tx_buffer->m_head);
3546			tx_buffer->m_head = NULL;
3547			if (tx_buffer->map != NULL) {
3548				bus_dmamap_destroy(txr->txtag,
3549				    tx_buffer->map);
3550				tx_buffer->map = NULL;
3551			}
3552		} else if (tx_buffer->map != NULL) {
3553			bus_dmamap_unload(txr->txtag,
3554			    tx_buffer->map);
3555			bus_dmamap_destroy(txr->txtag,
3556			    tx_buffer->map);
3557			tx_buffer->map = NULL;
3558		}
3559	}
3560#if __FreeBSD_version >= 800000
3561	if (txr->br != NULL)
3562		buf_ring_free(txr->br, M_DEVBUF);
3563#endif
3564	if (txr->tx_buffers != NULL) {
3565		free(txr->tx_buffers, M_DEVBUF);
3566		txr->tx_buffers = NULL;
3567	}
3568	if (txr->txtag != NULL) {
3569		bus_dma_tag_destroy(txr->txtag);
3570		txr->txtag = NULL;
3571	}
3572	return;
3573}
3574
3575/**********************************************************************
3576 *
3577 *  Setup work for hardware segmentation offload (TSO)
3578 *
3579 **********************************************************************/
3580static bool
3581igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3582	struct ip *ip, struct tcphdr *th)
3583{
3584	struct adapter *adapter = txr->adapter;
3585	struct e1000_adv_tx_context_desc *TXD;
3586	struct igb_tx_buffer        *tx_buffer;
3587	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3588	u32 mss_l4len_idx = 0;
3589	u16 vtag = 0;
3590	int ctxd, ip_hlen, tcp_hlen;
3591
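	/*
	** A single advanced context descriptor carries the header
	** lengths and MSS; the hardware uses it to segment the
	** payload and replicate the headers for each TSO segment.
	*/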
3592	ctxd = txr->next_avail_desc;
3593	tx_buffer = &txr->tx_buffers[ctxd];
3594	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3595
3596	ip->ip_sum = 0;
3597	ip_hlen = ip->ip_hl << 2;
3598	tcp_hlen = th->th_off << 2;
3599
3600	/* VLAN MACLEN IPLEN */
3601	if (mp->m_flags & M_VLANTAG) {
3602		vtag = htole16(mp->m_pkthdr.ether_vtag);
3603		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3604	}
3605
3606	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3607	vlan_macip_lens |= ip_hlen;
3608	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3609
3610	/* ADV DTYPE TUCMD */
3611	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3612	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3613	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3614	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3615
3616	/* MSS L4LEN IDX */
3617	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3618	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3619	/* 82575 needs the queue index added */
3620	if (adapter->hw.mac.type == e1000_82575)
3621		mss_l4len_idx |= txr->me << 4;
3622	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3623
3624	TXD->seqnum_seed = htole32(0);
3625	tx_buffer->m_head = NULL;
3626	tx_buffer->next_eop = -1;
3627
3628	if (++ctxd == adapter->num_tx_desc)
3629		ctxd = 0;
3630
3631	txr->tx_avail--;
3632	txr->next_avail_desc = ctxd;
3633	return TRUE;
3634}
3635
3636
3637/*********************************************************************
3638 *
3639 *  Context Descriptor setup for VLAN or CSUM
3640 *
3641 **********************************************************************/
3642
3643static bool
3644igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3645{
3646	struct adapter *adapter = txr->adapter;
3647	struct e1000_adv_tx_context_desc *TXD;
3648	struct igb_tx_buffer        *tx_buffer;
3649	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3650	struct ether_vlan_header *eh;
3651	struct ip *ip = NULL;
3652	struct ip6_hdr *ip6;
3653	int  ehdrlen, ctxd, ip_hlen = 0;
3654	u16	etype, vtag = 0;
3655	u8	ipproto = 0;
3656	bool	offload = TRUE;
3657
3658	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3659		offload = FALSE;
3660
3661	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3662	ctxd = txr->next_avail_desc;
3663	tx_buffer = &txr->tx_buffers[ctxd];
3664	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3665
3666	/*
3667	** In advanced descriptors the vlan tag must
3668	** be placed into the context descriptor, thus
3669	** we need to be here just for that setup.
3670	*/
3671	if (mp->m_flags & M_VLANTAG) {
3672		vtag = htole16(mp->m_pkthdr.ether_vtag);
3673		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3674	} else if (offload == FALSE)
3675		return FALSE;
3676
3677	/*
3678	 * Determine where frame payload starts.
3679	 * Jump over vlan headers if already present,
3680	 * helpful for QinQ too.
3681	 */
3682	eh = mtod(mp, struct ether_vlan_header *);
3683	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3684		etype = ntohs(eh->evl_proto);
3685		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3686	} else {
3687		etype = ntohs(eh->evl_encap_proto);
3688		ehdrlen = ETHER_HDR_LEN;
3689	}
3690
3691	/* Set the ether header length */
3692	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3693
3694	switch (etype) {
3695		case ETHERTYPE_IP:
3696			ip = (struct ip *)(mp->m_data + ehdrlen);
3697			ip_hlen = ip->ip_hl << 2;
3698			if (mp->m_len < ehdrlen + ip_hlen) {
3699				offload = FALSE;
3700				break;
3701			}
3702			ipproto = ip->ip_p;
3703			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3704			break;
3705		case ETHERTYPE_IPV6:
3706			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3707			ip_hlen = sizeof(struct ip6_hdr);
3708			ipproto = ip6->ip6_nxt;
3709			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3710			break;
3711		default:
3712			offload = FALSE;
3713			break;
3714	}
3715
3716	vlan_macip_lens |= ip_hlen;
3717	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3718
3719	switch (ipproto) {
3720		case IPPROTO_TCP:
3721			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3722				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3723			break;
3724		case IPPROTO_UDP:
3725			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3726				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3727			break;
3728#if __FreeBSD_version >= 800000
3729		case IPPROTO_SCTP:
3730			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3731				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3732			break;
3733#endif
3734		default:
3735			offload = FALSE;
3736			break;
3737	}
3738
3739	/* 82575 needs the queue index added */
3740	if (adapter->hw.mac.type == e1000_82575)
3741		mss_l4len_idx = txr->me << 4;
3742
3743	/* Now copy bits into descriptor */
3744	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3745	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3746	TXD->seqnum_seed = htole32(0);
3747	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3748
3749	tx_buffer->m_head = NULL;
3750	tx_buffer->next_eop = -1;
3751
3752	/* We've consumed the first desc, adjust counters */
3753	if (++ctxd == adapter->num_tx_desc)
3754		ctxd = 0;
3755	txr->next_avail_desc = ctxd;
3756	--txr->tx_avail;
3757
3758	return (offload);
3759}
3760
3761
3762/**********************************************************************
3763 *
3764 *  Examine each tx_buffer in the used queue. If the hardware is done
3765 *  processing the packet then free associated resources. The
3766 *  tx_buffer is put back on the free queue.
3767 *
3768 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3769 **********************************************************************/
3770static bool
3771igb_txeof(struct tx_ring *txr)
3772{
3773	struct adapter	*adapter = txr->adapter;
3774	int first, last, done, processed;
3775	struct igb_tx_buffer *tx_buffer;
3776	struct e1000_tx_desc *tx_desc, *eop_desc;
3777	struct ifnet   *ifp = adapter->ifp;
3778
3779	IGB_TX_LOCK_ASSERT(txr);
3780
3781#ifdef DEV_NETMAP
3782	if (ifp->if_capenable & IFCAP_NETMAP) {
3783		struct netmap_adapter *na = NA(ifp);
3784
3785		selwakeuppri(&na->tx_rings[txr->me].si, PI_NET);
3786		IGB_TX_UNLOCK(txr);
3787		IGB_CORE_LOCK(adapter);
3788		selwakeuppri(&na->tx_si, PI_NET);
3789		IGB_CORE_UNLOCK(adapter);
3790		IGB_TX_LOCK(txr);
3791		return FALSE;
3792	}
3793#endif /* DEV_NETMAP */
3794	if (txr->tx_avail == adapter->num_tx_desc) {
3795		txr->queue_status = IGB_QUEUE_IDLE;
3796		return FALSE;
3797	}
3798
3799	processed = 0;
3800	first = txr->next_to_clean;
3801	tx_desc = &txr->tx_base[first];
3802	tx_buffer = &txr->tx_buffers[first];
3803	last = tx_buffer->next_eop;
3804	eop_desc = &txr->tx_base[last];
3805
3806	/*
3807	 * What this does is get the index of the
3808	 * first descriptor AFTER the EOP of the
3809	 * first packet, that way we can do the
3810	 * simple comparison on the inner while loop.
3811	 */
3812	if (++last == adapter->num_tx_desc)
3813 		last = 0;
3814	done = last;
3815
3816	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3817	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3818
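	/*
	** The outer loop advances one packet at a time; the DD
	** (descriptor done) bit in the EOP descriptor tells us the
	** hardware has finished with every descriptor of that packet.
	*/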
3819	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3820		/* We clean the range of the packet */
3821		while (first != done) {
3822			tx_desc->upper.data = 0;
3823			tx_desc->lower.data = 0;
3824			tx_desc->buffer_addr = 0;
3825			++txr->tx_avail;
3826			++processed;
3827
3828			if (tx_buffer->m_head) {
3829				txr->bytes +=
3830				    tx_buffer->m_head->m_pkthdr.len;
3831				bus_dmamap_sync(txr->txtag,
3832				    tx_buffer->map,
3833				    BUS_DMASYNC_POSTWRITE);
3834				bus_dmamap_unload(txr->txtag,
3835				    tx_buffer->map);
3836
3837				m_freem(tx_buffer->m_head);
3838				tx_buffer->m_head = NULL;
3839			}
3840			tx_buffer->next_eop = -1;
3841			txr->watchdog_time = ticks;
3842
3843			if (++first == adapter->num_tx_desc)
3844				first = 0;
3845
3846			tx_buffer = &txr->tx_buffers[first];
3847			tx_desc = &txr->tx_base[first];
3848		}
3849		++txr->packets;
3850		++ifp->if_opackets;
3851		/* See if we can continue to the next packet */
3852		last = tx_buffer->next_eop;
3853		if (last != -1) {
3854			eop_desc = &txr->tx_base[last];
3855			/* Get new done point */
3856			if (++last == adapter->num_tx_desc) last = 0;
3857			done = last;
3858		} else
3859			break;
3860	}
3861	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3862	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3863
3864	txr->next_to_clean = first;
3865
3866	/*
3867	** Watchdog calculation, we know there's
3868	** work outstanding or the first return
3869	** would have been taken, so none processed
3870	** for too long indicates a hang.
3871	*/
3872	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3873		txr->queue_status |= IGB_QUEUE_HUNG;
3874	/*
3875	 * If we have a minimum free,
3876	 * clear depleted state bit
3877	 */
3878	if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3879		txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3880
3881	/* All clean, turn off the watchdog */
3882	if (txr->tx_avail == adapter->num_tx_desc) {
3883		txr->queue_status = IGB_QUEUE_IDLE;
3884		return (FALSE);
3885	}
3886
3887	return (TRUE);
3888}
3889
3890/*********************************************************************
3891 *
3892 *  Refresh mbuf buffers for RX descriptor rings
3893 *   - keeps its own state so discards due to resource
3894 *     exhaustion are unnecessary; if an mbuf cannot be obtained
3895 *     it just returns, keeping its placeholder, and can simply
3896 *     be recalled later to try again.
3897 *
3898 **********************************************************************/
3899static void
3900igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3901{
3902	struct adapter		*adapter = rxr->adapter;
3903	bus_dma_segment_t	hseg[1];
3904	bus_dma_segment_t	pseg[1];
3905	struct igb_rx_buf	*rxbuf;
3906	struct mbuf		*mh, *mp;
3907	int			i, j, nsegs, error;
3908	bool			refreshed = FALSE;
3909
3910	i = j = rxr->next_to_refresh;
3911	/*
3912	** Get one descriptor beyond
3913	** our work mark to control
3914	** the loop.
3915	*/
3916	if (++j == adapter->num_rx_desc)
3917		j = 0;
3918
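	/*
	** 'i' is the slot being refreshed and 'j' runs one ahead of
	** it, acting as the loop sentinel so we stop just short of
	** the caller-supplied limit.
	*/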
3919	while (j != limit) {
3920		rxbuf = &rxr->rx_buffers[i];
3921		/* No hdr mbuf used with header split off */
3922		if (rxr->hdr_split == FALSE)
3923			goto no_split;
3924		if (rxbuf->m_head == NULL) {
3925			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3926			if (mh == NULL)
3927				goto update;
3928		} else
3929			mh = rxbuf->m_head;
3930
3931		mh->m_pkthdr.len = MHLEN;
3932		mh->m_len = MHLEN;
3933		mh->m_flags |= M_PKTHDR;
3934		/* Get the memory mapping */
3935		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3936		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3937		if (error != 0) {
3938			printf("Refresh mbufs: hdr dmamap load"
3939			    " failure - %d\n", error);
3940			m_free(mh);
3941			rxbuf->m_head = NULL;
3942			goto update;
3943		}
3944		rxbuf->m_head = mh;
3945		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3946		    BUS_DMASYNC_PREREAD);
3947		rxr->rx_base[i].read.hdr_addr =
3948		    htole64(hseg[0].ds_addr);
3949no_split:
3950		if (rxbuf->m_pack == NULL) {
3951			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3952			    M_PKTHDR, adapter->rx_mbuf_sz);
3953			if (mp == NULL)
3954				goto update;
3955		} else
3956			mp = rxbuf->m_pack;
3957
3958		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3959		/* Get the memory mapping */
3960		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3961		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3962		if (error != 0) {
3963			printf("Refresh mbufs: payload dmamap load"
3964			    " failure - %d\n", error);
3965			m_free(mp);
3966			rxbuf->m_pack = NULL;
3967			goto update;
3968		}
3969		rxbuf->m_pack = mp;
3970		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3971		    BUS_DMASYNC_PREREAD);
3972		rxr->rx_base[i].read.pkt_addr =
3973		    htole64(pseg[0].ds_addr);
3974		refreshed = TRUE; /* I feel wefreshed :) */
3975
3976		i = j; /* our next is precalculated */
3977		rxr->next_to_refresh = i;
3978		if (++j == adapter->num_rx_desc)
3979			j = 0;
3980	}
3981update:
3982	if (refreshed) /* update tail */
3983		E1000_WRITE_REG(&adapter->hw,
3984		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3985	return;
3986}
3987
3988
3989/*********************************************************************
3990 *
3991 *  Allocate memory for rx_buffer structures. Since we use one
3992 *  rx_buffer per received packet, the maximum number of rx_buffer's
3993 *  that we'll need is equal to the number of receive descriptors
3994 *  that we've allocated.
3995 *
3996 **********************************************************************/
3997static int
3998igb_allocate_receive_buffers(struct rx_ring *rxr)
3999{
4000	struct	adapter 	*adapter = rxr->adapter;
4001	device_t 		dev = adapter->dev;
4002	struct igb_rx_buf	*rxbuf;
4003	int             	i, bsize, error;
4004
4005	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4006	if (!(rxr->rx_buffers =
4007	    (struct igb_rx_buf *) malloc(bsize,
4008	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
4009		device_printf(dev, "Unable to allocate rx_buffer memory\n");
4010		error = ENOMEM;
4011		goto fail;
4012	}
4013
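	/*
	** Two DMA tags are used per ring: a small one for the
	** header mbufs used with header split, and a larger one
	** for payload clusters (up to 9K jumbo buffers).
	*/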
4014	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4015				   1, 0,		/* alignment, bounds */
4016				   BUS_SPACE_MAXADDR,	/* lowaddr */
4017				   BUS_SPACE_MAXADDR,	/* highaddr */
4018				   NULL, NULL,		/* filter, filterarg */
4019				   MSIZE,		/* maxsize */
4020				   1,			/* nsegments */
4021				   MSIZE,		/* maxsegsize */
4022				   0,			/* flags */
4023				   NULL,		/* lockfunc */
4024				   NULL,		/* lockfuncarg */
4025				   &rxr->htag))) {
4026		device_printf(dev, "Unable to create RX DMA tag\n");
4027		goto fail;
4028	}
4029
4030	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4031				   1, 0,		/* alignment, bounds */
4032				   BUS_SPACE_MAXADDR,	/* lowaddr */
4033				   BUS_SPACE_MAXADDR,	/* highaddr */
4034				   NULL, NULL,		/* filter, filterarg */
4035				   MJUM9BYTES,		/* maxsize */
4036				   1,			/* nsegments */
4037				   MJUM9BYTES,		/* maxsegsize */
4038				   0,			/* flags */
4039				   NULL,		/* lockfunc */
4040				   NULL,		/* lockfuncarg */
4041				   &rxr->ptag))) {
4042		device_printf(dev, "Unable to create RX payload DMA tag\n");
4043		goto fail;
4044	}
4045
4046	for (i = 0; i < adapter->num_rx_desc; i++) {
4047		rxbuf = &rxr->rx_buffers[i];
4048		error = bus_dmamap_create(rxr->htag,
4049		    BUS_DMA_NOWAIT, &rxbuf->hmap);
4050		if (error) {
4051			device_printf(dev,
4052			    "Unable to create RX head DMA maps\n");
4053			goto fail;
4054		}
4055		error = bus_dmamap_create(rxr->ptag,
4056		    BUS_DMA_NOWAIT, &rxbuf->pmap);
4057		if (error) {
4058			device_printf(dev,
4059			    "Unable to create RX packet DMA maps\n");
4060			goto fail;
4061		}
4062	}
4063
4064	return (0);
4065
4066fail:
4067	/* Frees all, but can handle partial completion */
4068	igb_free_receive_structures(adapter);
4069	return (error);
4070}
4071
4072
4073static void
4074igb_free_receive_ring(struct rx_ring *rxr)
4075{
4076	struct	adapter		*adapter = rxr->adapter;
4077	struct igb_rx_buf	*rxbuf;
4078
4079
4080	for (int i = 0; i < adapter->num_rx_desc; i++) {
4081		rxbuf = &rxr->rx_buffers[i];
4082		if (rxbuf->m_head != NULL) {
4083			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4084			    BUS_DMASYNC_POSTREAD);
4085			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4086			rxbuf->m_head->m_flags |= M_PKTHDR;
4087			m_freem(rxbuf->m_head);
4088		}
4089		if (rxbuf->m_pack != NULL) {
4090			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4091			    BUS_DMASYNC_POSTREAD);
4092			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4093			rxbuf->m_pack->m_flags |= M_PKTHDR;
4094			m_freem(rxbuf->m_pack);
4095		}
4096		rxbuf->m_head = NULL;
4097		rxbuf->m_pack = NULL;
4098	}
4099}
4100
4101
4102/*********************************************************************
4103 *
4104 *  Initialize a receive ring and its buffers.
4105 *
4106 **********************************************************************/
4107static int
4108igb_setup_receive_ring(struct rx_ring *rxr)
4109{
4110	struct	adapter		*adapter;
4111	struct  ifnet		*ifp;
4112	device_t		dev;
4113	struct igb_rx_buf	*rxbuf;
4114	bus_dma_segment_t	pseg[1], hseg[1];
4115	struct lro_ctrl		*lro = &rxr->lro;
4116	int			rsize, nsegs, error = 0;
4117#ifdef DEV_NETMAP
4118	struct netmap_adapter *na = NA(rxr->adapter->ifp);
4119	struct netmap_slot *slot;
4120#endif /* DEV_NETMAP */
4121
4122	adapter = rxr->adapter;
4123	dev = adapter->dev;
4124	ifp = adapter->ifp;
4125
4126	/* Clear the ring contents */
4127	IGB_RX_LOCK(rxr);
4128#ifdef DEV_NETMAP
4129	slot = netmap_reset(na, NR_RX, rxr->me, 0);
4130#endif /* DEV_NETMAP */
4131	rsize = roundup2(adapter->num_rx_desc *
4132	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4133	bzero((void *)rxr->rx_base, rsize);
4134
4135	/*
4136	** Free current RX buffer structures and their mbufs
4137	*/
4138	igb_free_receive_ring(rxr);
4139
4140	/* Configure for header split? */
4141	if (igb_header_split)
4142		rxr->hdr_split = TRUE;
4143
4144        /* Now replenish the ring mbufs */
4145	for (int j = 0; j < adapter->num_rx_desc; ++j) {
4146		struct mbuf	*mh, *mp;
4147
4148		rxbuf = &rxr->rx_buffers[j];
4149#ifdef DEV_NETMAP
4150		if (slot) {
4151			/* slot sj is mapped to the i-th NIC-ring entry */
4152			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4153			uint64_t paddr;
4154			void *addr;
4155
4156			addr = PNMB(slot + sj, &paddr);
4157			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4158			/* Update descriptor */
4159			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4160			continue;
4161		}
4162#endif /* DEV_NETMAP */
4163		if (rxr->hdr_split == FALSE)
4164			goto skip_head;
4165
4166		/* First the header */
4167		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
4168		if (rxbuf->m_head == NULL) {
4169			error = ENOBUFS;
4170                        goto fail;
4171		}
4172		m_adj(rxbuf->m_head, ETHER_ALIGN);
4173		mh = rxbuf->m_head;
4174		mh->m_len = mh->m_pkthdr.len = MHLEN;
4175		mh->m_flags |= M_PKTHDR;
4176		/* Get the memory mapping */
4177		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4178		    rxbuf->hmap, rxbuf->m_head, hseg,
4179		    &nsegs, BUS_DMA_NOWAIT);
4180		if (error != 0) /* Nothing elegant to do here */
4181                        goto fail;
4182		bus_dmamap_sync(rxr->htag,
4183		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4184		/* Update descriptor */
4185		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4186
4187skip_head:
4188		/* Now the payload cluster */
4189		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4190		    M_PKTHDR, adapter->rx_mbuf_sz);
4191		if (rxbuf->m_pack == NULL) {
4192			error = ENOBUFS;
4193                        goto fail;
4194		}
4195		mp = rxbuf->m_pack;
4196		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4197		/* Get the memory mapping */
4198		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4199		    rxbuf->pmap, mp, pseg,
4200		    &nsegs, BUS_DMA_NOWAIT);
4201		if (error != 0)
4202                        goto fail;
4203		bus_dmamap_sync(rxr->ptag,
4204		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4205		/* Update descriptor */
4206		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4207        }
4208
4209	/* Setup our descriptor indices */
4210	rxr->next_to_check = 0;
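	/*
	** The ring has just been fully populated, so the refresh
	** point is the last descriptor in the ring.
	*/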
4211	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4212	rxr->lro_enabled = FALSE;
4213	rxr->rx_split_packets = 0;
4214	rxr->rx_bytes = 0;
4215
4216	rxr->fmp = NULL;
4217	rxr->lmp = NULL;
4218	rxr->discard = FALSE;
4219
4220	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4221	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4222
4223	/*
4224	** Now set up the LRO interface; we
4225	** also only do header split when LRO
4226	** is enabled, since it is so often
4227	** undesirable otherwise.
4228	*/
4229	if (ifp->if_capenable & IFCAP_LRO) {
4230		error = tcp_lro_init(lro);
4231		if (error) {
4232			device_printf(dev, "LRO Initialization failed!\n");
4233			goto fail;
4234		}
4235		INIT_DEBUGOUT("RX LRO Initialized\n");
4236		rxr->lro_enabled = TRUE;
4237		lro->ifp = adapter->ifp;
4238	}
4239
4240	IGB_RX_UNLOCK(rxr);
4241	return (0);
4242
4243fail:
4244	igb_free_receive_ring(rxr);
4245	IGB_RX_UNLOCK(rxr);
4246	return (error);
4247}
4248
4249
4250/*********************************************************************
4251 *
4252 *  Initialize all receive rings.
4253 *
4254 **********************************************************************/
4255static int
4256igb_setup_receive_structures(struct adapter *adapter)
4257{
4258	struct rx_ring *rxr = adapter->rx_rings;
4259	int i;
4260
4261	for (i = 0; i < adapter->num_queues; i++, rxr++)
4262		if (igb_setup_receive_ring(rxr))
4263			goto fail;
4264
4265	return (0);
4266fail:
4267	/*
4268	 * Free RX buffers allocated so far, we will only handle
4269	 * the rings that completed, the failing case will have
4270	 * cleaned up for itself. 'i' is the endpoint.
4271	 */
4272	for (int j = 0; j < i; ++j) {
4273		rxr = &adapter->rx_rings[j];
4274		IGB_RX_LOCK(rxr);
4275		igb_free_receive_ring(rxr);
4276		IGB_RX_UNLOCK(rxr);
4277	}
4278
4279	return (ENOBUFS);
4280}
4281
4282/*********************************************************************
4283 *
4284 *  Enable receive unit.
4285 *
4286 **********************************************************************/
4287static void
4288igb_initialize_receive_units(struct adapter *adapter)
4289{
4290	struct rx_ring	*rxr = adapter->rx_rings;
4291	struct ifnet	*ifp = adapter->ifp;
4292	struct e1000_hw *hw = &adapter->hw;
4293	u32		rctl, rxcsum, psize, srrctl = 0;
4294
4295	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4296
4297	/*
4298	 * Make sure receives are disabled while setting
4299	 * up the descriptor ring
4300	 */
4301	rctl = E1000_READ_REG(hw, E1000_RCTL);
4302	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4303
4304	/*
4305	** Set up for header split
4306	*/
4307	if (igb_header_split) {
4308		/* Use a standard mbuf for the header */
4309		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4310		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4311	} else
4312		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4313
4314	/*
4315	** Set up for jumbo frames
4316	*/
4317	if (ifp->if_mtu > ETHERMTU) {
4318		rctl |= E1000_RCTL_LPE;
4319		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4320			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4321			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4322		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4323			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4324			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4325		}
4326		/* Set maximum packet len */
4327		psize = adapter->max_frame_size;
4328		/* are we on a vlan? */
4329		if (adapter->ifp->if_vlantrunk != NULL)
4330			psize += VLAN_TAG_SIZE;
4331		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4332	} else {
4333		rctl &= ~E1000_RCTL_LPE;
4334		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4335		rctl |= E1000_RCTL_SZ_2048;
4336	}
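	/*
	** Note: SRRCTL.BSIZEPKT is expressed in 1KB units, which is
	** why the buffer sizes above are shifted by BSIZEPKT_SHIFT.
	*/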
4337
4338	/* Setup the Base and Length of the Rx Descriptor Rings */
4339	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4340		u64 bus_addr = rxr->rxdma.dma_paddr;
4341		u32 rxdctl;
4342
4343		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4344		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4345		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4346		    (uint32_t)(bus_addr >> 32));
4347		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4348		    (uint32_t)bus_addr);
4349		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4350		/* Enable this Queue */
4351		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4352		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4353		rxdctl &= 0xFFF00000;
4354		rxdctl |= IGB_RX_PTHRESH;
4355		rxdctl |= IGB_RX_HTHRESH << 8;
4356		rxdctl |= IGB_RX_WTHRESH << 16;
4357		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4358	}
4359
4360	/*
4361	** Setup for RX MultiQueue
4362	*/
4363	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4364	if (adapter->num_queues > 1) {
4365		u32 random[10], mrqc, shift = 0;
4366		union igb_reta {
4367			u32 dword;
4368			u8  bytes[4];
4369		} reta;
4370
4371		arc4rand(&random, sizeof(random), 0);
4372		if (adapter->hw.mac.type == e1000_82575)
4373			shift = 6;
4374		/* Fill the 128-entry redirection table, 4 entries per register */
4375		for (int i = 0; i < 128; i++) {
4376			reta.bytes[i & 3] =
4377			    (i % adapter->num_queues) << shift;
4378			if ((i & 3) == 3)
4379				E1000_WRITE_REG(hw,
4380				    E1000_RETA(i >> 2), reta.dword);
4381		}
4382		/* Now fill in the RSS random key */
4383		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4384		for (int i = 0; i < 10; i++)
4385			E1000_WRITE_REG_ARRAY(hw,
4386			    E1000_RSSRK(0), i, random[i]);
4387
4388		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4389		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4390		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4391		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4392		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4393		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4394		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4395		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4396
4397		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4398
4399		/*
4400		** NOTE: Receive Full-Packet Checksum Offload
4401		** is mutually exclusive with Multiqueue. However
4402		** this is not the same as TCP/IP checksums which
4403		** still work.
4404		*/
4405		rxcsum |= E1000_RXCSUM_PCSD;
4406#if __FreeBSD_version >= 800000
4407		/* For SCTP Offload */
4408		if ((hw->mac.type == e1000_82576)
4409		    && (ifp->if_capenable & IFCAP_RXCSUM))
4410			rxcsum |= E1000_RXCSUM_CRCOFL;
4411#endif
4412	} else {
4413		/* Non RSS setup */
4414		if (ifp->if_capenable & IFCAP_RXCSUM) {
4415			rxcsum |= E1000_RXCSUM_IPPCSE;
4416#if __FreeBSD_version >= 800000
4417			if (adapter->hw.mac.type == e1000_82576)
4418				rxcsum |= E1000_RXCSUM_CRCOFL;
4419#endif
4420		} else
4421			rxcsum &= ~E1000_RXCSUM_TUOFL;
4422	}
4423	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4424
4425	/* Setup the Receive Control Register */
4426	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4427	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4428		   E1000_RCTL_RDMTS_HALF |
4429		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4430	/* Strip CRC bytes. */
4431	rctl |= E1000_RCTL_SECRC;
4432	/* Make sure VLAN Filters are off */
4433	rctl &= ~E1000_RCTL_VFE;
4434	/* Don't store bad packets */
4435	rctl &= ~E1000_RCTL_SBP;
4436
4437	/* Enable Receives */
4438	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4439
4440	/*
4441	 * Setup the HW Rx Head and Tail Descriptor Pointers
4442	 *   - needs to be after enable
4443	 */
4444	for (int i = 0; i < adapter->num_queues; i++) {
4445		rxr = &adapter->rx_rings[i];
4446		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4447#ifdef DEV_NETMAP
4448		/*
4449		 * An init() while a netmap client is active must
4450		 * preserve the rx buffers passed to userspace.
4451		 * In this driver it means we adjust RDT to
4452		 * something different from next_to_refresh
4453		 * (which is not used in netmap mode).
4454		 */
4455		if (ifp->if_capenable & IFCAP_NETMAP) {
4456			struct netmap_adapter *na = NA(adapter->ifp);
4457			struct netmap_kring *kring = &na->rx_rings[i];
4458			int t = rxr->next_to_refresh - kring->nr_hwavail;
4459
4460			if (t >= adapter->num_rx_desc)
4461				t -= adapter->num_rx_desc;
4462			else if (t < 0)
4463				t += adapter->num_rx_desc;
4464			E1000_WRITE_REG(hw, E1000_RDT(i), t);
4465		} else
4466#endif /* DEV_NETMAP */
4467		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4468	}
4469	return;
4470}
4471
4472/*********************************************************************
4473 *
4474 *  Free receive rings.
4475 *
4476 **********************************************************************/
4477static void
4478igb_free_receive_structures(struct adapter *adapter)
4479{
4480	struct rx_ring *rxr = adapter->rx_rings;
4481
4482	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4483		struct lro_ctrl	*lro = &rxr->lro;
4484		igb_free_receive_buffers(rxr);
4485		tcp_lro_free(lro);
4486		igb_dma_free(adapter, &rxr->rxdma);
4487	}
4488
4489	free(adapter->rx_rings, M_DEVBUF);
4490}
4491
4492/*********************************************************************
4493 *
4494 *  Free receive ring data structures.
4495 *
4496 **********************************************************************/
4497static void
4498igb_free_receive_buffers(struct rx_ring *rxr)
4499{
4500	struct adapter		*adapter = rxr->adapter;
4501	struct igb_rx_buf	*rxbuf;
4502	int i;
4503
4504	INIT_DEBUGOUT("free_receive_structures: begin");
4505
4506	/* Cleanup any existing buffers */
4507	if (rxr->rx_buffers != NULL) {
4508		for (i = 0; i < adapter->num_rx_desc; i++) {
4509			rxbuf = &rxr->rx_buffers[i];
4510			if (rxbuf->m_head != NULL) {
4511				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4512				    BUS_DMASYNC_POSTREAD);
4513				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4514				rxbuf->m_head->m_flags |= M_PKTHDR;
4515				m_freem(rxbuf->m_head);
4516			}
4517			if (rxbuf->m_pack != NULL) {
4518				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4519				    BUS_DMASYNC_POSTREAD);
4520				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4521				rxbuf->m_pack->m_flags |= M_PKTHDR;
4522				m_freem(rxbuf->m_pack);
4523			}
4524			rxbuf->m_head = NULL;
4525			rxbuf->m_pack = NULL;
4526			if (rxbuf->hmap != NULL) {
4527				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4528				rxbuf->hmap = NULL;
4529			}
4530			if (rxbuf->pmap != NULL) {
4531				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4532				rxbuf->pmap = NULL;
4533			}
4534		}
4535		if (rxr->rx_buffers != NULL) {
4536			free(rxr->rx_buffers, M_DEVBUF);
4537			rxr->rx_buffers = NULL;
4538		}
4539	}
4540
4541	if (rxr->htag != NULL) {
4542		bus_dma_tag_destroy(rxr->htag);
4543		rxr->htag = NULL;
4544	}
4545	if (rxr->ptag != NULL) {
4546		bus_dma_tag_destroy(rxr->ptag);
4547		rxr->ptag = NULL;
4548	}
4549}
4550
4551static __inline void
4552igb_rx_discard(struct rx_ring *rxr, int i)
4553{
4554	struct igb_rx_buf	*rbuf;
4555
4556	rbuf = &rxr->rx_buffers[i];
4557
4558	/* Partially received? Free the chain */
4559	if (rxr->fmp != NULL) {
4560		rxr->fmp->m_flags |= M_PKTHDR;
4561		m_freem(rxr->fmp);
4562		rxr->fmp = NULL;
4563		rxr->lmp = NULL;
4564	}
4565
4566	/*
4567	** With advanced descriptors the writeback
4568	** clobbers the buffer addresses, so it's easier
4569	** to just free the existing mbufs and take
4570	** the normal refresh path to get new buffers
4571	** and mappings.
4572	*/
4573	if (rbuf->m_head) {
4574		m_free(rbuf->m_head);
4575		rbuf->m_head = NULL;
4576	}
4577
4578	if (rbuf->m_pack) {
4579		m_free(rbuf->m_pack);
4580		rbuf->m_pack = NULL;
4581	}
4582
4583	return;
4584}
4585
4586static __inline void
4587igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4588{
4589
4590	/*
4591	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4592	 * has been verified by hardware, and the packet must not carry a
4593	 * VLAN tag in its ethernet header.
4594	 */
4595	if (rxr->lro_enabled &&
4596	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4597	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4598	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4599	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4600	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4601	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4602		/*
4603		 * Send to the stack if:
4604		 *  - LRO is not enabled, or
4605		 *  - there are no LRO resources, or
4606		 *  - the LRO enqueue fails
4607		 */
4608		if (rxr->lro.lro_cnt != 0)
4609			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4610				return;
4611	}
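	/*
	** The RX lock is dropped around the call up the stack;
	** if_input may take its own locks and can call back into
	** the driver.
	*/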
4612	IGB_RX_UNLOCK(rxr);
4613	(*ifp->if_input)(ifp, m);
4614	IGB_RX_LOCK(rxr);
4615}
4616
4617/*********************************************************************
4618 *
4619 *  This routine executes in interrupt context. It replenishes
4620 *  the mbufs in the descriptor ring and sends data which has been
4621 *  DMA'd into host memory to the upper layer.
4622 *
4623 *  We loop at most count times if count is > 0, or until done if
4624 *  count < 0.
4625 *
4626 *  Return TRUE if more to clean, FALSE otherwise
4627 *********************************************************************/
4628static bool
4629igb_rxeof(struct igb_queue *que, int count, int *done)
4630{
4631	struct adapter		*adapter = que->adapter;
4632	struct rx_ring		*rxr = que->rxr;
4633	struct ifnet		*ifp = adapter->ifp;
4634	struct lro_ctrl		*lro = &rxr->lro;
4635	struct lro_entry	*queued;
4636	int			i, processed = 0, rxdone = 0;
4637	u32			ptype, staterr = 0;
4638	union e1000_adv_rx_desc	*cur;
4639
4640	IGB_RX_LOCK(rxr);
4641	/* Sync the ring. */
4642	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4643	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4644
4645#ifdef DEV_NETMAP
4646	if (ifp->if_capenable & IFCAP_NETMAP) {
4647		struct netmap_adapter *na = NA(ifp);
4648
4649		na->rx_rings[rxr->me].nr_kflags |= NKR_PENDINTR;
4650		selwakeuppri(&na->rx_rings[rxr->me].si, PI_NET);
4651		IGB_RX_UNLOCK(rxr);
4652		IGB_CORE_LOCK(adapter);
4653		selwakeuppri(&na->rx_si, PI_NET);
4654		IGB_CORE_UNLOCK(adapter);
4655		return (0);
4656	}
4657#endif /* DEV_NETMAP */
4658
4659	/* Main clean loop */
4660	for (i = rxr->next_to_check; count != 0;) {
4661		struct mbuf		*sendmp, *mh, *mp;
4662		struct igb_rx_buf	*rxbuf;
4663		u16			hlen, plen, hdr, vtag;
4664		bool			eop = FALSE;
4665
4666		cur = &rxr->rx_base[i];
4667		staterr = le32toh(cur->wb.upper.status_error);
4668		if ((staterr & E1000_RXD_STAT_DD) == 0)
4669			break;
4670		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4671			break;
4672		count--;
4673		sendmp = mh = mp = NULL;
4674		cur->wb.upper.status_error = 0;
4675		rxbuf = &rxr->rx_buffers[i];
4676		plen = le16toh(cur->wb.upper.length);
4677		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4678		if ((adapter->hw.mac.type == e1000_i350) &&
4679		    (staterr & E1000_RXDEXT_STATERR_LB))
4680			vtag = be16toh(cur->wb.upper.vlan);
4681		else
4682			vtag = le16toh(cur->wb.upper.vlan);
4683		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4684		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4685
4686		/* Make sure all segments of a bad packet are discarded */
4687		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4688		    (rxr->discard)) {
4689			ifp->if_ierrors++;
4690			++rxr->rx_discarded;
4691			if (!eop) /* Catch subsequent segs */
4692				rxr->discard = TRUE;
4693			else
4694				rxr->discard = FALSE;
4695			igb_rx_discard(rxr, i);
4696			goto next_desc;
4697		}
4698
4699		/*
4700		** The way the hardware is configured to
4701		** split, it will ONLY use the header buffer
4702		** when header split is enabled, otherwise we
4703		** get normal behavior, i.e., both header and
4704		** payload are DMA'd into the payload buffer.
4705		**
4706		** The fmp test is to catch the case where a
4707		** packet spans multiple descriptors; in that
4708		** case only the first header is valid.
4709		*/
4710		if (rxr->hdr_split && rxr->fmp == NULL) {
4711			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4712			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4713			if (hlen > IGB_HDR_BUF)
4714				hlen = IGB_HDR_BUF;
4715			mh = rxr->rx_buffers[i].m_head;
4716			mh->m_len = hlen;
4717			/* clear buf pointer for refresh */
4718			rxbuf->m_head = NULL;
4719			/*
4720			** Get the payload length; this
4721			** could be zero if it's a small
4722			** packet.
4723			*/
4724			if (plen > 0) {
4725				mp = rxr->rx_buffers[i].m_pack;
4726				mp->m_len = plen;
4727				mh->m_next = mp;
4728				/* clear buf pointer */
4729				rxbuf->m_pack = NULL;
4730				rxr->rx_split_packets++;
4731			}
4732		} else {
4733			/*
4734			** Either no header split, or a
4735			** secondary piece of a fragmented
4736			** split packet.
4737			*/
4738			mh = rxr->rx_buffers[i].m_pack;
4739			mh->m_len = plen;
4740			/* clear buf info for refresh */
4741			rxbuf->m_pack = NULL;
4742		}
4743
4744		++processed; /* So we know when to refresh */
4745
4746		/* Initial frame - setup */
4747		if (rxr->fmp == NULL) {
4748			mh->m_pkthdr.len = mh->m_len;
4749			/* Save the head of the chain */
4750			rxr->fmp = mh;
4751			rxr->lmp = mh;
4752			if (mp != NULL) {
4753				/* Add payload if split */
4754				mh->m_pkthdr.len += mp->m_len;
4755				rxr->lmp = mh->m_next;
4756			}
4757		} else {
4758			/* Chain mbuf's together */
4759			rxr->lmp->m_next = mh;
4760			rxr->lmp = rxr->lmp->m_next;
4761			rxr->fmp->m_pkthdr.len += mh->m_len;
4762		}
4763
4764		if (eop) {
4765			rxr->fmp->m_pkthdr.rcvif = ifp;
4766			ifp->if_ipackets++;
4767			rxr->rx_packets++;
4768			/* capture data for AIM */
4769			rxr->packets++;
4770			rxr->bytes += rxr->fmp->m_pkthdr.len;
4771			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4772
4773			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4774				igb_rx_checksum(staterr, rxr->fmp, ptype);
4775
4776			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4777			    (staterr & E1000_RXD_STAT_VP) != 0) {
4778				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4779				rxr->fmp->m_flags |= M_VLANTAG;
4780			}
4781#if __FreeBSD_version >= 800000
4782			rxr->fmp->m_pkthdr.flowid = que->msix;
4783			rxr->fmp->m_flags |= M_FLOWID;
4784#endif
4785			sendmp = rxr->fmp;
4786			/* Make sure to set M_PKTHDR. */
4787			sendmp->m_flags |= M_PKTHDR;
4788			rxr->fmp = NULL;
4789			rxr->lmp = NULL;
4790		}
4791
4792next_desc:
4793		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4794		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4795
4796		/* Advance our pointers to the next descriptor. */
4797		if (++i == adapter->num_rx_desc)
4798			i = 0;
4799		/*
4800		** Send to the stack or LRO
4801		*/
4802		if (sendmp != NULL) {
4803			rxr->next_to_check = i;
4804			igb_rx_input(rxr, ifp, sendmp, ptype);
4805			i = rxr->next_to_check;
4806			rxdone++;
4807		}
4808
4809		/* Every 8 descriptors we go to refresh mbufs */
4810		if (processed == 8) {
4811			igb_refresh_mbufs(rxr, i);
4812			processed = 0;
4813		}
4814	}
4815
4816	/* Catch any remainders */
4817	if (igb_rx_unrefreshed(rxr))
4818		igb_refresh_mbufs(rxr, i);
4819
4820	rxr->next_to_check = i;
4821
4822	/*
4823	 * Flush any outstanding LRO work
4824	 */
4825	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4826		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4827		tcp_lro_flush(lro, queued);
4828	}
4829
4830	if (done != NULL)
4831		*done = rxdone;
4832
4833	IGB_RX_UNLOCK(rxr);
4834	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4835}
4836
4837/*********************************************************************
4838 *
4839 *  Verify that the hardware indicated that the checksum is valid.
4840 *  Inform the stack about the status of checksum so that stack
4841 *  doesn't spend time verifying the checksum.
4842 *
4843 *********************************************************************/
4844static void
4845igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4846{
4847	u16 status = (u16)staterr;		/* status lives in the low 16 bits */
4848	u8  errors = (u8) (staterr >> 24);	/* error bits live in the top byte */
4849	int sctp;
4850
4851	/* Ignore Checksum bit is set */
4852	if (status & E1000_RXD_STAT_IXSM) {
4853		mp->m_pkthdr.csum_flags = 0;
4854		return;
4855	}
4856
4857	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4858	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4859		sctp = 1;
4860	else
4861		sctp = 0;
4862	if (status & E1000_RXD_STAT_IPCS) {
4863		/* Did it pass? */
4864		if (!(errors & E1000_RXD_ERR_IPE)) {
4865			/* IP Checksum Good */
4866			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4867			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4868		} else
4869			mp->m_pkthdr.csum_flags = 0;
4870	}
4871
4872	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4873		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4874#if __FreeBSD_version >= 800000
4875		if (sctp) /* reassign */
4876			type = CSUM_SCTP_VALID;
4877#endif
4878		/* Did it pass? */
4879		if (!(errors & E1000_RXD_ERR_TCPE)) {
4880			mp->m_pkthdr.csum_flags |= type;
4881			if (sctp == 0)
4882				mp->m_pkthdr.csum_data = htons(0xffff);
4883		}
4884	}
4885	return;
4886}
4887
4888/*
4889 * This routine is run via a vlan
4890 * config EVENT
4891 */
4892static void
4893igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4894{
4895	struct adapter	*adapter = ifp->if_softc;
4896	u32		index, bit;
4897
4898	if (ifp->if_softc !=  arg)   /* Not our event */
4899		return;
4900
4901	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4902                return;
4903
4904	IGB_CORE_LOCK(adapter);
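	/*
	** The VFTA is an array of 128 32-bit words with one bit per
	** possible VLAN id: bits 11:5 of the tag select the word and
	** bits 4:0 select the bit within it.
	*/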
4905	index = (vtag >> 5) & 0x7F;
4906	bit = vtag & 0x1F;
4907	adapter->shadow_vfta[index] |= (1 << bit);
4908	++adapter->num_vlans;
4909	/* Change hw filter setting */
4910	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4911		igb_setup_vlan_hw_support(adapter);
4912	IGB_CORE_UNLOCK(adapter);
4913}
4914
4915/*
4916 * This routine is run via a vlan
4917 * unconfig EVENT
4918 */
4919static void
4920igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4921{
4922	struct adapter	*adapter = ifp->if_softc;
4923	u32		index, bit;
4924
4925	if (ifp->if_softc !=  arg)
4926		return;
4927
4928	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4929                return;
4930
4931	IGB_CORE_LOCK(adapter);
4932	index = (vtag >> 5) & 0x7F;
4933	bit = vtag & 0x1F;
4934	adapter->shadow_vfta[index] &= ~(1 << bit);
4935	--adapter->num_vlans;
4936	/* Change hw filter setting */
4937	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4938		igb_setup_vlan_hw_support(adapter);
4939	IGB_CORE_UNLOCK(adapter);
4940}
4941
4942static void
4943igb_setup_vlan_hw_support(struct adapter *adapter)
4944{
4945	struct e1000_hw *hw = &adapter->hw;
4946	struct ifnet	*ifp = adapter->ifp;
4947	u32             reg;
4948
4949	if (adapter->vf_ifp) {
4950		e1000_rlpml_set_vf(hw,
4951		    adapter->max_frame_size + VLAN_TAG_SIZE);
4952		return;
4953	}
4954
4955	reg = E1000_READ_REG(hw, E1000_CTRL);
4956	reg |= E1000_CTRL_VME;
4957	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4958
4959	/* Enable the Filter Table */
4960	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4961		reg = E1000_READ_REG(hw, E1000_RCTL);
4962		reg &= ~E1000_RCTL_CFIEN;
4963		reg |= E1000_RCTL_VFE;
4964		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4965	}
4966
4967	/* Update the frame size */
4968	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4969	    adapter->max_frame_size + VLAN_TAG_SIZE);
4970
4971	/* Don't bother with table if no vlans */
4972	if ((adapter->num_vlans == 0) ||
4973	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4974                return;
4975	/*
4976	** A soft reset zeroes out the VFTA, so
4977	** we need to repopulate it now.
4978	*/
4979	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4980                if (adapter->shadow_vfta[i] != 0) {
4981			if (adapter->vf_ifp)
4982				e1000_vfta_set_vf(hw,
4983				    adapter->shadow_vfta[i], TRUE);
4984			else
4985				e1000_write_vfta(hw,
4986				    i, adapter->shadow_vfta[i]);
4987		}
4988}
4989
4990static void
4991igb_enable_intr(struct adapter *adapter)
4992{
4993	/* With RSS set up what to auto clear */
4994	if (adapter->msix_mem) {
4995		u32 mask = (adapter->que_mask | adapter->link_mask);
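		/*
		** With MSIX the queue causes are set to auto-clear
		** (EIAC) and auto-mask (EIAM) and then enabled via
		** EIMS; link state changes still use the legacy IMS.
		*/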
4996		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4997		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4998		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4999		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5000		    E1000_IMS_LSC);
5001	} else {
5002		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5003		    IMS_ENABLE_MASK);
5004	}
5005	E1000_WRITE_FLUSH(&adapter->hw);
5006
5007	return;
5008}
5009
5010static void
5011igb_disable_intr(struct adapter *adapter)
5012{
5013	if (adapter->msix_mem) {
5014		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5015		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5016	}
5017	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5018	E1000_WRITE_FLUSH(&adapter->hw);
5019	return;
5020}
5021
5022/*
5023 * Bit of a misnomer: what this really means is
5024 * to enable OS management of the system, i.e.
5025 * to disable special hardware management features.
5026 */
5027static void
5028igb_init_manageability(struct adapter *adapter)
5029{
5030	if (adapter->has_manage) {
5031		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5032		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5033
5034		/* disable hardware interception of ARP */
5035		manc &= ~(E1000_MANC_ARP_EN);
5036
5037                /* enable receiving management packets to the host */
5038		manc |= E1000_MANC_EN_MNG2HOST;
5039		manc2h |= 1 << 5;  /* Mng Port 623 */
5040		manc2h |= 1 << 6;  /* Mng Port 664 */
5041		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5042		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5043	}
5044}
5045
5046/*
5047 * Give control back to hardware management
5048 * controller if there is one.
5049 */
5050static void
5051igb_release_manageability(struct adapter *adapter)
5052{
5053	if (adapter->has_manage) {
5054		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5055
5056		/* re-enable hardware interception of ARP */
5057		manc |= E1000_MANC_ARP_EN;
5058		manc &= ~E1000_MANC_EN_MNG2HOST;
5059
5060		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5061	}
5062}
5063
5064/*
5065 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5066 * For ASF and Pass Through versions of f/w this means that
5067 * the driver is loaded.
5068 *
5069 */
5070static void
5071igb_get_hw_control(struct adapter *adapter)
5072{
5073	u32 ctrl_ext;
5074
5075	if (adapter->vf_ifp)
5076		return;
5077
5078	/* Let firmware know the driver has taken over */
5079	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5080	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5081	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5082}
5083
5084/*
5085 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5086 * For ASF and Pass Through versions of f/w this means that the
5087 * driver is no longer loaded.
5088 *
5089 */
5090static void
5091igb_release_hw_control(struct adapter *adapter)
5092{
5093	u32 ctrl_ext;
5094
5095	if (adapter->vf_ifp)
5096		return;
5097
5098	/* Let firmware take over control of the h/w */
5099	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5100	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5101	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5102}
5103
5104static int
5105igb_is_valid_ether_addr(uint8_t *addr)
5106{
5107	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5108
5109	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5110		return (FALSE);
5111	}
5112
5113	return (TRUE);
5114}
5115
5116
5117/*
5118 * Enable PCI Wake On Lan capability
5119 */
5120static void
5121igb_enable_wakeup(device_t dev)
5122{
5123	u16     cap, status;
5124	u8      id;
5125
5126	/* First find the capabilities pointer */
5127	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5128	/* Read the PM Capabilities */
5129	id = pci_read_config(dev, cap, 1);
5130	if (id != PCIY_PMG)     /* Something wrong */
5131		return;
5132	/* OK, we have the power capabilities, so
5133	   now get the status register */
5134	cap += PCIR_POWER_STATUS;
5135	status = pci_read_config(dev, cap, 2);
5136	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5137	pci_write_config(dev, cap, status, 2);
5138	return;
5139}
5140
5141static void
5142igb_led_func(void *arg, int onoff)
5143{
5144	struct adapter	*adapter = arg;
5145
5146	IGB_CORE_LOCK(adapter);
5147	if (onoff) {
5148		e1000_setup_led(&adapter->hw);
5149		e1000_led_on(&adapter->hw);
5150	} else {
5151		e1000_led_off(&adapter->hw);
5152		e1000_cleanup_led(&adapter->hw);
5153	}
5154	IGB_CORE_UNLOCK(adapter);
5155}
5156
5157/**********************************************************************
5158 *
5159 *  Update the board statistics counters.
5160 *
5161 **********************************************************************/
5162static void
5163igb_update_stats_counters(struct adapter *adapter)
5164{
5165	struct ifnet		*ifp;
5166        struct e1000_hw		*hw = &adapter->hw;
5167	struct e1000_hw_stats	*stats;
5168
5169	/*
5170	** The virtual function adapter has only a
5171	** small controlled set of stats, so do only
5172	** those and return.
5173	*/
5174	if (adapter->vf_ifp) {
5175		igb_update_vf_stats_counters(adapter);
5176		return;
5177	}
5178
5179	stats = (struct e1000_hw_stats	*)adapter->stats;
5180
5181	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5182	    (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5183		stats->symerrs +=
5184		    E1000_READ_REG(hw, E1000_SYMERRS);
5185		stats->sec += E1000_READ_REG(hw, E1000_SEC);
5186	}
5187
5188	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5189	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5190	stats->scc += E1000_READ_REG(hw, E1000_SCC);
5191	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5192
5193	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5194	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5195	stats->colc += E1000_READ_REG(hw, E1000_COLC);
5196	stats->dc += E1000_READ_REG(hw, E1000_DC);
5197	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5198	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5199	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5200	/*
5201	** For watchdog management we need to know if we have been
5202	** paused during the last interval, so capture that here.
5203	*/
5204	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5205	stats->xoffrxc += adapter->pause_frames;
5206	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5207	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5208	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5209	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5210	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5211	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5212	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5213	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5214	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5215	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5216	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5217	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5218
5219	/* For the 64-bit byte counters the low dword must be read first. */
5220	/* Both registers clear on the read of the high dword */
5221
5222	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5223	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5224	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5225	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5226
5227	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5228	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5229	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5230	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5231	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5232
5233	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5234	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5235
5236	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5237	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5238	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5239	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5240	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5241	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5242	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5243	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5244	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5245	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5246
5247	/* Interrupt Counts */
5248
5249	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5250	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5251	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5252	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5253	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5254	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5255	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5256	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5257	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5258
5259	/* Host to Card Statistics */
5260
5261	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5262	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5263	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5264	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5265	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5266	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5267	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5268	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5269	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5270	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5271	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5272	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5273	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5274	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5275
5276	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5277	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5278	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5279	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5280	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5281	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5282
5283	ifp = adapter->ifp;
5284	ifp->if_collisions = stats->colc;
5285
5286	/* Rx Errors */
5287	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5288	    stats->crcerrs + stats->algnerrc +
5289	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5290
5291	/* Tx Errors */
5292	ifp->if_oerrors = stats->ecol +
5293	    stats->latecol + adapter->watchdog_events;
5294
5295	/* Driver specific counters */
5296	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5297	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5298	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5299	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5300	adapter->packet_buf_alloc_tx =
5301	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5302	adapter->packet_buf_alloc_rx =
5303	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5304}
5305
5306
5307/**********************************************************************
5308 *
5309 *  Initialize the VF board statistics counters.
5310 *
5311 **********************************************************************/
5312static void
5313igb_vf_init_stats(struct adapter *adapter)
5314{
5315	struct e1000_hw *hw = &adapter->hw;
5316	struct e1000_vf_stats	*stats;
5317
5318	stats = (struct e1000_vf_stats *)adapter->stats;
5319	if (stats == NULL)
5320		return;
5321	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5322	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5323	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5324	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5325	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5326}
5327
5328/**********************************************************************
5329 *
5330 *  Update the VF board statistics counters.
5331 *
5332 **********************************************************************/
5333static void
5334igb_update_vf_stats_counters(struct adapter *adapter)
5335{
5336	struct e1000_hw *hw = &adapter->hw;
5337	struct e1000_vf_stats	*stats;
5338
5339	if (adapter->link_speed == 0)
5340		return;
5341
5342	stats = (struct e1000_vf_stats *)adapter->stats;
5343
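	/*
	 * UPDATE_VF_REG() (defined in if_igb.h) folds each 32-bit VF
	 * hardware counter into its 64-bit software total, using the
	 * previously sampled value to account for register rollover.
	 */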
5344	UPDATE_VF_REG(E1000_VFGPRC,
5345	    stats->last_gprc, stats->gprc);
5346	UPDATE_VF_REG(E1000_VFGORC,
5347	    stats->last_gorc, stats->gorc);
5348	UPDATE_VF_REG(E1000_VFGPTC,
5349	    stats->last_gptc, stats->gptc);
5350	UPDATE_VF_REG(E1000_VFGOTC,
5351	    stats->last_gotc, stats->gotc);
5352	UPDATE_VF_REG(E1000_VFMPRC,
5353	    stats->last_mprc, stats->mprc);
5354}
5355
5356/* Export a single 32-bit register via a read-only sysctl. */
5357static int
5358igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5359{
5360	struct adapter *adapter;
5361	u_int val;
5362
5363	adapter = oidp->oid_arg1;
5364	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5365	return (sysctl_handle_int(oidp, &val, 0, req));
5366}
5367
5368/*
5369**  Tunable interrupt rate handler
5370*/
5371static int
5372igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5373{
5374	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5375	int			error;
5376	u32			reg, usec, rate;
5377
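	/*
	 * Report the current EITR setting as an approximate interrupts
	 * per second figure.  Note that a value written to this sysctl
	 * is accepted but not pushed back to the EITR register here.
	 */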
5378	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5379	usec = ((reg & 0x7FFC) >> 2);
5380	if (usec > 0)
5381		rate = 1000000 / usec;
5382	else
5383		rate = 0;
5384	error = sysctl_handle_int(oidp, &rate, 0, req);
5385	if (error || !req->newptr)
5386		return (error);
5387	return (0);
5388}
5389
5390/*
5391 * Add sysctl variables, one per statistic, to the system.
5392 */
5393static void
5394igb_add_hw_stats(struct adapter *adapter)
5395{
5396	device_t dev = adapter->dev;
5397
5398	struct tx_ring *txr = adapter->tx_rings;
5399	struct rx_ring *rxr = adapter->rx_rings;
5400
5401	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5402	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5403	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5404	struct e1000_hw_stats *stats = adapter->stats;
5405
5406	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5407	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5408
5409#define QUEUE_NAME_LEN 32
5410	char namebuf[QUEUE_NAME_LEN];
5411
5412	/* Driver Statistics */
5413	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5414			CTLFLAG_RD, &adapter->link_irq, 0,
5415			"Link MSIX IRQ Handled");
5416	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5417			CTLFLAG_RD, &adapter->dropped_pkts,
5418			"Driver dropped packets");
5419	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5420			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5421			"Driver tx dma failure in xmit");
5422	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5423			CTLFLAG_RD, &adapter->rx_overruns,
5424			"RX overruns");
5425	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5426			CTLFLAG_RD, &adapter->watchdog_events,
5427			"Watchdog timeouts");
5428
5429	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5430			CTLFLAG_RD, &adapter->device_control,
5431			"Device Control Register");
5432	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5433			CTLFLAG_RD, &adapter->rx_control,
5434			"Receiver Control Register");
5435	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5436			CTLFLAG_RD, &adapter->int_mask,
5437			"Interrupt Mask");
5438	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5439			CTLFLAG_RD, &adapter->eint_mask,
5440			"Extended Interrupt Mask");
5441	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5442			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5443			"Transmit Buffer Packet Allocation");
5444	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5445			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5446			"Receive Buffer Packet Allocation");
5447	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5448			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5449			"Flow Control High Watermark");
5450	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5451			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5452			"Flow Control Low Watermark");
5453
5454	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5455		struct lro_ctrl *lro = &rxr->lro;
5456
5457		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5458		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5459					    CTLFLAG_RD, NULL, "Queue Name");
5460		queue_list = SYSCTL_CHILDREN(queue_node);
5461
5462		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5463				CTLFLAG_RD, &adapter->queues[i],
5464				sizeof(&adapter->queues[i]),
5465				igb_sysctl_interrupt_rate_handler,
5466				"IU", "Interrupt Rate");
5467
5468		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5469				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5470				igb_sysctl_reg_handler, "IU",
5471				"Transmit Descriptor Head");
5472		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5473				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5474				igb_sysctl_reg_handler, "IU",
5475				"Transmit Descriptor Tail");
5476		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5477				CTLFLAG_RD, &txr->no_desc_avail,
5478				"Queue No Descriptor Available");
5479		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5480				CTLFLAG_RD, &txr->tx_packets,
5481				"Queue Packets Transmitted");
5482
5483		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5484				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5485				igb_sysctl_reg_handler, "IU",
5486				"Receive Descriptor Head");
5487		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5488				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5489				igb_sysctl_reg_handler, "IU",
5490				"Receive Descriptor Tail");
5491		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5492				CTLFLAG_RD, &rxr->rx_packets,
5493				"Queue Packets Received");
5494		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5495				CTLFLAG_RD, &rxr->rx_bytes,
5496				"Queue Bytes Received");
5497		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5498				CTLFLAG_RD, &lro->lro_queued, 0,
5499				"LRO Queued");
5500		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5501				CTLFLAG_RD, &lro->lro_flushed, 0,
5502				"LRO Flushed");
5503	}
5504
5505	/* MAC stats get their own sub node */
5506
5507	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5508				    CTLFLAG_RD, NULL, "MAC Statistics");
5509	stat_list = SYSCTL_CHILDREN(stat_node);
5510
5511	/*
5512	** VF adapter has a very limited set of stats
5513	** since its not managing the metal, so to speak.
5514	*/
5515	if (adapter->vf_ifp) {
5516		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5517				CTLFLAG_RD, &stats->gprc,
5518				"Good Packets Received");
5519		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5520				CTLFLAG_RD, &stats->gptc,
5521				"Good Packets Transmitted");
5522		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5523				CTLFLAG_RD, &stats->gorc,
5524				"Good Octets Received");
5525		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5526				CTLFLAG_RD, &stats->gotc,
5527				"Good Octets Transmitted");
5528		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5529				CTLFLAG_RD, &stats->mprc,
5530				"Multicast Packets Received");
5531		return;
5532	}
5533
5534	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5535			CTLFLAG_RD, &stats->ecol,
5536			"Excessive collisions");
5537	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5538			CTLFLAG_RD, &stats->scc,
5539			"Single collisions");
5540	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5541			CTLFLAG_RD, &stats->mcc,
5542			"Multiple collisions");
5543	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5544			CTLFLAG_RD, &stats->latecol,
5545			"Late collisions");
5546	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5547			CTLFLAG_RD, &stats->colc,
5548			"Collision Count");
5549	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5550			CTLFLAG_RD, &stats->symerrs,
5551			"Symbol Errors");
5552	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5553			CTLFLAG_RD, &stats->sec,
5554			"Sequence Errors");
5555	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5556			CTLFLAG_RD, &stats->dc,
5557			"Defer Count");
5558	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5559			CTLFLAG_RD, &stats->mpc,
5560			"Missed Packets");
5561	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5562			CTLFLAG_RD, &stats->rnbc,
5563			"Receive No Buffers");
5564	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5565			CTLFLAG_RD, &stats->ruc,
5566			"Receive Undersize");
5567	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5568			CTLFLAG_RD, &stats->rfc,
5569			"Fragmented Packets Received");
5570	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5571			CTLFLAG_RD, &stats->roc,
5572			"Oversized Packets Received");
5573	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5574			CTLFLAG_RD, &stats->rjc,
5575			"Received Jabber");
5576	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5577			CTLFLAG_RD, &stats->rxerrc,
5578			"Receive Errors");
5579	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5580			CTLFLAG_RD, &stats->crcerrs,
5581			"CRC errors");
5582	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5583			CTLFLAG_RD, &stats->algnerrc,
5584			"Alignment Errors");
5585	/* On 82575 these are collision counts */
5586	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5587			CTLFLAG_RD, &stats->cexterr,
5588			"Collision/Carrier extension errors");
5589	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5590			CTLFLAG_RD, &stats->xonrxc,
5591			"XON Received");
5592	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5593			CTLFLAG_RD, &stats->xontxc,
5594			"XON Transmitted");
5595	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5596			CTLFLAG_RD, &stats->xoffrxc,
5597			"XOFF Received");
5598	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5599			CTLFLAG_RD, &stats->xofftxc,
5600			"XOFF Transmitted");
5601	/* Packet Reception Stats */
5602	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5603			CTLFLAG_RD, &stats->tpr,
5604			"Total Packets Received");
5605	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5606			CTLFLAG_RD, &stats->gprc,
5607			"Good Packets Received");
5608	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5609			CTLFLAG_RD, &stats->bprc,
5610			"Broadcast Packets Received");
5611	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5612			CTLFLAG_RD, &stats->mprc,
5613			"Multicast Packets Received");
5614	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5615			CTLFLAG_RD, &stats->prc64,
5616			"64 byte frames received");
5617	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5618			CTLFLAG_RD, &stats->prc127,
5619			"65-127 byte frames received");
5620	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5621			CTLFLAG_RD, &stats->prc255,
5622			"128-255 byte frames received");
5623	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5624			CTLFLAG_RD, &stats->prc511,
5625			"256-511 byte frames received");
5626	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5627			CTLFLAG_RD, &stats->prc1023,
5628			"512-1023 byte frames received");
5629	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5630			CTLFLAG_RD, &stats->prc1522,
5631			"1024-1522 byte frames received");
5632	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5633			CTLFLAG_RD, &stats->gorc,
5634			"Good Octets Received");
5635
5636	/* Packet Transmission Stats */
5637	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5638			CTLFLAG_RD, &stats->gotc,
5639			"Good Octets Transmitted");
5640	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5641			CTLFLAG_RD, &stats->tpt,
5642			"Total Packets Transmitted");
5643	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5644			CTLFLAG_RD, &stats->gptc,
5645			"Good Packets Transmitted");
5646	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5647			CTLFLAG_RD, &stats->bptc,
5648			"Broadcast Packets Transmitted");
5649	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5650			CTLFLAG_RD, &stats->mptc,
5651			"Multicast Packets Transmitted");
5652	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5653			CTLFLAG_RD, &stats->ptc64,
5654			"64 byte frames transmitted");
5655	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5656			CTLFLAG_RD, &stats->ptc127,
5657			"65-127 byte frames transmitted");
5658	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5659			CTLFLAG_RD, &stats->ptc255,
5660			"128-255 byte frames transmitted");
5661	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5662			CTLFLAG_RD, &stats->ptc511,
5663			"256-511 byte frames transmitted");
5664	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5665			CTLFLAG_RD, &stats->ptc1023,
5666			"512-1023 byte frames transmitted");
5667	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5668			CTLFLAG_RD, &stats->ptc1522,
5669			"1024-1522 byte frames transmitted");
5670	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5671			CTLFLAG_RD, &stats->tsctc,
5672			"TSO Contexts Transmitted");
5673	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5674			CTLFLAG_RD, &stats->tsctfc,
5675			"TSO Contexts Failed");
5676
5677
5678	/* Interrupt Stats */
5679
5680	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5681				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5682	int_list = SYSCTL_CHILDREN(int_node);
5683
5684	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5685			CTLFLAG_RD, &stats->iac,
5686			"Interrupt Assertion Count");
5687
5688	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5689			CTLFLAG_RD, &stats->icrxptc,
5690			"Interrupt Cause Rx Pkt Timer Expire Count");
5691
5692	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5693			CTLFLAG_RD, &stats->icrxatc,
5694			"Interrupt Cause Rx Abs Timer Expire Count");
5695
5696	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5697			CTLFLAG_RD, &stats->ictxptc,
5698			"Interrupt Cause Tx Pkt Timer Expire Count");
5699
5700	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5701			CTLFLAG_RD, &stats->ictxatc,
5702			"Interrupt Cause Tx Abs Timer Expire Count");
5703
5704	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5705			CTLFLAG_RD, &stats->ictxqec,
5706			"Interrupt Cause Tx Queue Empty Count");
5707
5708	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5709			CTLFLAG_RD, &stats->ictxqmtc,
5710			"Interrupt Cause Tx Queue Min Thresh Count");
5711
5712	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5713			CTLFLAG_RD, &stats->icrxdmtc,
5714			"Interrupt Cause Rx Desc Min Thresh Count");
5715
5716	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5717			CTLFLAG_RD, &stats->icrxoc,
5718			"Interrupt Cause Receiver Overrun Count");
5719
5720	/* Host to Card Stats */
5721
5722	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5723				    CTLFLAG_RD, NULL,
5724				    "Host to Card Statistics");
5725
5726	host_list = SYSCTL_CHILDREN(host_node);
5727
5728	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5729			CTLFLAG_RD, &stats->cbtmpc,
5730			"Circuit Breaker Tx Packet Count");
5731
5732	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5733			CTLFLAG_RD, &stats->htdpmc,
5734			"Host Transmit Discarded Packets");
5735
5736	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5737			CTLFLAG_RD, &stats->rpthc,
5738			"Rx Packets To Host");
5739
5740	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5741			CTLFLAG_RD, &stats->cbrmpc,
5742			"Circuit Breaker Rx Packet Count");
5743
5744	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5745			CTLFLAG_RD, &stats->cbrdpc,
5746			"Circuit Breaker Rx Dropped Count");
5747
5748	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5749			CTLFLAG_RD, &stats->hgptc,
5750			"Host Good Packets Tx Count");
5751
5752	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5753			CTLFLAG_RD, &stats->htcbdpc,
5754			"Host Tx Circuit Breaker Dropped Count");
5755
5756	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5757			CTLFLAG_RD, &stats->hgorc,
5758			"Host Good Octets Received Count");
5759
5760	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5761			CTLFLAG_RD, &stats->hgotc,
5762			"Host Good Octets Transmit Count");
5763
5764	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5765			CTLFLAG_RD, &stats->lenerrs,
5766			"Length Errors");
5767
5768	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5769			CTLFLAG_RD, &stats->scvpc,
5770			"SerDes/SGMII Code Violation Pkt Count");
5771
5772	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5773			CTLFLAG_RD, &stats->hrmpc,
5774			"Header Redirection Missed Packet Count");
5775}
5776
5777
5778/**********************************************************************
5779 *
5780 *  This routine provides a way to dump out the adapter eeprom,
5781 *  often a useful debug/service tool. This only dumps the first
5782 *  often a useful debug/service tool. Only the first 32 words are
5783 *  dumped; everything of interest lives in that range.
5784 **********************************************************************/
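/*
 * Usage sketch (assuming this handler is attached to the per-device
 * "nvm" sysctl node): writing 1 dumps the first 32 EEPROM words to
 * the console, e.g.:
 *   sysctl dev.igb.0.nvm=1
 */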
5785static int
5786igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5787{
5788	struct adapter *adapter;
5789	int error;
5790	int result;
5791
5792	result = -1;
5793	error = sysctl_handle_int(oidp, &result, 0, req);
5794
5795	if (error || !req->newptr)
5796		return (error);
5797
5798	/*
5799	 * This value will cause a hex dump of the
5800	 * first 32 16-bit words of the EEPROM to
5801	 * the screen.
5802	 */
5803	if (result == 1) {
5804		adapter = (struct adapter *)arg1;
5805		igb_print_nvm_info(adapter);
5806	}
5807
5808	return (error);
5809}
5810
5811static void
5812igb_print_nvm_info(struct adapter *adapter)
5813{
5814	u16	eeprom_data;
5815	int	i, j, row = 0;
5816
5817	/* It's a bit crude, but it gets the job done */
5818	printf("\nInterface EEPROM Dump:\n");
5819	printf("Offset\n0x0000  ");
5820	for (i = 0, j = 0; i < 32; i++, j++) {
5821		if (j == 8) { /* Make the offset block */
5822			j = 0; ++row;
5823			printf("\n0x00%x0  ",row);
5824		}
5825		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5826		printf("%04x ", eeprom_data);
5827	}
5828	printf("\n");
5829}
5830
5831static void
5832igb_set_sysctl_value(struct adapter *adapter, const char *name,
5833	const char *description, int *limit, int value)
5834{
5835	*limit = value;
5836	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5837	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5838	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5839}
5840
5841/*
5842** Set flow control using sysctl:
5843** Flow control values:
5844** 	0 - off
5845**	1 - rx pause
5846**	2 - tx pause
5847**	3 - full
5848*/
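/*
** Usage sketch (assuming this handler is attached to the per-device
** "fc" sysctl node, e.g. dev.igb.0.fc):
**	sysctl dev.igb.0.fc=3	# request full flow control
**	sysctl dev.igb.0.fc=0	# disable flow control
*/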
5849static int
5850igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5851{
5852	int		error;
5853	static int	input = 3; /* default is full */
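	/*
	 * Note: 'input' is static, so the value persists across calls
	 * and is shared by every igb instance using this handler.
	 */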
5854	struct adapter	*adapter = (struct adapter *) arg1;
5855
5856	error = sysctl_handle_int(oidp, &input, 0, req);
5857
5858	if ((error) || (req->newptr == NULL))
5859		return (error);
5860
5861	switch (input) {
5862		case e1000_fc_rx_pause:
5863		case e1000_fc_tx_pause:
5864		case e1000_fc_full:
5865		case e1000_fc_none:
5866			adapter->hw.fc.requested_mode = input;
5867			adapter->fc = input;
5868			break;
5869		default:
5870			/* Do nothing */
5871			return (error);
5872	}
5873
5874	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5875	e1000_force_mac_fc(&adapter->hw);
5876	return (error);
5877}
5878
5879/*
5880** Manage DMA Coalesce:
5881** Control values:
5882** 	0/1 - off/on
5883**	Legal timer values are:
5884**	250, 500, and 1000-10000 (in steps of 1000)
5885*/
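/*
** Usage sketch (assuming this handler is attached to the per-device
** "dmac" sysctl node):
**	sysctl dev.igb.0.dmac=1000	# enable, timer value 1000
**	sysctl dev.igb.0.dmac=0		# disable DMA coalescing
*/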
5886static int
5887igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5888{
5889	struct adapter *adapter = (struct adapter *) arg1;
5890	int		error;
5891
5892	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5893
5894	if ((error) || (req->newptr == NULL))
5895		return (error);
5896
5897	switch (adapter->dmac) {
5898		case 0:
5899			/* Disabling */
5900			break;
5901		case 1: /* Just enable and use default */
5902			adapter->dmac = 1000;
5903			break;
5904		case 250:
5905		case 500:
5906		case 1000:
5907		case 2000:
5908		case 3000:
5909		case 4000:
5910		case 5000:
5911		case 6000:
5912		case 7000:
5913		case 8000:
5914		case 9000:
5915		case 10000:
5916			/* Legal values - allow */
5917			break;
5918		default:
5919			/* Do nothing, illegal value */
5920			adapter->dmac = 0;
5921			return (error);
5922	}
5923	/* Reinit the interface */
5924	igb_init(adapter);
5925	return (error);
5926}
5927