if_igb.c revision 203051
1/******************************************************************************
2
3  Copyright (c) 2001-2010, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 203051 2010-01-26 23:04:13Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <sys/pcpu.h>
60#include <sys/smp.h>
61#include <machine/smp.h>
62#include <machine/bus.h>
63#include <machine/resource.h>
64
65#ifdef IGB_IEEE1588
66#include <sys/ieee1588.h>
67#endif
68
69#include <net/bpf.h>
70#include <net/ethernet.h>
71#include <net/if.h>
72#include <net/if_arp.h>
73#include <net/if_dl.h>
74#include <net/if_media.h>
75
76#include <net/if_types.h>
77#include <net/if_vlan_var.h>
78
79#include <netinet/in_systm.h>
80#include <netinet/in.h>
81#include <netinet/if_ether.h>
82#include <netinet/ip.h>
83#include <netinet/ip6.h>
84#include <netinet/tcp.h>
85#include <netinet/tcp_lro.h>
86#include <netinet/udp.h>
87
88#include <machine/in_cksum.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 1.9.1";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
110 *  Used by probe to select devices to load on
111 *  Last field stores an index into e1000_strings
112 *  Last entry must be all 0s
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
132						PCI_ANY_ID, PCI_ANY_ID, 0},
133	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
134	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
135	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
136	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
137	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
138						PCI_ANY_ID, PCI_ANY_ID, 0},
139	/* required last entry */
140	{ 0, 0, 0, 0, 0}
141};
142
143/*********************************************************************
144 *  Table of branding strings for all supported NICs.
145 *********************************************************************/
146
147static char *igb_strings[] = {
148	"Intel(R) PRO/1000 Network Connection"
149};
150
151/*********************************************************************
152 *  Function prototypes
153 *********************************************************************/
154static int	igb_probe(device_t);
155static int	igb_attach(device_t);
156static int	igb_detach(device_t);
157static int	igb_shutdown(device_t);
158static int	igb_suspend(device_t);
159static int	igb_resume(device_t);
160static void	igb_start(struct ifnet *);
161static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
162#if __FreeBSD_version >= 800000
163static int	igb_mq_start(struct ifnet *, struct mbuf *);
164static int	igb_mq_start_locked(struct ifnet *,
165		    struct tx_ring *, struct mbuf *);
166static void	igb_qflush(struct ifnet *);
167#endif
168static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
169static void	igb_init(void *);
170static void	igb_init_locked(struct adapter *);
171static void	igb_stop(void *);
172static void	igb_media_status(struct ifnet *, struct ifmediareq *);
173static int	igb_media_change(struct ifnet *);
174static void	igb_identify_hardware(struct adapter *);
175static int	igb_allocate_pci_resources(struct adapter *);
176static int	igb_allocate_msix(struct adapter *);
177static int	igb_allocate_legacy(struct adapter *);
178static int	igb_setup_msix(struct adapter *);
179static void	igb_free_pci_resources(struct adapter *);
180static void	igb_local_timer(void *);
181static void	igb_reset(struct adapter *);
182static void	igb_setup_interface(device_t, struct adapter *);
183static int	igb_allocate_queues(struct adapter *);
184static void	igb_configure_queues(struct adapter *);
185
186static int	igb_allocate_transmit_buffers(struct tx_ring *);
187static void	igb_setup_transmit_structures(struct adapter *);
188static void	igb_setup_transmit_ring(struct tx_ring *);
189static void	igb_initialize_transmit_units(struct adapter *);
190static void	igb_free_transmit_structures(struct adapter *);
191static void	igb_free_transmit_buffers(struct tx_ring *);
192
193static int	igb_allocate_receive_buffers(struct rx_ring *);
194static int	igb_setup_receive_structures(struct adapter *);
195static int	igb_setup_receive_ring(struct rx_ring *);
196static void	igb_initialize_receive_units(struct adapter *);
197static void	igb_free_receive_structures(struct adapter *);
198static void	igb_free_receive_buffers(struct rx_ring *);
199static void	igb_free_receive_ring(struct rx_ring *);
200
201static void	igb_enable_intr(struct adapter *);
202static void	igb_disable_intr(struct adapter *);
203static void	igb_update_stats_counters(struct adapter *);
204static bool	igb_txeof(struct tx_ring *);
205
206static __inline	void igb_rx_discard(struct rx_ring *,
207		    union e1000_adv_rx_desc *, int);
208static __inline void igb_rx_input(struct rx_ring *,
209		    struct ifnet *, struct mbuf *, u32);
210
211static bool	igb_rxeof(struct rx_ring *, int);
212static void	igb_rx_checksum(u32, struct mbuf *, u32);
213static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
214static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
215static void	igb_set_promisc(struct adapter *);
216static void	igb_disable_promisc(struct adapter *);
217static void	igb_set_multi(struct adapter *);
218static void	igb_print_hw_stats(struct adapter *);
219static void	igb_update_link_status(struct adapter *);
220static int	igb_get_buf(struct rx_ring *, int, u8);
221
222static void	igb_register_vlan(void *, struct ifnet *, u16);
223static void	igb_unregister_vlan(void *, struct ifnet *, u16);
224static void	igb_setup_vlan_hw_support(struct adapter *);
225
226static int	igb_xmit(struct tx_ring *, struct mbuf **);
227static int	igb_dma_malloc(struct adapter *, bus_size_t,
228		    struct igb_dma_alloc *, int);
229static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
230static void	igb_print_debug_info(struct adapter *);
231static void	igb_print_nvm_info(struct adapter *);
232static int 	igb_is_valid_ether_addr(u8 *);
233static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
234static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
235/* Management and WOL Support */
236static void	igb_init_manageability(struct adapter *);
237static void	igb_release_manageability(struct adapter *);
238static void     igb_get_hw_control(struct adapter *);
239static void     igb_release_hw_control(struct adapter *);
240static void     igb_enable_wakeup(device_t);
241
242static int	igb_irq_fast(void *);
243static void	igb_add_rx_process_limit(struct adapter *, const char *,
244		    const char *, int *, int);
245static void	igb_handle_rxtx(void *context, int pending);
246static void	igb_handle_que(void *context, int pending);
247
248/* These are MSIX only irq handlers */
249static void	igb_msix_que(void *);
250static void	igb_msix_link(void *);
251
252/*********************************************************************
253 *  FreeBSD Device Interface Entry Points
254 *********************************************************************/
255
256static device_method_t igb_methods[] = {
257	/* Device interface */
258	DEVMETHOD(device_probe, igb_probe),
259	DEVMETHOD(device_attach, igb_attach),
260	DEVMETHOD(device_detach, igb_detach),
261	DEVMETHOD(device_shutdown, igb_shutdown),
262	DEVMETHOD(device_suspend, igb_suspend),
263	DEVMETHOD(device_resume, igb_resume),
264	{0, 0}
265};
266
267static driver_t igb_driver = {
268	"igb", igb_methods, sizeof(struct adapter),
269};
270
271static devclass_t igb_devclass;
272DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
273MODULE_DEPEND(igb, pci, 1, 1, 1);
274MODULE_DEPEND(igb, ether, 1, 1, 1);
275
276/*********************************************************************
277 *  Tunable default values.
278 *********************************************************************/
279
280/* Descriptor defaults */
281static int igb_rxd = IGB_DEFAULT_RXD;
282static int igb_txd = IGB_DEFAULT_TXD;
283TUNABLE_INT("hw.igb.rxd", &igb_rxd);
284TUNABLE_INT("hw.igb.txd", &igb_txd);
285
286/*
287** AIM: Adaptive Interrupt Moderation
288** which means that the interrupt rate
289** is varied over time based on the
290** traffic for that interrupt vector
291*/
292static int igb_enable_aim = TRUE;
293TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
294
295/*
296 * MSIX should be the default for best performance,
297 * but this allows it to be forced off for testing.
298 */
299static int igb_enable_msix = 1;
300TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);
301
302/*
303 * Header split has seemed to be beneficial in
304 * many circumstances tested, however there have
305 * been some stability issues, so the default is
306 * off.
307 */
308static bool igb_header_split = FALSE;
309TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);
310
311/*
312** This will autoconfigure based on
313** the number of CPUs if left at 0.
314*/
315static int igb_num_queues = 0;
316TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
317
318/* How many packets rxeof tries to clean at a time */
319static int igb_rx_process_limit = 100;
320TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
321
322/* Flow control setting - default to FULL */
323static int igb_fc_setting = e1000_fc_full;
324TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
325
326/*
327** Shadow VFTA table, this is needed because
328** the real filter table gets cleared during
329** a soft reset and the driver needs to be able
330** to repopulate it.
331*/
332static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
333
334
335/*********************************************************************
336 *  Device identification routine
337 *
338 *  igb_probe determines if the driver should be loaded on
339 *  adapter based on PCI vendor/device id of the adapter.
340 *
341 *  return BUS_PROBE_DEFAULT on success, positive on failure
342 *********************************************************************/
343
344static int
345igb_probe(device_t dev)
346{
347	char		adapter_name[60];
348	uint16_t	pci_vendor_id = 0;
349	uint16_t	pci_device_id = 0;
350	uint16_t	pci_subvendor_id = 0;
351	uint16_t	pci_subdevice_id = 0;
352	igb_vendor_info_t *ent;
353
354	INIT_DEBUGOUT("igb_probe: begin");
355
356	pci_vendor_id = pci_get_vendor(dev);
357	if (pci_vendor_id != IGB_VENDOR_ID)
358		return (ENXIO);
359
360	pci_device_id = pci_get_device(dev);
361	pci_subvendor_id = pci_get_subvendor(dev);
362	pci_subdevice_id = pci_get_subdevice(dev);
363
364	ent = igb_vendor_info_array;
365	while (ent->vendor_id != 0) {
366		if ((pci_vendor_id == ent->vendor_id) &&
367		    (pci_device_id == ent->device_id) &&
368
369		    ((pci_subvendor_id == ent->subvendor_id) ||
370		    (ent->subvendor_id == PCI_ANY_ID)) &&
371
372		    ((pci_subdevice_id == ent->subdevice_id) ||
373		    (ent->subdevice_id == PCI_ANY_ID))) {
374			sprintf(adapter_name, "%s %s",
375				igb_strings[ent->index],
376				igb_driver_version);
377			device_set_desc_copy(dev, adapter_name);
378			return (BUS_PROBE_DEFAULT);
379		}
380		ent++;
381	}
382
383	return (ENXIO);
384}
385
386/*********************************************************************
387 *  Device initialization routine
388 *
389 *  The attach entry point is called when the driver is being loaded.
390 *  This routine identifies the type of hardware, allocates all resources
391 *  and initializes the hardware.
392 *
393 *  return 0 on success, positive on failure
394 *********************************************************************/
395
396static int
397igb_attach(device_t dev)
398{
399	struct adapter	*adapter;
400	int		error = 0;
401	u16		eeprom_data;
402
403	INIT_DEBUGOUT("igb_attach: begin");
404
405	adapter = device_get_softc(dev);
406	adapter->dev = adapter->osdep.dev = dev;
407	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
408
409	/* SYSCTL stuff */
410	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
411	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
412	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
413	    igb_sysctl_debug_info, "I", "Debug Information");
414
415	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
416	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
417	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
418	    igb_sysctl_stats, "I", "Statistics");
419
420	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
421	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
422	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
423	    &igb_fc_setting, 0, "Flow Control");
424
425	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
426	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
427	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
428	    &igb_enable_aim, 1, "Interrupt Moderation");
429
430	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
431
432	/* Determine hardware and mac info */
433	igb_identify_hardware(adapter);
434
435	/* Setup PCI resources */
436	if (igb_allocate_pci_resources(adapter)) {
437		device_printf(dev, "Allocation of PCI resources failed\n");
438		error = ENXIO;
439		goto err_pci;
440	}
441
442	/* Do Shared Code initialization */
443	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
444		device_printf(dev, "Setup of Shared code failed\n");
445		error = ENXIO;
446		goto err_pci;
447	}
448
449	e1000_get_bus_info(&adapter->hw);
450
451	/* Sysctls for limiting the amount of work done in the taskqueue */
452	igb_add_rx_process_limit(adapter, "rx_processing_limit",
453	    "max number of rx packets to process", &adapter->rx_process_limit,
454	    igb_rx_process_limit);
455
456	/*
457	 * Validate number of transmit and receive descriptors. It
458	 * must not exceed hardware maximum, and must be multiple
459	 * of E1000_DBA_ALIGN.
460	 */
461	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
462	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
463		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
464		    IGB_DEFAULT_TXD, igb_txd);
465		adapter->num_tx_desc = IGB_DEFAULT_TXD;
466	} else
467		adapter->num_tx_desc = igb_txd;
468	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
469	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
470		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
471		    IGB_DEFAULT_RXD, igb_rxd);
472		adapter->num_rx_desc = IGB_DEFAULT_RXD;
473	} else
474		adapter->num_rx_desc = igb_rxd;
475
476	adapter->hw.mac.autoneg = DO_AUTO_NEG;
477	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
478	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
479
480	/* Copper options */
481	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
482		adapter->hw.phy.mdix = AUTO_ALL_MODES;
483		adapter->hw.phy.disable_polarity_correction = FALSE;
484		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
485	}
486
487	/*
488	 * Set the frame limits assuming
489	 * standard ethernet sized frames.
490	 */
491	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
492	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
493
494	/*
495	** Allocate and Setup Queues
496	*/
497	if (igb_allocate_queues(adapter)) {
498		error = ENOMEM;
499		goto err_pci;
500	}
501
502	/*
503	** Start from a known state, this is
504	** important in reading the nvm and
505	** mac from that.
506	*/
507	e1000_reset_hw(&adapter->hw);
508
509	/* Make sure we have a good EEPROM before we read from it */
510	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
511		/*
512		** Some PCI-E parts fail the first check due to
513		** the link being in sleep state, call it again,
514		** if it fails a second time its a real issue.
515		*/
516		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
517			device_printf(dev,
518			    "The EEPROM Checksum Is Not Valid\n");
519			error = EIO;
520			goto err_late;
521		}
522	}
523
524	/*
525	** Copy the permanent MAC address out of the EEPROM
526	*/
527	if (e1000_read_mac_addr(&adapter->hw) < 0) {
528		device_printf(dev, "EEPROM read error while reading MAC"
529		    " address\n");
530		error = EIO;
531		goto err_late;
532	}
533	/* Check its sanity */
534	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
535		device_printf(dev, "Invalid MAC address\n");
536		error = EIO;
537		goto err_late;
538	}
539
540	/*
541	** Configure Interrupts
542	*/
543	if ((adapter->msix > 1) && (igb_enable_msix))
544		error = igb_allocate_msix(adapter);
545	else /* MSI or Legacy */
546		error = igb_allocate_legacy(adapter);
547	if (error)
548		goto err_late;
549
550	/* Setup OS specific network interface */
551	igb_setup_interface(dev, adapter);
552
553	/* Now get a good starting state */
554	igb_reset(adapter);
555
556	/* Initialize statistics */
557	igb_update_stats_counters(adapter);
558
559	adapter->hw.mac.get_link_status = 1;
560	igb_update_link_status(adapter);
561
562	/* Indicate SOL/IDER usage */
563	if (e1000_check_reset_block(&adapter->hw))
564		device_printf(dev,
565		    "PHY reset is blocked due to SOL/IDER session.\n");
566
567	/* Determine if we have to control management hardware */
568	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
569
570	/*
571	 * Setup Wake-on-Lan
572	 */
573	/* APME bit in EEPROM is mapped to WUC.APME */
574	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
575	if (eeprom_data)
576		adapter->wol = E1000_WUFC_MAG;
577
578	/* Register for VLAN events */
579	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
580	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
581	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
582	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
583
584	/* Tell the stack that the interface is not active */
585	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
586
587	INIT_DEBUGOUT("igb_attach: end");
588
589	return (0);
590
591err_late:
592	igb_free_transmit_structures(adapter);
593	igb_free_receive_structures(adapter);
594	igb_release_hw_control(adapter);
595err_pci:
596	igb_free_pci_resources(adapter);
597	IGB_CORE_LOCK_DESTROY(adapter);
598
599	return (error);
600}
601
602/*********************************************************************
603 *  Device removal routine
604 *
605 *  The detach entry point is called when the driver is being removed.
606 *  This routine stops the adapter and deallocates all the resources
607 *  that were allocated for driver operation.
608 *
609 *  return 0 on success, positive on failure
610 *********************************************************************/
611
612static int
613igb_detach(device_t dev)
614{
615	struct adapter	*adapter = device_get_softc(dev);
616	struct ifnet	*ifp = adapter->ifp;
617
618	INIT_DEBUGOUT("igb_detach: begin");
619
620	/* Make sure VLANS are not using driver */
621	if (adapter->ifp->if_vlantrunk != NULL) {
622		device_printf(dev,"Vlan in use, detach first\n");
623		return (EBUSY);
624	}
625
626	IGB_CORE_LOCK(adapter);
627	adapter->in_detach = 1;
628	igb_stop(adapter);
629	IGB_CORE_UNLOCK(adapter);
630
631	e1000_phy_hw_reset(&adapter->hw);
632
633	/* Give control back to firmware */
634	igb_release_manageability(adapter);
635	igb_release_hw_control(adapter);
636
637	if (adapter->wol) {
638		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
639		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
640		igb_enable_wakeup(dev);
641	}
642
643	/* Unregister VLAN events */
644	if (adapter->vlan_attach != NULL)
645		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
646	if (adapter->vlan_detach != NULL)
647		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
648
649	ether_ifdetach(adapter->ifp);
650
651	callout_drain(&adapter->timer);
652
653	igb_free_pci_resources(adapter);
654	bus_generic_detach(dev);
655	if_free(ifp);
656
657	igb_free_transmit_structures(adapter);
658	igb_free_receive_structures(adapter);
659
660	IGB_CORE_LOCK_DESTROY(adapter);
661
662	return (0);
663}
664
665/*********************************************************************
666 *
667 *  Shutdown entry point
668 *
669 **********************************************************************/
670
671static int
672igb_shutdown(device_t dev)
673{
674	return igb_suspend(dev);
675}
676
677/*
678 * Suspend/resume device methods.
679 */
680static int
681igb_suspend(device_t dev)
682{
683	struct adapter *adapter = device_get_softc(dev);
684
685	IGB_CORE_LOCK(adapter);
686
687	igb_stop(adapter);
688
689        igb_release_manageability(adapter);
690	igb_release_hw_control(adapter);
691
692        if (adapter->wol) {
693                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
694                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
695                igb_enable_wakeup(dev);
696        }
697
698	IGB_CORE_UNLOCK(adapter);
699
700	return bus_generic_suspend(dev);
701}
702
703static int
704igb_resume(device_t dev)
705{
706	struct adapter *adapter = device_get_softc(dev);
707	struct ifnet *ifp = adapter->ifp;
708
709	IGB_CORE_LOCK(adapter);
710	igb_init_locked(adapter);
711	igb_init_manageability(adapter);
712
713	if ((ifp->if_flags & IFF_UP) &&
714	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
715		igb_start(ifp);
716
717	IGB_CORE_UNLOCK(adapter);
718
719	return bus_generic_resume(dev);
720}
721
722
723/*********************************************************************
724 *  Transmit entry point
725 *
726 *  igb_start is called by the stack to initiate a transmit.
727 *  The driver will remain in this routine as long as there are
728 *  packets to transmit and transmit resources are available.
729 *  In case resources are not available stack is notified and
730 *  the packet is requeued.
731 **********************************************************************/
732
733static void
734igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
735{
736	struct adapter	*adapter = ifp->if_softc;
737	struct mbuf	*m_head;
738
739	IGB_TX_LOCK_ASSERT(txr);
740
741	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
742	    IFF_DRV_RUNNING)
743		return;
744	if (!adapter->link_active)
745		return;
746
747	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
748
749		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
750		if (m_head == NULL)
751			break;
752		/*
753		 *  Encapsulation can modify our pointer, and or make it
754		 *  NULL on failure.  In that event, we can't requeue.
755		 */
756		if (igb_xmit(txr, &m_head)) {
757			if (m_head == NULL)
758				break;
759			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
760			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
761			break;
762		}
763
764		/* Send a copy of the frame to the BPF listener */
765		ETHER_BPF_MTAP(ifp, m_head);
766
767		/* Set watchdog on */
768		txr->watchdog_check = TRUE;
769	}
770}
771
772/*
773 * Legacy TX driver routine, called from the
774 * stack, always uses tx[0], and spins for it.
775 * Should not be used with multiqueue tx
776 */
777static void
778igb_start(struct ifnet *ifp)
779{
780	struct adapter	*adapter = ifp->if_softc;
781	struct tx_ring	*txr = adapter->tx_rings;
782
783	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
784		IGB_TX_LOCK(txr);
785		igb_start_locked(txr, ifp);
786		IGB_TX_UNLOCK(txr);
787	}
788	return;
789}
790
791#if __FreeBSD_version >= 800000
792/*
793** Multiqueue Transmit driver
794**
795*/
796static int
797igb_mq_start(struct ifnet *ifp, struct mbuf *m)
798{
799	struct adapter	*adapter = ifp->if_softc;
800	struct tx_ring	*txr;
801	int 		i = 0, err = 0;
802
803	/* Which queue to use */
804	if ((m->m_flags & M_FLOWID) != 0)
805		i = m->m_pkthdr.flowid % adapter->num_queues;
806	txr = &adapter->tx_rings[i];
807
808	if (IGB_TX_TRYLOCK(txr)) {
809		err = igb_mq_start_locked(ifp, txr, m);
810		IGB_TX_UNLOCK(txr);
811	} else
812		err = drbr_enqueue(ifp, txr->br, m);
813
814	return (err);
815}
816
817static int
818igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
819{
820	struct adapter  *adapter = txr->adapter;
821        struct mbuf     *next;
822        int             err = 0, enq;
823
824	IGB_TX_LOCK_ASSERT(txr);
825
826	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
827	    IFF_DRV_RUNNING || adapter->link_active == 0) {
828		if (m != NULL)
829			err = drbr_enqueue(ifp, txr->br, m);
830		return (err);
831	}
832
833	enq = 0;
834	if (m == NULL)
835		next = drbr_dequeue(ifp, txr->br);
836	else
837		next = m;
838	/* Process the queue */
839	while (next != NULL) {
840		if ((err = igb_xmit(txr, &next)) != 0) {
841			if (next != NULL)
842				err = drbr_enqueue(ifp, txr->br, next);
843			break;
844		}
845		enq++;
846		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
847		ETHER_BPF_MTAP(ifp, next);
848		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
849			break;
850		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
851			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
852			break;
853		}
854		next = drbr_dequeue(ifp, txr->br);
855	}
856	if (enq > 0) {
857		/* Set the watchdog */
858		txr->watchdog_check = TRUE;
859	}
860	return (err);
861}
862
863/*
864** Flush all ring buffers
865*/
866static void
867igb_qflush(struct ifnet *ifp)
868{
869	struct adapter	*adapter = ifp->if_softc;
870	struct tx_ring	*txr = adapter->tx_rings;
871	struct mbuf	*m;
872
873	for (int i = 0; i < adapter->num_queues; i++, txr++) {
874		IGB_TX_LOCK(txr);
875		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
876			m_freem(m);
877		IGB_TX_UNLOCK(txr);
878	}
879	if_qflush(ifp);
880}
881#endif /* __FreeBSD_version >= 800000 */
882
883/*********************************************************************
884 *  Ioctl entry point
885 *
886 *  igb_ioctl is called when the user wants to configure the
887 *  interface.
888 *
889 *  return 0 on success, positive on failure
890 **********************************************************************/
891
892static int
893igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
894{
895	struct adapter	*adapter = ifp->if_softc;
896	struct ifreq *ifr = (struct ifreq *)data;
897#ifdef INET
898	struct ifaddr *ifa = (struct ifaddr *)data;
899#endif
900	int error = 0;
901
902	if (adapter->in_detach)
903		return (error);
904
905	switch (command) {
906	case SIOCSIFADDR:
907#ifdef INET
908		if (ifa->ifa_addr->sa_family == AF_INET) {
909			/*
910			 * XXX
911			 * Since resetting hardware takes a very long time
912			 * and results in link renegotiation we only
913			 * initialize the hardware only when it is absolutely
914			 * required.
915			 */
916			ifp->if_flags |= IFF_UP;
917			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
918				IGB_CORE_LOCK(adapter);
919				igb_init_locked(adapter);
920				IGB_CORE_UNLOCK(adapter);
921			}
922			if (!(ifp->if_flags & IFF_NOARP))
923				arp_ifinit(ifp, ifa);
924		} else
925#endif
926			error = ether_ioctl(ifp, command, data);
927		break;
928	case SIOCSIFMTU:
929	    {
930		int max_frame_size;
931
932		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
933
934		IGB_CORE_LOCK(adapter);
935		max_frame_size = 9234;
936		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
937		    ETHER_CRC_LEN) {
938			IGB_CORE_UNLOCK(adapter);
939			error = EINVAL;
940			break;
941		}
942
943		ifp->if_mtu = ifr->ifr_mtu;
944		adapter->max_frame_size =
945		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
946		igb_init_locked(adapter);
947		IGB_CORE_UNLOCK(adapter);
948		break;
949	    }
950	case SIOCSIFFLAGS:
951		IOCTL_DEBUGOUT("ioctl rcv'd:\
952		    SIOCSIFFLAGS (Set Interface Flags)");
953		IGB_CORE_LOCK(adapter);
954		if (ifp->if_flags & IFF_UP) {
955			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
956				if ((ifp->if_flags ^ adapter->if_flags) &
957				    (IFF_PROMISC | IFF_ALLMULTI)) {
958					igb_disable_promisc(adapter);
959					igb_set_promisc(adapter);
960				}
961			} else
962				igb_init_locked(adapter);
963		} else
964			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
965				igb_stop(adapter);
966		adapter->if_flags = ifp->if_flags;
967		IGB_CORE_UNLOCK(adapter);
968		break;
969	case SIOCADDMULTI:
970	case SIOCDELMULTI:
971		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
972		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
973			IGB_CORE_LOCK(adapter);
974			igb_disable_intr(adapter);
975			igb_set_multi(adapter);
976				igb_enable_intr(adapter);
977			IGB_CORE_UNLOCK(adapter);
978		}
979		break;
980	case SIOCSIFMEDIA:
981		/* Check SOL/IDER usage */
982		IGB_CORE_LOCK(adapter);
983		if (e1000_check_reset_block(&adapter->hw)) {
984			IGB_CORE_UNLOCK(adapter);
985			device_printf(adapter->dev, "Media change is"
986			    " blocked due to SOL/IDER session.\n");
987			break;
988		}
989		IGB_CORE_UNLOCK(adapter);
990	case SIOCGIFMEDIA:
991		IOCTL_DEBUGOUT("ioctl rcv'd: \
992		    SIOCxIFMEDIA (Get/Set Interface Media)");
993		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
994		break;
995	case SIOCSIFCAP:
996	    {
997		int mask, reinit;
998
999		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1000		reinit = 0;
1001		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1002		if (mask & IFCAP_HWCSUM) {
1003			ifp->if_capenable ^= IFCAP_HWCSUM;
1004			reinit = 1;
1005		}
1006		if (mask & IFCAP_TSO4) {
1007			ifp->if_capenable ^= IFCAP_TSO4;
1008			reinit = 1;
1009		}
1010		if (mask & IFCAP_VLAN_HWTAGGING) {
1011			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1012			reinit = 1;
1013		}
1014		if (mask & IFCAP_LRO) {
1015			ifp->if_capenable ^= IFCAP_LRO;
1016			reinit = 1;
1017		}
1018		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1019			igb_init(adapter);
1020		VLAN_CAPABILITIES(ifp);
1021		break;
1022	    }
1023
1024	default:
1025		error = ether_ioctl(ifp, command, data);
1026		break;
1027	}
1028
1029	return (error);
1030}
1031
1032
1033/*********************************************************************
1034 *  Init entry point
1035 *
1036 *  This routine is used in two ways. It is used by the stack as
1037 *  init entry point in network interface structure. It is also used
1038 *  by the driver as a hw/sw initialization routine to get to a
1039 *  consistent state.
1040 *
1041 *  return 0 on success, positive on failure
1042 **********************************************************************/
1043
1044static void
1045igb_init_locked(struct adapter *adapter)
1046{
1047	struct ifnet	*ifp = adapter->ifp;
1048	device_t	dev = adapter->dev;
1049
1050	INIT_DEBUGOUT("igb_init: begin");
1051
1052	IGB_CORE_LOCK_ASSERT(adapter);
1053
1054	igb_disable_intr(adapter);
1055	callout_stop(&adapter->timer);
1056
1057	/* Get the latest mac address, User can use a LAA */
1058        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1059              ETHER_ADDR_LEN);
1060
1061	/* Put the address into the Receive Address Array */
1062	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1063
1064	igb_reset(adapter);
1065	igb_update_link_status(adapter);
1066
1067	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1068
1069	/* Set hardware offload abilities */
1070	ifp->if_hwassist = 0;
1071	if (ifp->if_capenable & IFCAP_TXCSUM) {
1072		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1073#if __FreeBSD_version >= 800000
1074		if (adapter->hw.mac.type == e1000_82576)
1075			ifp->if_hwassist |= CSUM_SCTP;
1076#endif
1077	}
1078
1079	if (ifp->if_capenable & IFCAP_TSO4)
1080		ifp->if_hwassist |= CSUM_TSO;
1081
1082	/* Configure for OS presence */
1083	igb_init_manageability(adapter);
1084
1085	/* Prepare transmit descriptors and buffers */
1086	igb_setup_transmit_structures(adapter);
1087	igb_initialize_transmit_units(adapter);
1088
1089	/* Setup Multicast table */
1090	igb_set_multi(adapter);
1091
1092	/*
1093	** Figure out the desired mbuf pool
1094	** for doing jumbo/packetsplit
1095	*/
1096	if (ifp->if_mtu > ETHERMTU)
1097		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1098	else
1099		adapter->rx_mbuf_sz = MCLBYTES;
1100
1101	/* Prepare receive descriptors and buffers */
1102	if (igb_setup_receive_structures(adapter)) {
1103		device_printf(dev, "Could not setup receive structures\n");
1104		return;
1105	}
1106	igb_initialize_receive_units(adapter);
1107
1108	/* Don't lose promiscuous settings */
1109	igb_set_promisc(adapter);
1110
1111	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1112	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1113
1114	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1115	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1116
1117	if (adapter->msix > 1) /* Set up queue routing */
1118		igb_configure_queues(adapter);
1119
1120	/* Set up VLAN tag offload and filter */
1121	igb_setup_vlan_hw_support(adapter);
1122
1123	/* this clears any pending interrupts */
1124	E1000_READ_REG(&adapter->hw, E1000_ICR);
1125	igb_enable_intr(adapter);
1126	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1127
1128	/* Don't reset the phy next time init gets called */
1129	adapter->hw.phy.reset_disable = TRUE;
1130}
1131
1132static void
1133igb_init(void *arg)
1134{
1135	struct adapter *adapter = arg;
1136
1137	IGB_CORE_LOCK(adapter);
1138	igb_init_locked(adapter);
1139	IGB_CORE_UNLOCK(adapter);
1140}
1141
1142
1143static void
1144igb_handle_rxtx(void *context, int pending)
1145{
1146	struct adapter	*adapter = context;
1147	struct tx_ring	*txr = adapter->tx_rings;
1148	struct rx_ring	*rxr = adapter->rx_rings;
1149	struct ifnet	*ifp;
1150
1151	ifp = adapter->ifp;
1152
1153	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1154		if (igb_rxeof(rxr, adapter->rx_process_limit))
1155			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1156		IGB_TX_LOCK(txr);
1157		igb_txeof(txr);
1158
1159#if __FreeBSD_version >= 800000
1160		if (!drbr_empty(ifp, txr->br))
1161			igb_mq_start_locked(ifp, txr, NULL);
1162#else
1163		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1164			igb_start_locked(txr, ifp);
1165#endif
1166		IGB_TX_UNLOCK(txr);
1167	}
1168
1169	igb_enable_intr(adapter);
1170}
1171
1172static void
1173igb_handle_que(void *context, int pending)
1174{
1175	struct igb_queue *que = context;
1176	struct adapter *adapter = que->adapter;
1177	struct tx_ring *txr = que->txr;
1178	struct rx_ring *rxr = que->rxr;
1179	struct ifnet	*ifp = adapter->ifp;
1180	u32		loop = IGB_MAX_LOOP;
1181	bool		more;
1182
1183	/* RX first */
1184	do {
1185		more = igb_rxeof(rxr, -1);
1186	} while (loop-- && more);
1187
1188	if (IGB_TX_TRYLOCK(txr)) {
1189		loop = IGB_MAX_LOOP;
1190		do {
1191			more = igb_txeof(txr);
1192		} while (loop-- && more);
1193#if __FreeBSD_version >= 800000
1194		igb_mq_start_locked(ifp, txr, NULL);
1195#else
1196		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1197			igb_start_locked(txr, ifp);
1198#endif
1199		IGB_TX_UNLOCK(txr);
1200	}
1201
1202	/* Reenable this interrupt */
1203	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1204}
1205
1206
1207/*********************************************************************
1208 *
1209 *  MSI/Legacy Deferred
1210 *  Interrupt Service routine
1211 *
1212 *********************************************************************/
1213static int
1214igb_irq_fast(void *arg)
1215{
1216	struct adapter	*adapter = arg;
1217	uint32_t	reg_icr;
1218
1219
1220	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1221
1222	/* Hot eject?  */
1223	if (reg_icr == 0xffffffff)
1224		return FILTER_STRAY;
1225
1226	/* Definitely not our interrupt.  */
1227	if (reg_icr == 0x0)
1228		return FILTER_STRAY;
1229
1230	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1231		return FILTER_STRAY;
1232
1233	/*
1234	 * Mask interrupts until the taskqueue is finished running.  This is
1235	 * cheap, just assume that it is needed.  This also works around the
1236	 * MSI message reordering errata on certain systems.
1237	 */
1238	igb_disable_intr(adapter);
1239	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1240
1241	/* Link status change */
1242	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1243		adapter->hw.mac.get_link_status = 1;
1244		igb_update_link_status(adapter);
1245	}
1246
1247	if (reg_icr & E1000_ICR_RXO)
1248		adapter->rx_overruns++;
1249	return FILTER_HANDLED;
1250}
1251
1252
1253/*********************************************************************
1254 *
1255 *  MSIX TX Interrupt Service routine
1256 *
1257 **********************************************************************/
1258static void
1259igb_msix_que(void *arg)
1260{
1261	struct igb_queue *que = arg;
1262	struct adapter *adapter = que->adapter;
1263	struct tx_ring *txr = que->txr;
1264	struct rx_ring *rxr = que->rxr;
1265	u32		newitr = 0;
1266	bool		more_tx, more_rx;
1267
1268	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1269	++que->irqs;
1270
1271	IGB_TX_LOCK(txr);
1272	more_tx = igb_txeof(txr);
1273	IGB_TX_UNLOCK(txr);
1274
1275	more_rx = igb_rxeof(rxr, adapter->rx_process_limit);
1276
1277	if (igb_enable_aim == FALSE)
1278		goto no_calc;
1279	/*
1280	** Do Adaptive Interrupt Moderation:
1281        **  - Write out last calculated setting
1282	**  - Calculate based on average size over
1283	**    the last interval.
1284	*/
1285        if (que->eitr_setting)
1286                E1000_WRITE_REG(&adapter->hw,
1287                    E1000_EITR(que->msix), que->eitr_setting);
1288
1289        que->eitr_setting = 0;
1290
1291        /* Idle, do nothing */
1292        if ((txr->bytes == 0) && (rxr->bytes == 0))
1293                goto no_calc;
1294
1295        /* Used half Default if sub-gig */
1296        if (adapter->link_speed != 1000)
1297                newitr = IGB_DEFAULT_ITR / 2;
1298        else {
1299		if ((txr->bytes) && (txr->packets))
1300                	newitr = txr->bytes/txr->packets;
1301		if ((rxr->bytes) && (rxr->packets))
1302			newitr = max(newitr,
1303			    (rxr->bytes / rxr->packets));
1304                newitr += 24; /* account for hardware frame, crc */
1305		/* set an upper boundary */
1306		newitr = min(newitr, 3000);
1307		/* Be nice to the mid range */
1308                if ((newitr > 300) && (newitr < 1200))
1309                        newitr = (newitr / 3);
1310                else
1311                        newitr = (newitr / 2);
1312        }
1313        newitr &= 0x7FFC;  /* Mask invalid bits */
1314        if (adapter->hw.mac.type == e1000_82575)
1315                newitr |= newitr << 16;
1316        else
1317                newitr |= 0x8000000;
1318
1319        /* save for next interrupt */
1320        que->eitr_setting = newitr;
1321
1322        /* Reset state */
1323        txr->bytes = 0;
1324        txr->packets = 0;
1325        rxr->bytes = 0;
1326        rxr->packets = 0;
1327
1328no_calc:
1329	/* Schedule a clean task if needed*/
1330	if (more_tx || more_rx)
1331		taskqueue_enqueue(que->tq, &que->que_task);
1332	else
1333		/* Reenable this interrupt */
1334		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1335	return;
1336}
1337
1338
1339/*********************************************************************
1340 *
1341 *  MSIX Link Interrupt Service routine
1342 *
1343 **********************************************************************/
1344
1345static void
1346igb_msix_link(void *arg)
1347{
1348	struct adapter	*adapter = arg;
1349	u32       	icr;
1350
1351	++adapter->link_irq;
1352	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1353	if (!(icr & E1000_ICR_LSC))
1354		goto spurious;
1355	adapter->hw.mac.get_link_status = 1;
1356	igb_update_link_status(adapter);
1357
1358spurious:
1359	/* Rearm */
1360	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1361	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1362	return;
1363}
1364
1365
1366/*********************************************************************
1367 *
1368 *  Media Ioctl callback
1369 *
1370 *  This routine is called whenever the user queries the status of
1371 *  the interface using ifconfig.
1372 *
1373 **********************************************************************/
1374static void
1375igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1376{
1377	struct adapter *adapter = ifp->if_softc;
1378	u_char fiber_type = IFM_1000_SX;
1379
1380	INIT_DEBUGOUT("igb_media_status: begin");
1381
1382	IGB_CORE_LOCK(adapter);
1383	igb_update_link_status(adapter);
1384
1385	ifmr->ifm_status = IFM_AVALID;
1386	ifmr->ifm_active = IFM_ETHER;
1387
1388	if (!adapter->link_active) {
1389		IGB_CORE_UNLOCK(adapter);
1390		return;
1391	}
1392
1393	ifmr->ifm_status |= IFM_ACTIVE;
1394
1395	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1396	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1397		ifmr->ifm_active |= fiber_type | IFM_FDX;
1398	else {
1399		switch (adapter->link_speed) {
1400		case 10:
1401			ifmr->ifm_active |= IFM_10_T;
1402			break;
1403		case 100:
1404			ifmr->ifm_active |= IFM_100_TX;
1405			break;
1406		case 1000:
1407			ifmr->ifm_active |= IFM_1000_T;
1408			break;
1409		}
1410		if (adapter->link_duplex == FULL_DUPLEX)
1411			ifmr->ifm_active |= IFM_FDX;
1412		else
1413			ifmr->ifm_active |= IFM_HDX;
1414	}
1415	IGB_CORE_UNLOCK(adapter);
1416}
1417
1418/*********************************************************************
1419 *
1420 *  Media Ioctl callback
1421 *
1422 *  This routine is called when the user changes speed/duplex using
1423 *  media/mediopt option with ifconfig.
1424 *
1425 **********************************************************************/
1426static int
1427igb_media_change(struct ifnet *ifp)
1428{
1429	struct adapter *adapter = ifp->if_softc;
1430	struct ifmedia  *ifm = &adapter->media;
1431
1432	INIT_DEBUGOUT("igb_media_change: begin");
1433
1434	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1435		return (EINVAL);
1436
1437	IGB_CORE_LOCK(adapter);
1438	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1439	case IFM_AUTO:
1440		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1441		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1442		break;
1443	case IFM_1000_LX:
1444	case IFM_1000_SX:
1445	case IFM_1000_T:
1446		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1447		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1448		break;
1449	case IFM_100_TX:
1450		adapter->hw.mac.autoneg = FALSE;
1451		adapter->hw.phy.autoneg_advertised = 0;
1452		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1453			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1454		else
1455			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1456		break;
1457	case IFM_10_T:
1458		adapter->hw.mac.autoneg = FALSE;
1459		adapter->hw.phy.autoneg_advertised = 0;
1460		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1461			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1462		else
1463			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1464		break;
1465	default:
1466		device_printf(adapter->dev, "Unsupported media type\n");
1467	}
1468
1469	/* As the speed/duplex settings my have changed we need to
1470	 * reset the PHY.
1471	 */
1472	adapter->hw.phy.reset_disable = FALSE;
1473
1474	igb_init_locked(adapter);
1475	IGB_CORE_UNLOCK(adapter);
1476
1477	return (0);
1478}
1479
1480
1481/*********************************************************************
1482 *
1483 *  This routine maps the mbufs to Advanced TX descriptors.
1484 *  used by the 82575 adapter.
1485 *
1486 **********************************************************************/
1487
1488static int
1489igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1490{
1491	struct adapter		*adapter = txr->adapter;
1492	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1493	bus_dmamap_t		map;
1494	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1495	union e1000_adv_tx_desc	*txd = NULL;
1496	struct mbuf		*m_head;
1497	u32			olinfo_status = 0, cmd_type_len = 0;
1498	int			nsegs, i, j, error, first, last = 0;
1499	u32			hdrlen = 0;
1500
1501	m_head = *m_headp;
1502
1503
1504	/* Set basic descriptor constants */
1505	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1506	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1507	if (m_head->m_flags & M_VLANTAG)
1508		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1509
1510        /*
1511         * Force a cleanup if number of TX descriptors
1512         * available hits the threshold
1513         */
1514	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1515		igb_txeof(txr);
1516		/* Now do we at least have a minimal? */
1517		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1518			txr->no_desc_avail++;
1519			return (ENOBUFS);
1520		}
1521	}
1522
1523	/*
1524         * Map the packet for DMA.
1525	 *
1526	 * Capture the first descriptor index,
1527	 * this descriptor will have the index
1528	 * of the EOP which is the only one that
1529	 * now gets a DONE bit writeback.
1530	 */
1531	first = txr->next_avail_desc;
1532	tx_buffer = &txr->tx_buffers[first];
1533	tx_buffer_mapped = tx_buffer;
1534	map = tx_buffer->map;
1535
1536	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1537	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1538
1539	if (error == EFBIG) {
1540		struct mbuf *m;
1541
1542		m = m_defrag(*m_headp, M_DONTWAIT);
1543		if (m == NULL) {
1544			adapter->mbuf_defrag_failed++;
1545			m_freem(*m_headp);
1546			*m_headp = NULL;
1547			return (ENOBUFS);
1548		}
1549		*m_headp = m;
1550
1551		/* Try it again */
1552		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1553		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1554
1555		if (error == ENOMEM) {
1556			adapter->no_tx_dma_setup++;
1557			return (error);
1558		} else if (error != 0) {
1559			adapter->no_tx_dma_setup++;
1560			m_freem(*m_headp);
1561			*m_headp = NULL;
1562			return (error);
1563		}
1564	} else if (error == ENOMEM) {
1565		adapter->no_tx_dma_setup++;
1566		return (error);
1567	} else if (error != 0) {
1568		adapter->no_tx_dma_setup++;
1569		m_freem(*m_headp);
1570		*m_headp = NULL;
1571		return (error);
1572	}
1573
1574	/* Check again to be sure we have enough descriptors */
1575        if (nsegs > (txr->tx_avail - 2)) {
1576                txr->no_desc_avail++;
1577		bus_dmamap_unload(txr->txtag, map);
1578		return (ENOBUFS);
1579        }
1580	m_head = *m_headp;
1581
1582        /*
1583         * Set up the context descriptor:
1584         * used when any hardware offload is done.
1585	 * This includes CSUM, VLAN, and TSO. It
1586	 * will use the first descriptor.
1587         */
1588        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1589		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1590			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1591			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1592			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1593		} else
1594			return (ENXIO);
1595	} else if (igb_tx_ctx_setup(txr, m_head))
1596		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1597
1598	/* Calculate payload length */
1599	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1600	    << E1000_ADVTXD_PAYLEN_SHIFT);
1601
1602	/* 82575 needs the queue index added */
1603	if (adapter->hw.mac.type == e1000_82575)
1604		olinfo_status |= txr->me << 4;
1605
1606	/* Set up our transmit descriptors */
1607	i = txr->next_avail_desc;
1608	for (j = 0; j < nsegs; j++) {
1609		bus_size_t seg_len;
1610		bus_addr_t seg_addr;
1611
1612		tx_buffer = &txr->tx_buffers[i];
1613		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1614		seg_addr = segs[j].ds_addr;
1615		seg_len  = segs[j].ds_len;
1616
1617		txd->read.buffer_addr = htole64(seg_addr);
1618		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1619		txd->read.olinfo_status = htole32(olinfo_status);
1620		last = i;
1621		if (++i == adapter->num_tx_desc)
1622			i = 0;
1623		tx_buffer->m_head = NULL;
1624		tx_buffer->next_eop = -1;
1625	}
1626
1627	txr->next_avail_desc = i;
1628	txr->tx_avail -= nsegs;
1629
1630        tx_buffer->m_head = m_head;
1631	tx_buffer_mapped->map = tx_buffer->map;
1632	tx_buffer->map = map;
1633        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1634
1635        /*
1636         * Last Descriptor of Packet
1637	 * needs End Of Packet (EOP)
1638	 * and Report Status (RS)
1639         */
1640        txd->read.cmd_type_len |=
1641	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1642	/*
1643	 * Keep track in the first buffer which
1644	 * descriptor will be written back
1645	 */
1646	tx_buffer = &txr->tx_buffers[first];
1647	tx_buffer->next_eop = last;
1648	txr->watchdog_time = ticks;
1649
1650	/*
1651	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1652	 * that this frame is available to transmit.
1653	 */
1654	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1655	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1656	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1657	++txr->tx_packets;
1658
1659	return (0);
1660
1661}
1662
1663static void
1664igb_set_promisc(struct adapter *adapter)
1665{
1666	struct ifnet	*ifp = adapter->ifp;
1667	uint32_t	reg_rctl;
1668
1669	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1670
1671	if (ifp->if_flags & IFF_PROMISC) {
1672		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1673		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1674	} else if (ifp->if_flags & IFF_ALLMULTI) {
1675		reg_rctl |= E1000_RCTL_MPE;
1676		reg_rctl &= ~E1000_RCTL_UPE;
1677		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1678	}
1679}
1680
1681static void
1682igb_disable_promisc(struct adapter *adapter)
1683{
1684	uint32_t	reg_rctl;
1685
1686	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1687
1688	reg_rctl &=  (~E1000_RCTL_UPE);
1689	reg_rctl &=  (~E1000_RCTL_MPE);
1690	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1691}
1692
1693
1694/*********************************************************************
1695 *  Multicast Update
1696 *
1697 *  This routine is called whenever multicast address list is updated.
1698 *
1699 **********************************************************************/
1700
1701static void
1702igb_set_multi(struct adapter *adapter)
1703{
1704	struct ifnet	*ifp = adapter->ifp;
1705	struct ifmultiaddr *ifma;
1706	u32 reg_rctl = 0;
1707	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1708
1709	int mcnt = 0;
1710
1711	IOCTL_DEBUGOUT("igb_set_multi: begin");
1712
1713#if __FreeBSD_version < 800000
1714	IF_ADDR_LOCK(ifp);
1715#else
1716	if_maddr_rlock(ifp);
1717#endif
1718	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1719		if (ifma->ifma_addr->sa_family != AF_LINK)
1720			continue;
1721
1722		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1723			break;
1724
1725		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1726		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1727		mcnt++;
1728	}
1729#if __FreeBSD_version < 800000
1730	IF_ADDR_UNLOCK(ifp);
1731#else
1732	if_maddr_runlock(ifp);
1733#endif
1734
1735	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1736		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1737		reg_rctl |= E1000_RCTL_MPE;
1738		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1739	} else
1740		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1741}
1742
1743
1744/*********************************************************************
1745 *  Timer routine:
1746 *  	This routine checks for link status,
1747 *	updates statistics, and does the watchdog.
1748 *
1749 **********************************************************************/
1750
1751static void
1752igb_local_timer(void *arg)
1753{
1754	struct adapter		*adapter = arg;
1755	struct ifnet		*ifp = adapter->ifp;
1756	device_t		dev = adapter->dev;
1757	struct tx_ring		*txr = adapter->tx_rings;
1758
1759
1760	IGB_CORE_LOCK_ASSERT(adapter);
1761
1762	igb_update_link_status(adapter);
1763	igb_update_stats_counters(adapter);
1764
1765	if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1766		igb_print_hw_stats(adapter);
1767
1768        /*
1769        ** Watchdog: check for time since any descriptor was cleaned
1770        */
1771	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1772		if (txr->watchdog_check == FALSE)
1773			continue;
1774		if ((ticks - txr->watchdog_time) > IGB_WATCHDOG)
1775			goto timeout;
1776	}
1777
1778	/* Trigger an RX interrupt on all queues */
1779	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1780	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1781	return;
1782
1783timeout:
1784	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
1785	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
1786            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
1787            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
1788	device_printf(dev,"TX(%d) desc avail = %d,"
1789            "Next TX to Clean = %d\n",
1790            txr->me, txr->tx_avail, txr->next_to_clean);
1791	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1792	adapter->watchdog_events++;
1793	igb_init_locked(adapter);
1794}
1795
1796static void
1797igb_update_link_status(struct adapter *adapter)
1798{
1799	struct e1000_hw *hw = &adapter->hw;
1800	struct ifnet *ifp = adapter->ifp;
1801	device_t dev = adapter->dev;
1802	struct tx_ring *txr = adapter->tx_rings;
1803	u32 link_check = 0;
1804
1805	/* Get the cached link value or read for real */
1806        switch (hw->phy.media_type) {
1807        case e1000_media_type_copper:
1808                if (hw->mac.get_link_status) {
1809			/* Do the work to read phy */
1810                        e1000_check_for_link(hw);
1811                        link_check = !hw->mac.get_link_status;
1812                } else
1813                        link_check = TRUE;
1814                break;
1815        case e1000_media_type_fiber:
1816                e1000_check_for_link(hw);
1817                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1818                                 E1000_STATUS_LU);
1819                break;
1820        case e1000_media_type_internal_serdes:
1821                e1000_check_for_link(hw);
1822                link_check = adapter->hw.mac.serdes_has_link;
1823                break;
1824        default:
1825        case e1000_media_type_unknown:
1826                break;
1827        }
1828
1829	/* Now we check if a transition has happened */
1830	if (link_check && (adapter->link_active == 0)) {
1831		e1000_get_speed_and_duplex(&adapter->hw,
1832		    &adapter->link_speed, &adapter->link_duplex);
1833		if (bootverbose)
1834			device_printf(dev, "Link is up %d Mbps %s\n",
1835			    adapter->link_speed,
1836			    ((adapter->link_duplex == FULL_DUPLEX) ?
1837			    "Full Duplex" : "Half Duplex"));
1838		adapter->link_active = 1;
1839		ifp->if_baudrate = adapter->link_speed * 1000000;
1840		if_link_state_change(ifp, LINK_STATE_UP);
1841	} else if (!link_check && (adapter->link_active == 1)) {
1842		ifp->if_baudrate = adapter->link_speed = 0;
1843		adapter->link_duplex = 0;
1844		if (bootverbose)
1845			device_printf(dev, "Link is Down\n");
1846		adapter->link_active = 0;
1847		if_link_state_change(ifp, LINK_STATE_DOWN);
1848		/* Turn off watchdogs */
1849		for (int i = 0; i < adapter->num_queues; i++, txr++)
1850			txr->watchdog_check = FALSE;
1851	}
1852}
1853
1854/*********************************************************************
1855 *
1856 *  This routine disables all traffic on the adapter by issuing a
1857 *  global reset on the MAC and deallocates TX/RX buffers.
1858 *
1859 **********************************************************************/
1860
1861static void
1862igb_stop(void *arg)
1863{
1864	struct adapter	*adapter = arg;
1865	struct ifnet	*ifp = adapter->ifp;
1866	struct tx_ring *txr = adapter->tx_rings;
1867
1868	IGB_CORE_LOCK_ASSERT(adapter);
1869
1870	INIT_DEBUGOUT("igb_stop: begin");
1871
1872	igb_disable_intr(adapter);
1873
1874	callout_stop(&adapter->timer);
1875
1876	/* Tell the stack that the interface is no longer active */
1877	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1878
1879	/* Unarm watchdog timer. */
1880	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1881		IGB_TX_LOCK(txr);
1882		txr->watchdog_check = FALSE;
1883		IGB_TX_UNLOCK(txr);
1884	}
1885
1886	e1000_reset_hw(&adapter->hw);
1887	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1888}
1889
1890
1891/*********************************************************************
1892 *
1893 *  Determine hardware revision.
1894 *
1895 **********************************************************************/
1896static void
1897igb_identify_hardware(struct adapter *adapter)
1898{
1899	device_t dev = adapter->dev;
1900
1901	/* Make sure our PCI config space has the necessary stuff set */
1902	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1903	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1904	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1905		device_printf(dev, "Memory Access and/or Bus Master bits "
1906		    "were not set!\n");
1907		adapter->hw.bus.pci_cmd_word |=
1908		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1909		pci_write_config(dev, PCIR_COMMAND,
1910		    adapter->hw.bus.pci_cmd_word, 2);
1911	}
1912
1913	/* Save off the information about this board */
1914	adapter->hw.vendor_id = pci_get_vendor(dev);
1915	adapter->hw.device_id = pci_get_device(dev);
1916	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1917	adapter->hw.subsystem_vendor_id =
1918	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
1919	adapter->hw.subsystem_device_id =
1920	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
1921
1922	/* Do Shared Code Init and Setup */
1923	if (e1000_set_mac_type(&adapter->hw)) {
1924		device_printf(dev, "Setup init failure\n");
1925		return;
1926	}
1927}
1928
1929static int
1930igb_allocate_pci_resources(struct adapter *adapter)
1931{
1932	device_t	dev = adapter->dev;
1933	int		rid;
1934
1935	rid = PCIR_BAR(0);
1936	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1937	    &rid, RF_ACTIVE);
1938	if (adapter->pci_mem == NULL) {
1939		device_printf(dev, "Unable to allocate bus resource: memory\n");
1940		return (ENXIO);
1941	}
1942	adapter->osdep.mem_bus_space_tag =
1943	    rman_get_bustag(adapter->pci_mem);
1944	adapter->osdep.mem_bus_space_handle =
1945	    rman_get_bushandle(adapter->pci_mem);
1946	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
1947
1948	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
1949
1950	/* This will setup either MSI/X or MSI */
1951	adapter->msix = igb_setup_msix(adapter);
1952	adapter->hw.back = &adapter->osdep;
1953
1954	return (0);
1955}
1956
1957/*********************************************************************
1958 *
1959 *  Setup the Legacy or MSI Interrupt handler
1960 *
1961 **********************************************************************/
1962static int
1963igb_allocate_legacy(struct adapter *adapter)
1964{
1965	device_t dev = adapter->dev;
1966	int error, rid = 0;
1967
1968	/* Turn off all interrupts */
1969	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
1970
1971	/* MSI RID is 1 */
1972	if (adapter->msix == 1)
1973		rid = 1;
1974
1975	/* We allocate a single interrupt resource */
1976	adapter->res = bus_alloc_resource_any(dev,
1977	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
1978	if (adapter->res == NULL) {
1979		device_printf(dev, "Unable to allocate bus resource: "
1980		    "interrupt\n");
1981		return (ENXIO);
1982	}
1983
1984	/*
1985	 * Try allocating a fast interrupt and the associated deferred
1986	 * processing contexts.
1987	 */
1988	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
1989	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
1990	    taskqueue_thread_enqueue, &adapter->tq);
1991	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
1992	    device_get_nameunit(adapter->dev));
1993	if ((error = bus_setup_intr(dev, adapter->res,
1994	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
1995	    adapter, &adapter->tag)) != 0) {
1996		device_printf(dev, "Failed to register fast interrupt "
1997			    "handler: %d\n", error);
1998		taskqueue_free(adapter->tq);
1999		adapter->tq = NULL;
2000		return (error);
2001	}
2002
2003	return (0);
2004}
2005
2006
2007/*********************************************************************
2008 *
2009 *  Setup the MSIX Queue Interrupt handlers:
2010 *
2011 **********************************************************************/
2012static int
2013igb_allocate_msix(struct adapter *adapter)
2014{
2015	device_t		dev = adapter->dev;
2016	struct igb_queue	*que = adapter->queues;
2017	int			error, rid, vector = 0;
2018
2019
2020	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2021		rid = vector +1;
2022		que->res = bus_alloc_resource_any(dev,
2023		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2024		if (que->res == NULL) {
2025			device_printf(dev,
2026			    "Unable to allocate bus resource: "
2027			    "MSIX Queue Interrupt\n");
2028			return (ENXIO);
2029		}
2030		error = bus_setup_intr(dev, que->res,
2031	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2032		    igb_msix_que, que, &que->tag);
2033		if (error) {
2034			que->res = NULL;
2035			device_printf(dev, "Failed to register Queue handler");
2036			return (error);
2037		}
2038		que->msix = vector;
2039		if (adapter->hw.mac.type == e1000_82575)
2040			que->eims = E1000_EICR_TX_QUEUE0 << i;
2041		else
2042			que->eims = 1 << vector;
2043		/*
2044		** Bind the msix vector, and thus the
2045		** rings to the corresponding cpu.
2046		*/
2047		if (adapter->num_queues > 1)
2048			bus_bind_intr(dev, que->res, i);
2049		/* Make tasklet for deferred handling */
2050		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2051		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2052		    taskqueue_thread_enqueue, &que->tq);
2053		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2054		    device_get_nameunit(adapter->dev));
2055	}
2056
2057	/* And Link */
2058	rid = vector + 1;
2059	adapter->res = bus_alloc_resource_any(dev,
2060	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2061	if (adapter->res == NULL) {
2062		device_printf(dev,
2063		    "Unable to allocate bus resource: "
2064		    "MSIX Link Interrupt\n");
2065		return (ENXIO);
2066	}
2067	if ((error = bus_setup_intr(dev, adapter->res,
2068	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2069	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2070		device_printf(dev, "Failed to register Link handler");
2071		return (error);
2072	}
2073	adapter->linkvec = vector;
2074
2075	return (0);
2076}
2077
2078
2079static void
2080igb_configure_queues(struct adapter *adapter)
2081{
2082	struct	e1000_hw	*hw = &adapter->hw;
2083	struct	igb_queue	*que;
2084	u32			tmp, ivar = 0;
2085	u32			newitr = IGB_DEFAULT_ITR;
2086
2087	/* First turn on RSS capability */
2088	if (adapter->hw.mac.type > e1000_82575)
2089		E1000_WRITE_REG(hw, E1000_GPIE,
2090		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2091		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2092
2093	/* Turn on MSIX */
2094	switch (adapter->hw.mac.type) {
2095	case e1000_82580:
2096		/* RX entries */
2097		for (int i = 0; i < adapter->num_queues; i++) {
2098			u32 index = i >> 1;
2099			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2100			que = &adapter->queues[i];
2101			if (i & 1) {
2102				ivar &= 0xFF00FFFF;
2103				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2104			} else {
2105				ivar &= 0xFFFFFF00;
2106				ivar |= que->msix | E1000_IVAR_VALID;
2107			}
2108			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2109		}
2110		/* TX entries */
2111		for (int i = 0; i < adapter->num_queues; i++) {
2112			u32 index = i >> 1;
2113			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2114			que = &adapter->queues[i];
2115			if (i & 1) {
2116				ivar &= 0x00FFFFFF;
2117				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2118			} else {
2119				ivar &= 0xFFFF00FF;
2120				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2121			}
2122			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2123			adapter->eims_mask |= que->eims;
2124		}
2125
2126		/* And for the link interrupt */
2127		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2128		adapter->link_mask = 1 << adapter->linkvec;
2129		adapter->eims_mask |= adapter->link_mask;
2130		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2131		break;
2132	case e1000_82576:
2133		/* RX entries */
2134		for (int i = 0; i < adapter->num_queues; i++) {
2135			u32 index = i & 0x7; /* Each IVAR has two entries */
2136			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2137			que = &adapter->queues[i];
2138			if (i < 8) {
2139				ivar &= 0xFFFFFF00;
2140				ivar |= que->msix | E1000_IVAR_VALID;
2141			} else {
2142				ivar &= 0xFF00FFFF;
2143				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2144			}
2145			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2146			adapter->eims_mask |= que->eims;
2147		}
2148		/* TX entries */
2149		for (int i = 0; i < adapter->num_queues; i++) {
2150			u32 index = i & 0x7; /* Each IVAR has two entries */
2151			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2152			que = &adapter->queues[i];
2153			if (i < 8) {
2154				ivar &= 0xFFFF00FF;
2155				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2156			} else {
2157				ivar &= 0x00FFFFFF;
2158				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2159			}
2160			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2161			adapter->eims_mask |= que->eims;
2162		}
2163
2164		/* And for the link interrupt */
2165		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2166		adapter->link_mask = 1 << adapter->linkvec;
2167		adapter->eims_mask |= adapter->link_mask;
2168		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2169		break;
2170
2171	case e1000_82575:
2172                /* enable MSI-X support*/
2173		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2174                tmp |= E1000_CTRL_EXT_PBA_CLR;
2175                /* Auto-Mask interrupts upon ICR read. */
2176                tmp |= E1000_CTRL_EXT_EIAME;
2177                tmp |= E1000_CTRL_EXT_IRCA;
2178                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2179
2180		/* Queues */
2181		for (int i = 0; i < adapter->num_queues; i++) {
2182			que = &adapter->queues[i];
2183			tmp = E1000_EICR_RX_QUEUE0 << i;
2184			tmp |= E1000_EICR_TX_QUEUE0 << i;
2185			que->eims = tmp;
2186			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2187			    i, que->eims);
2188			adapter->eims_mask |= que->eims;
2189		}
2190
2191		/* Link */
2192		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2193		    E1000_EIMS_OTHER);
2194		adapter->link_mask |= E1000_EIMS_OTHER;
2195		adapter->eims_mask |= adapter->link_mask;
2196	default:
2197		break;
2198	}
2199
2200	/* Set the starting interrupt rate */
2201        if (hw->mac.type == e1000_82575)
2202                newitr |= newitr << 16;
2203        else
2204                newitr |= 0x8000000;
2205
2206	for (int i = 0; i < adapter->num_queues; i++) {
2207		que = &adapter->queues[i];
2208		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2209	}
2210
2211	return;
2212}
2213
2214
2215static void
2216igb_free_pci_resources(struct adapter *adapter)
2217{
2218	struct		igb_queue *que = adapter->queues;
2219	device_t	dev = adapter->dev;
2220	int		rid;
2221
2222	/*
2223	** There is a slight possibility of a failure mode
2224	** in attach that will result in entering this function
2225	** before interrupt resources have been initialized, and
2226	** in that case we do not want to execute the loops below
2227	** We can detect this reliably by the state of the adapter
2228	** res pointer.
2229	*/
2230	if (adapter->res == NULL)
2231		goto mem;
2232
2233	/*
2234	 * First release all the interrupt resources:
2235	 */
2236	for (int i = 0; i < adapter->num_queues; i++, que++) {
2237		rid = que->msix + 1;
2238		if (que->tag != NULL) {
2239			bus_teardown_intr(dev, que->res, que->tag);
2240			que->tag = NULL;
2241		}
2242		if (que->res != NULL)
2243			bus_release_resource(dev,
2244			    SYS_RES_IRQ, rid, que->res);
2245	}
2246
2247	/* Clean the Legacy or Link interrupt last */
2248	if (adapter->linkvec) /* we are doing MSIX */
2249		rid = adapter->linkvec + 1;
2250	else
2251		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2252
2253	if (adapter->tag != NULL) {
2254		bus_teardown_intr(dev, adapter->res, adapter->tag);
2255		adapter->tag = NULL;
2256	}
2257	if (adapter->res != NULL)
2258		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2259
2260mem:
2261	if (adapter->msix)
2262		pci_release_msi(dev);
2263
2264	if (adapter->msix_mem != NULL)
2265		bus_release_resource(dev, SYS_RES_MEMORY,
2266		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2267
2268	if (adapter->pci_mem != NULL)
2269		bus_release_resource(dev, SYS_RES_MEMORY,
2270		    PCIR_BAR(0), adapter->pci_mem);
2271
2272}
2273
2274/*
2275 * Setup Either MSI/X or MSI
2276 */
2277static int
2278igb_setup_msix(struct adapter *adapter)
2279{
2280	device_t dev = adapter->dev;
2281	int rid, want, queues, msgs;
2282
2283	/* tuneable override */
2284	if (igb_enable_msix == 0)
2285		goto msi;
2286
2287	/* First try MSI/X */
2288	rid = PCIR_BAR(IGB_MSIX_BAR);
2289	adapter->msix_mem = bus_alloc_resource_any(dev,
2290	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2291       	if (!adapter->msix_mem) {
2292		/* May not be enabled */
2293		device_printf(adapter->dev,
2294		    "Unable to map MSIX table \n");
2295		goto msi;
2296	}
2297
2298	msgs = pci_msix_count(dev);
2299	if (msgs == 0) { /* system has msix disabled */
2300		bus_release_resource(dev, SYS_RES_MEMORY,
2301		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2302		adapter->msix_mem = NULL;
2303		goto msi;
2304	}
2305
2306	/* Figure out a reasonable auto config value */
2307	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2308
2309	/* Can have max of 4 queues on 82575 */
2310	if (adapter->hw.mac.type == e1000_82575) {
2311		if (queues > 4)
2312			queues = 4;
2313		if (igb_num_queues > 4)
2314			igb_num_queues = 4;
2315	}
2316
2317	if (igb_num_queues == 0)
2318		igb_num_queues = queues;
2319
2320	/*
2321	** One vector (RX/TX pair) per queue
2322	** plus an additional for Link interrupt
2323	*/
2324	want = igb_num_queues + 1;
2325	if (msgs >= want)
2326		msgs = want;
2327	else {
2328               	device_printf(adapter->dev,
2329		    "MSIX Configuration Problem, "
2330		    "%d vectors configured, but %d queues wanted!\n",
2331		    msgs, want);
2332		return (ENXIO);
2333	}
2334	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2335               	device_printf(adapter->dev,
2336		    "Using MSIX interrupts with %d vectors\n", msgs);
2337		adapter->num_queues = igb_num_queues;
2338		return (msgs);
2339	}
2340msi:
2341       	msgs = pci_msi_count(dev);
2342       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2343               	device_printf(adapter->dev,"Using MSI interrupt\n");
2344	return (msgs);
2345}
2346
2347/*********************************************************************
2348 *
2349 *  Set up an fresh starting state
2350 *
2351 **********************************************************************/
2352static void
2353igb_reset(struct adapter *adapter)
2354{
2355	device_t	dev = adapter->dev;
2356	struct e1000_hw *hw = &adapter->hw;
2357	struct e1000_fc_info *fc = &hw->fc;
2358	struct ifnet	*ifp = adapter->ifp;
2359	u32		pba = 0;
2360	u16		hwm;
2361
2362	INIT_DEBUGOUT("igb_reset: begin");
2363
2364	/* Let the firmware know the OS is in control */
2365	igb_get_hw_control(adapter);
2366
2367	/*
2368	 * Packet Buffer Allocation (PBA)
2369	 * Writing PBA sets the receive portion of the buffer
2370	 * the remainder is used for the transmit buffer.
2371	 */
2372	switch (hw->mac.type) {
2373	case e1000_82575:
2374		pba = E1000_PBA_32K;
2375		break;
2376	case e1000_82576:
2377		pba = E1000_PBA_64K;
2378		break;
2379	case e1000_82580:
2380		pba = E1000_PBA_35K;
2381	default:
2382		break;
2383	}
2384
2385	/* Special needs in case of Jumbo frames */
2386	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2387		u32 tx_space, min_tx, min_rx;
2388		pba = E1000_READ_REG(hw, E1000_PBA);
2389		tx_space = pba >> 16;
2390		pba &= 0xffff;
2391		min_tx = (adapter->max_frame_size +
2392		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2393		min_tx = roundup2(min_tx, 1024);
2394		min_tx >>= 10;
2395                min_rx = adapter->max_frame_size;
2396                min_rx = roundup2(min_rx, 1024);
2397                min_rx >>= 10;
2398		if (tx_space < min_tx &&
2399		    ((min_tx - tx_space) < pba)) {
2400			pba = pba - (min_tx - tx_space);
2401			/*
2402                         * if short on rx space, rx wins
2403                         * and must trump tx adjustment
2404			 */
2405                        if (pba < min_rx)
2406                                pba = min_rx;
2407		}
2408		E1000_WRITE_REG(hw, E1000_PBA, pba);
2409	}
2410
2411	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2412
2413	/*
2414	 * These parameters control the automatic generation (Tx) and
2415	 * response (Rx) to Ethernet PAUSE frames.
2416	 * - High water mark should allow for at least two frames to be
2417	 *   received after sending an XOFF.
2418	 * - Low water mark works best when it is very near the high water mark.
2419	 *   This allows the receiver to restart by sending XON when it has
2420	 *   drained a bit.
2421	 */
2422	hwm = min(((pba << 10) * 9 / 10),
2423	    ((pba << 10) - 2 * adapter->max_frame_size));
2424
2425	if (hw->mac.type < e1000_82576) {
2426		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2427		fc->low_water = fc->high_water - 8;
2428	} else {
2429		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2430		fc->low_water = fc->high_water - 16;
2431	}
2432
2433	fc->pause_time = IGB_FC_PAUSE_TIME;
2434	fc->send_xon = TRUE;
2435
2436	/* Set Flow control, use the tunable location if sane */
2437	if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2438		fc->requested_mode = igb_fc_setting;
2439	else
2440		fc->requested_mode = e1000_fc_none;
2441
2442	fc->current_mode = fc->requested_mode;
2443
2444	/* Issue a global reset */
2445	e1000_reset_hw(hw);
2446	E1000_WRITE_REG(hw, E1000_WUC, 0);
2447
2448	if (e1000_init_hw(hw) < 0)
2449		device_printf(dev, "Hardware Initialization Failed\n");
2450
2451	if (hw->mac.type == e1000_82580) {
2452		u32 reg;
2453
2454		hwm = (pba << 10) - (2 * adapter->max_frame_size);
2455		/*
2456		 * 0x80000000 - enable DMA COAL
2457		 * 0x10000000 - use L0s as low power
2458		 * 0x20000000 - use L1 as low power
2459		 * X << 16 - exit dma coal when rx data exceeds X kB
2460		 * Y - upper limit to stay in dma coal in units of 32usecs
2461		 */
2462		E1000_WRITE_REG(hw, E1000_DMACR,
2463		    0xA0000006 | ((hwm << 6) & 0x00FF0000));
2464
2465		/* set hwm to PBA -  2 * max frame size */
2466		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2467		/*
2468		 * This sets the time to wait before requesting transition to
2469		 * low power state to number of usecs needed to receive 1 512
2470		 * byte frame at gigabit line rate
2471		 */
2472		E1000_WRITE_REG(hw, E1000_DMCTLX, 4);
2473
2474		/* free space in tx packet buffer to wake from DMA coal */
2475		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2476		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2477
2478		/* make low power state decision controlled by DMA coal */
2479		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2480		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2481		    reg | E1000_PCIEMISC_LX_DECISION);
2482	}
2483
2484	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2485	e1000_get_phy_info(hw);
2486	e1000_check_for_link(hw);
2487	return;
2488}
2489
2490/*********************************************************************
2491 *
2492 *  Setup networking device structure and register an interface.
2493 *
2494 **********************************************************************/
2495static void
2496igb_setup_interface(device_t dev, struct adapter *adapter)
2497{
2498	struct ifnet   *ifp;
2499
2500	INIT_DEBUGOUT("igb_setup_interface: begin");
2501
2502	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2503	if (ifp == NULL)
2504		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2505	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2506	ifp->if_mtu = ETHERMTU;
2507	ifp->if_init =  igb_init;
2508	ifp->if_softc = adapter;
2509	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2510	ifp->if_ioctl = igb_ioctl;
2511	ifp->if_start = igb_start;
2512#if __FreeBSD_version >= 800000
2513	ifp->if_transmit = igb_mq_start;
2514	ifp->if_qflush = igb_qflush;
2515#endif
2516	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2517	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2518	IFQ_SET_READY(&ifp->if_snd);
2519
2520	ether_ifattach(ifp, adapter->hw.mac.addr);
2521
2522	ifp->if_capabilities = ifp->if_capenable = 0;
2523
2524	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2525	ifp->if_capabilities |= IFCAP_TSO4;
2526	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2527	if (igb_header_split)
2528		ifp->if_capabilities |= IFCAP_LRO;
2529
2530	ifp->if_capenable = ifp->if_capabilities;
2531
2532	/*
2533	 * Tell the upper layer(s) we support long frames.
2534	 */
2535	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2536	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2537	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2538
2539	/*
2540	 * Specify the media types supported by this adapter and register
2541	 * callbacks to update media and link information
2542	 */
2543	ifmedia_init(&adapter->media, IFM_IMASK,
2544	    igb_media_change, igb_media_status);
2545	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2546	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2547		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2548			    0, NULL);
2549		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2550	} else {
2551		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2552		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2553			    0, NULL);
2554		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2555			    0, NULL);
2556		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2557			    0, NULL);
2558		if (adapter->hw.phy.type != e1000_phy_ife) {
2559			ifmedia_add(&adapter->media,
2560				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2561			ifmedia_add(&adapter->media,
2562				IFM_ETHER | IFM_1000_T, 0, NULL);
2563		}
2564	}
2565	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2566	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2567}
2568
2569
2570/*
2571 * Manage DMA'able memory.
2572 */
2573static void
2574igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2575{
2576	if (error)
2577		return;
2578	*(bus_addr_t *) arg = segs[0].ds_addr;
2579}
2580
2581static int
2582igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2583        struct igb_dma_alloc *dma, int mapflags)
2584{
2585	int error;
2586
2587	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2588				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2589				BUS_SPACE_MAXADDR,	/* lowaddr */
2590				BUS_SPACE_MAXADDR,	/* highaddr */
2591				NULL, NULL,		/* filter, filterarg */
2592				size,			/* maxsize */
2593				1,			/* nsegments */
2594				size,			/* maxsegsize */
2595				0,			/* flags */
2596				NULL,			/* lockfunc */
2597				NULL,			/* lockarg */
2598				&dma->dma_tag);
2599	if (error) {
2600		device_printf(adapter->dev,
2601		    "%s: bus_dma_tag_create failed: %d\n",
2602		    __func__, error);
2603		goto fail_0;
2604	}
2605
2606	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2607	    BUS_DMA_NOWAIT, &dma->dma_map);
2608	if (error) {
2609		device_printf(adapter->dev,
2610		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2611		    __func__, (uintmax_t)size, error);
2612		goto fail_2;
2613	}
2614
2615	dma->dma_paddr = 0;
2616	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2617	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2618	if (error || dma->dma_paddr == 0) {
2619		device_printf(adapter->dev,
2620		    "%s: bus_dmamap_load failed: %d\n",
2621		    __func__, error);
2622		goto fail_3;
2623	}
2624
2625	return (0);
2626
2627fail_3:
2628	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2629fail_2:
2630	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2631	bus_dma_tag_destroy(dma->dma_tag);
2632fail_0:
2633	dma->dma_map = NULL;
2634	dma->dma_tag = NULL;
2635
2636	return (error);
2637}
2638
2639static void
2640igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2641{
2642	if (dma->dma_tag == NULL)
2643		return;
2644	if (dma->dma_map != NULL) {
2645		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2646		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2647		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2648		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2649		dma->dma_map = NULL;
2650	}
2651	bus_dma_tag_destroy(dma->dma_tag);
2652	dma->dma_tag = NULL;
2653}
2654
2655
2656/*********************************************************************
2657 *
2658 *  Allocate memory for the transmit and receive rings, and then
2659 *  the descriptors associated with each, called only once at attach.
2660 *
2661 **********************************************************************/
2662static int
2663igb_allocate_queues(struct adapter *adapter)
2664{
2665	device_t dev = adapter->dev;
2666	struct igb_queue	*que = NULL;
2667	struct tx_ring		*txr = NULL;
2668	struct rx_ring		*rxr = NULL;
2669	int rsize, tsize, error = E1000_SUCCESS;
2670	int txconf = 0, rxconf = 0;
2671
2672	/* First allocate the top level queue structs */
2673	if (!(adapter->queues =
2674	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
2675	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2676		device_printf(dev, "Unable to allocate queue memory\n");
2677		error = ENOMEM;
2678		goto fail;
2679	}
2680
2681	/* Next allocate the TX ring struct memory */
2682	if (!(adapter->tx_rings =
2683	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2684	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2685		device_printf(dev, "Unable to allocate TX ring memory\n");
2686		error = ENOMEM;
2687		goto tx_fail;
2688	}
2689
2690	/* Now allocate the RX */
2691	if (!(adapter->rx_rings =
2692	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2693	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2694		device_printf(dev, "Unable to allocate RX ring memory\n");
2695		error = ENOMEM;
2696		goto rx_fail;
2697	}
2698
2699	tsize = roundup2(adapter->num_tx_desc *
2700	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2701	/*
2702	 * Now set up the TX queues, txconf is needed to handle the
2703	 * possibility that things fail midcourse and we need to
2704	 * undo memory gracefully
2705	 */
2706	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2707		/* Set up some basics */
2708		txr = &adapter->tx_rings[i];
2709		txr->adapter = adapter;
2710		txr->me = i;
2711
2712		/* Initialize the TX lock */
2713		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2714		    device_get_nameunit(dev), txr->me);
2715		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2716
2717		if (igb_dma_malloc(adapter, tsize,
2718			&txr->txdma, BUS_DMA_NOWAIT)) {
2719			device_printf(dev,
2720			    "Unable to allocate TX Descriptor memory\n");
2721			error = ENOMEM;
2722			goto err_tx_desc;
2723		}
2724		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2725		bzero((void *)txr->tx_base, tsize);
2726
2727        	/* Now allocate transmit buffers for the ring */
2728        	if (igb_allocate_transmit_buffers(txr)) {
2729			device_printf(dev,
2730			    "Critical Failure setting up transmit buffers\n");
2731			error = ENOMEM;
2732			goto err_tx_desc;
2733        	}
2734#if __FreeBSD_version >= 800000
2735		/* Allocate a buf ring */
2736		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2737		    M_WAITOK, &txr->tx_mtx);
2738#endif
2739	}
2740
2741	/*
2742	 * Next the RX queues...
2743	 */
2744	rsize = roundup2(adapter->num_rx_desc *
2745	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2746	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2747		rxr = &adapter->rx_rings[i];
2748		rxr->adapter = adapter;
2749		rxr->me = i;
2750
2751		/* Initialize the RX lock */
2752		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2753		    device_get_nameunit(dev), txr->me);
2754		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2755
2756		if (igb_dma_malloc(adapter, rsize,
2757			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2758			device_printf(dev,
2759			    "Unable to allocate RxDescriptor memory\n");
2760			error = ENOMEM;
2761			goto err_rx_desc;
2762		}
2763		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2764		bzero((void *)rxr->rx_base, rsize);
2765
2766        	/* Allocate receive buffers for the ring*/
2767		if (igb_allocate_receive_buffers(rxr)) {
2768			device_printf(dev,
2769			    "Critical Failure setting up receive buffers\n");
2770			error = ENOMEM;
2771			goto err_rx_desc;
2772		}
2773	}
2774
2775	/*
2776	** Finally set up the queue holding structs
2777	*/
2778	for (int i = 0; i < adapter->num_queues; i++) {
2779		que = &adapter->queues[i];
2780		que->adapter = adapter;
2781		que->txr = &adapter->tx_rings[i];
2782		que->rxr = &adapter->rx_rings[i];
2783	}
2784
2785	return (0);
2786
2787err_rx_desc:
2788	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2789		igb_dma_free(adapter, &rxr->rxdma);
2790err_tx_desc:
2791	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2792		igb_dma_free(adapter, &txr->txdma);
2793	free(adapter->rx_rings, M_DEVBUF);
2794rx_fail:
2795	buf_ring_free(txr->br, M_DEVBUF);
2796	free(adapter->tx_rings, M_DEVBUF);
2797tx_fail:
2798	free(adapter->queues, M_DEVBUF);
2799fail:
2800	return (error);
2801}
2802
2803/*********************************************************************
2804 *
2805 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2806 *  the information needed to transmit a packet on the wire. This is
2807 *  called only once at attach, setup is done every reset.
2808 *
2809 **********************************************************************/
2810static int
2811igb_allocate_transmit_buffers(struct tx_ring *txr)
2812{
2813	struct adapter *adapter = txr->adapter;
2814	device_t dev = adapter->dev;
2815	struct igb_tx_buffer *txbuf;
2816	int error, i;
2817
2818	/*
2819	 * Setup DMA descriptor areas.
2820	 */
2821	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
2822			       1, 0,			/* alignment, bounds */
2823			       BUS_SPACE_MAXADDR,	/* lowaddr */
2824			       BUS_SPACE_MAXADDR,	/* highaddr */
2825			       NULL, NULL,		/* filter, filterarg */
2826			       IGB_TSO_SIZE,		/* maxsize */
2827			       IGB_MAX_SCATTER,		/* nsegments */
2828			       PAGE_SIZE,		/* maxsegsize */
2829			       0,			/* flags */
2830			       NULL,			/* lockfunc */
2831			       NULL,			/* lockfuncarg */
2832			       &txr->txtag))) {
2833		device_printf(dev,"Unable to allocate TX DMA tag\n");
2834		goto fail;
2835	}
2836
2837	if (!(txr->tx_buffers =
2838	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2839	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2840		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2841		error = ENOMEM;
2842		goto fail;
2843	}
2844
2845        /* Create the descriptor buffer dma maps */
2846	txbuf = txr->tx_buffers;
2847	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2848		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2849		if (error != 0) {
2850			device_printf(dev, "Unable to create TX DMA map\n");
2851			goto fail;
2852		}
2853	}
2854
2855	return 0;
2856fail:
2857	/* We free all, it handles case where we are in the middle */
2858	igb_free_transmit_structures(adapter);
2859	return (error);
2860}
2861
2862/*********************************************************************
2863 *
2864 *  Initialize a transmit ring.
2865 *
2866 **********************************************************************/
2867static void
2868igb_setup_transmit_ring(struct tx_ring *txr)
2869{
2870	struct adapter *adapter = txr->adapter;
2871	struct igb_tx_buffer *txbuf;
2872	int i;
2873
2874	/* Clear the old descriptor contents */
2875	IGB_TX_LOCK(txr);
2876	bzero((void *)txr->tx_base,
2877	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2878	/* Reset indices */
2879	txr->next_avail_desc = 0;
2880	txr->next_to_clean = 0;
2881
2882	/* Free any existing tx buffers. */
2883        txbuf = txr->tx_buffers;
2884	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2885		if (txbuf->m_head != NULL) {
2886			bus_dmamap_sync(txr->txtag, txbuf->map,
2887			    BUS_DMASYNC_POSTWRITE);
2888			bus_dmamap_unload(txr->txtag, txbuf->map);
2889			m_freem(txbuf->m_head);
2890			txbuf->m_head = NULL;
2891		}
2892		/* clear the watch index */
2893		txbuf->next_eop = -1;
2894        }
2895
2896	/* Set number of descriptors available */
2897	txr->tx_avail = adapter->num_tx_desc;
2898
2899	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2900	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2901	IGB_TX_UNLOCK(txr);
2902}
2903
2904/*********************************************************************
2905 *
2906 *  Initialize all transmit rings.
2907 *
2908 **********************************************************************/
2909static void
2910igb_setup_transmit_structures(struct adapter *adapter)
2911{
2912	struct tx_ring *txr = adapter->tx_rings;
2913
2914	for (int i = 0; i < adapter->num_queues; i++, txr++)
2915		igb_setup_transmit_ring(txr);
2916
2917	return;
2918}
2919
2920/*********************************************************************
2921 *
2922 *  Enable transmit unit.
2923 *
2924 **********************************************************************/
2925static void
2926igb_initialize_transmit_units(struct adapter *adapter)
2927{
2928	struct tx_ring	*txr = adapter->tx_rings;
2929	struct e1000_hw *hw = &adapter->hw;
2930	u32		tctl, txdctl;
2931
2932	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2933
2934	/* Setup the Tx Descriptor Rings */
2935	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2936		u64 bus_addr = txr->txdma.dma_paddr;
2937
2938		E1000_WRITE_REG(hw, E1000_TDLEN(i),
2939		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2940		E1000_WRITE_REG(hw, E1000_TDBAH(i),
2941		    (uint32_t)(bus_addr >> 32));
2942		E1000_WRITE_REG(hw, E1000_TDBAL(i),
2943		    (uint32_t)bus_addr);
2944
2945		/* Setup the HW Tx Head and Tail descriptor pointers */
2946		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2947		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2948
2949		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2950		    E1000_READ_REG(hw, E1000_TDBAL(i)),
2951		    E1000_READ_REG(hw, E1000_TDLEN(i)));
2952
2953		txr->watchdog_check = FALSE;
2954
2955		txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2956		txdctl |= IGB_TX_PTHRESH;
2957		txdctl |= IGB_TX_HTHRESH << 8;
2958		txdctl |= IGB_TX_WTHRESH << 16;
2959		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2960		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2961	}
2962
2963	/* Program the Transmit Control Register */
2964	tctl = E1000_READ_REG(hw, E1000_TCTL);
2965	tctl &= ~E1000_TCTL_CT;
2966	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2967		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2968
2969	e1000_config_collision_dist(hw);
2970
2971	/* This write will effectively turn on the transmit unit. */
2972	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2973}
2974
2975/*********************************************************************
2976 *
2977 *  Free all transmit rings.
2978 *
2979 **********************************************************************/
2980static void
2981igb_free_transmit_structures(struct adapter *adapter)
2982{
2983	struct tx_ring *txr = adapter->tx_rings;
2984
2985	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2986		IGB_TX_LOCK(txr);
2987		igb_free_transmit_buffers(txr);
2988		igb_dma_free(adapter, &txr->txdma);
2989		IGB_TX_UNLOCK(txr);
2990		IGB_TX_LOCK_DESTROY(txr);
2991	}
2992	free(adapter->tx_rings, M_DEVBUF);
2993}
2994
2995/*********************************************************************
2996 *
2997 *  Free transmit ring related data structures.
2998 *
2999 **********************************************************************/
3000static void
3001igb_free_transmit_buffers(struct tx_ring *txr)
3002{
3003	struct adapter *adapter = txr->adapter;
3004	struct igb_tx_buffer *tx_buffer;
3005	int             i;
3006
3007	INIT_DEBUGOUT("free_transmit_ring: begin");
3008
3009	if (txr->tx_buffers == NULL)
3010		return;
3011
3012	tx_buffer = txr->tx_buffers;
3013	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3014		if (tx_buffer->m_head != NULL) {
3015			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3016			    BUS_DMASYNC_POSTWRITE);
3017			bus_dmamap_unload(txr->txtag,
3018			    tx_buffer->map);
3019			m_freem(tx_buffer->m_head);
3020			tx_buffer->m_head = NULL;
3021			if (tx_buffer->map != NULL) {
3022				bus_dmamap_destroy(txr->txtag,
3023				    tx_buffer->map);
3024				tx_buffer->map = NULL;
3025			}
3026		} else if (tx_buffer->map != NULL) {
3027			bus_dmamap_unload(txr->txtag,
3028			    tx_buffer->map);
3029			bus_dmamap_destroy(txr->txtag,
3030			    tx_buffer->map);
3031			tx_buffer->map = NULL;
3032		}
3033	}
3034#if __FreeBSD_version >= 800000
3035	if (txr->br != NULL)
3036		buf_ring_free(txr->br, M_DEVBUF);
3037#endif
3038	if (txr->tx_buffers != NULL) {
3039		free(txr->tx_buffers, M_DEVBUF);
3040		txr->tx_buffers = NULL;
3041	}
3042	if (txr->txtag != NULL) {
3043		bus_dma_tag_destroy(txr->txtag);
3044		txr->txtag = NULL;
3045	}
3046	return;
3047}
3048
3049/**********************************************************************
3050 *
3051 *  Setup work for hardware segmentation offload (TSO)
3052 *
3053 **********************************************************************/
3054static boolean_t
3055igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3056{
3057	struct adapter *adapter = txr->adapter;
3058	struct e1000_adv_tx_context_desc *TXD;
3059	struct igb_tx_buffer        *tx_buffer;
3060	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3061	u32 mss_l4len_idx = 0;
3062	u16 vtag = 0;
3063	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3064	struct ether_vlan_header *eh;
3065	struct ip *ip;
3066	struct tcphdr *th;
3067
3068
3069	/*
3070	 * Determine where frame payload starts.
3071	 * Jump over vlan headers if already present
3072	 */
3073	eh = mtod(mp, struct ether_vlan_header *);
3074	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3075		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3076	else
3077		ehdrlen = ETHER_HDR_LEN;
3078
3079	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3080	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3081		return FALSE;
3082
3083	/* Only supports IPV4 for now */
3084	ctxd = txr->next_avail_desc;
3085	tx_buffer = &txr->tx_buffers[ctxd];
3086	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3087
3088	ip = (struct ip *)(mp->m_data + ehdrlen);
3089	if (ip->ip_p != IPPROTO_TCP)
3090                return FALSE;   /* 0 */
3091	ip->ip_sum = 0;
3092	ip_hlen = ip->ip_hl << 2;
3093	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3094	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3095	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3096	tcp_hlen = th->th_off << 2;
3097	/*
3098	 * Calculate header length, this is used
3099	 * in the transmit desc in igb_xmit
3100	 */
3101	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3102
3103	/* VLAN MACLEN IPLEN */
3104	if (mp->m_flags & M_VLANTAG) {
3105		vtag = htole16(mp->m_pkthdr.ether_vtag);
3106		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3107	}
3108
3109	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3110	vlan_macip_lens |= ip_hlen;
3111	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3112
3113	/* ADV DTYPE TUCMD */
3114	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3115	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3116	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3117	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3118
3119	/* MSS L4LEN IDX */
3120	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3121	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3122	/* 82575 needs the queue index added */
3123	if (adapter->hw.mac.type == e1000_82575)
3124		mss_l4len_idx |= txr->me << 4;
3125	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3126
3127	TXD->seqnum_seed = htole32(0);
3128	tx_buffer->m_head = NULL;
3129	tx_buffer->next_eop = -1;
3130
3131	if (++ctxd == adapter->num_tx_desc)
3132		ctxd = 0;
3133
3134	txr->tx_avail--;
3135	txr->next_avail_desc = ctxd;
3136	return TRUE;
3137}
3138
3139
3140/*********************************************************************
3141 *
3142 *  Context Descriptor setup for VLAN or CSUM
3143 *
3144 **********************************************************************/
3145
3146static bool
3147igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3148{
3149	struct adapter *adapter = txr->adapter;
3150	struct e1000_adv_tx_context_desc *TXD;
3151	struct igb_tx_buffer        *tx_buffer;
3152	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3153	struct ether_vlan_header *eh;
3154	struct ip *ip = NULL;
3155	struct ip6_hdr *ip6;
3156	int  ehdrlen, ctxd, ip_hlen = 0;
3157	u16	etype, vtag = 0;
3158	u8	ipproto = 0;
3159	bool	offload = TRUE;
3160
3161	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3162		offload = FALSE;
3163
3164	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3165	ctxd = txr->next_avail_desc;
3166	tx_buffer = &txr->tx_buffers[ctxd];
3167	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3168
3169	/*
3170	** In advanced descriptors the vlan tag must
3171	** be placed into the context descriptor, thus
3172	** we need to be here just for that setup.
3173	*/
3174	if (mp->m_flags & M_VLANTAG) {
3175		vtag = htole16(mp->m_pkthdr.ether_vtag);
3176		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3177	} else if (offload == FALSE)
3178		return FALSE;
3179
3180	/*
3181	 * Determine where frame payload starts.
3182	 * Jump over vlan headers if already present,
3183	 * helpful for QinQ too.
3184	 */
3185	eh = mtod(mp, struct ether_vlan_header *);
3186	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3187		etype = ntohs(eh->evl_proto);
3188		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3189	} else {
3190		etype = ntohs(eh->evl_encap_proto);
3191		ehdrlen = ETHER_HDR_LEN;
3192	}
3193
3194	/* Set the ether header length */
3195	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3196
3197	switch (etype) {
3198		case ETHERTYPE_IP:
3199			ip = (struct ip *)(mp->m_data + ehdrlen);
3200			ip_hlen = ip->ip_hl << 2;
3201			if (mp->m_len < ehdrlen + ip_hlen) {
3202				offload = FALSE;
3203				break;
3204			}
3205			ipproto = ip->ip_p;
3206			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3207			break;
3208		case ETHERTYPE_IPV6:
3209			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3210			ip_hlen = sizeof(struct ip6_hdr);
3211			if (mp->m_len < ehdrlen + ip_hlen)
3212				return (FALSE);
3213			ipproto = ip6->ip6_nxt;
3214			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3215			break;
3216		default:
3217			offload = FALSE;
3218			break;
3219	}
3220
3221	vlan_macip_lens |= ip_hlen;
3222	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3223
3224	switch (ipproto) {
3225		case IPPROTO_TCP:
3226			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3227				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3228			break;
3229		case IPPROTO_UDP:
3230			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3231				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3232			break;
3233#if __FreeBSD_version >= 800000
3234		case IPPROTO_SCTP:
3235			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3236				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3237			break;
3238#endif
3239		default:
3240			offload = FALSE;
3241			break;
3242	}
3243
3244	/* 82575 needs the queue index added */
3245	if (adapter->hw.mac.type == e1000_82575)
3246		mss_l4len_idx = txr->me << 4;
3247
3248	/* Now copy bits into descriptor */
3249	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3250	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3251	TXD->seqnum_seed = htole32(0);
3252	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3253
3254	tx_buffer->m_head = NULL;
3255	tx_buffer->next_eop = -1;
3256
3257	/* We've consumed the first desc, adjust counters */
3258	if (++ctxd == adapter->num_tx_desc)
3259		ctxd = 0;
3260	txr->next_avail_desc = ctxd;
3261	--txr->tx_avail;
3262
3263        return (offload);
3264}
3265
3266
3267/**********************************************************************
3268 *
3269 *  Examine each tx_buffer in the used queue. If the hardware is done
3270 *  processing the packet then free associated resources. The
3271 *  tx_buffer is put back on the free queue.
3272 *
3273 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3274 **********************************************************************/
3275static bool
3276igb_txeof(struct tx_ring *txr)
3277{
3278	struct adapter	*adapter = txr->adapter;
3279        int first, last, done;
3280        struct igb_tx_buffer *tx_buffer;
3281        struct e1000_tx_desc   *tx_desc, *eop_desc;
3282	struct ifnet   *ifp = adapter->ifp;
3283
3284	IGB_TX_LOCK_ASSERT(txr);
3285
3286        if (txr->tx_avail == adapter->num_tx_desc)
3287                return FALSE;
3288
3289        first = txr->next_to_clean;
3290        tx_desc = &txr->tx_base[first];
3291        tx_buffer = &txr->tx_buffers[first];
3292	last = tx_buffer->next_eop;
3293        eop_desc = &txr->tx_base[last];
3294
3295	/*
3296	 * What this does is get the index of the
3297	 * first descriptor AFTER the EOP of the
3298	 * first packet, that way we can do the
3299	 * simple comparison on the inner while loop.
3300	 */
3301	if (++last == adapter->num_tx_desc)
3302 		last = 0;
3303	done = last;
3304
3305        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3306            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3307
3308        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3309		/* We clean the range of the packet */
3310		while (first != done) {
3311                	tx_desc->upper.data = 0;
3312                	tx_desc->lower.data = 0;
3313                	tx_desc->buffer_addr = 0;
3314                	++txr->tx_avail;
3315
3316			if (tx_buffer->m_head) {
3317				txr->bytes +=
3318				    tx_buffer->m_head->m_pkthdr.len;
3319				bus_dmamap_sync(txr->txtag,
3320				    tx_buffer->map,
3321				    BUS_DMASYNC_POSTWRITE);
3322				bus_dmamap_unload(txr->txtag,
3323				    tx_buffer->map);
3324
3325                        	m_freem(tx_buffer->m_head);
3326                        	tx_buffer->m_head = NULL;
3327                	}
3328			tx_buffer->next_eop = -1;
3329			txr->watchdog_time = ticks;
3330
3331	                if (++first == adapter->num_tx_desc)
3332				first = 0;
3333
3334	                tx_buffer = &txr->tx_buffers[first];
3335			tx_desc = &txr->tx_base[first];
3336		}
3337		++txr->packets;
3338		++ifp->if_opackets;
3339		/* See if we can continue to the next packet */
3340		last = tx_buffer->next_eop;
3341		if (last != -1) {
3342        		eop_desc = &txr->tx_base[last];
3343			/* Get new done point */
3344			if (++last == adapter->num_tx_desc) last = 0;
3345			done = last;
3346		} else
3347			break;
3348        }
3349        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3350            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3351
3352        txr->next_to_clean = first;
3353
3354        /*
3355         * If we have enough room, clear IFF_DRV_OACTIVE
3356         * to tell the stack that it is OK to send packets.
3357         */
3358        if (txr->tx_avail > IGB_TX_CLEANUP_THRESHOLD) {
3359                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3360		/* All clean, turn off the watchdog */
3361                if (txr->tx_avail == adapter->num_tx_desc) {
3362			txr->watchdog_check = FALSE;
3363			return FALSE;
3364		}
3365        }
3366
3367	return (TRUE);
3368}
3369
3370
3371/*********************************************************************
3372 *
3373 *  Setup descriptor buffer(s) from system mbuf buffer pools.
3374 *  		i - designates the ring index
3375 *		clean - tells the function whether to update
3376 *		        the header, the packet buffer, or both.
3377 *
3378 **********************************************************************/
3379static int
3380igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3381{
3382	struct adapter		*adapter = rxr->adapter;
3383	struct igb_rx_buf	*rxbuf;
3384	struct mbuf		*mh, *mp;
3385	bus_dma_segment_t	hseg[1];
3386	bus_dma_segment_t	pseg[1];
3387	bus_dmamap_t		map;
3388	int			nsegs, error;
3389
3390
3391	rxbuf = &rxr->rx_buffers[i];
3392	mh = mp = NULL;
3393	if ((clean & IGB_CLEAN_HEADER) != 0) {
3394		mh = m_gethdr(M_DONTWAIT, MT_DATA);
3395		if (mh == NULL) {
3396			adapter->mbuf_header_failed++;
3397			return (ENOBUFS);
3398		}
3399		mh->m_pkthdr.len = mh->m_len = MHLEN;
3400		/*
3401		 * Because IGB_HDR_BUF size is less than MHLEN
3402		 * and we configure controller to split headers
3403		 * we can align mbuf on ETHER_ALIGN boundary.
3404		 */
3405		m_adj(mh, ETHER_ALIGN);
3406		error = bus_dmamap_load_mbuf_sg(rxr->rx_htag,
3407		    rxr->rx_hspare_map, mh, hseg, &nsegs, 0);
3408		if (error != 0) {
3409			m_freem(mh);
3410			return (error);
3411		}
3412		mh->m_flags &= ~M_PKTHDR;
3413	}
3414	if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3415		mp = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR,
3416		    adapter->rx_mbuf_sz);
3417		if (mp == NULL) {
3418			if (mh != NULL) {
3419				adapter->mbuf_packet_failed++;
3420				bus_dmamap_unload(rxr->rx_htag,
3421				    rxbuf->head_map);
3422				mh->m_flags |= M_PKTHDR;
3423				m_freem(mh);
3424			}
3425			return (ENOBUFS);
3426		}
3427		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3428		error = bus_dmamap_load_mbuf_sg(rxr->rx_ptag,
3429		    rxr->rx_pspare_map, mp, pseg, &nsegs, 0);
3430		if (error != 0) {
3431			if (mh != NULL) {
3432				bus_dmamap_unload(rxr->rx_htag,
3433				    rxbuf->head_map);
3434				mh->m_flags |= M_PKTHDR;
3435				m_freem(mh);
3436			}
3437			m_freem(mp);
3438			return (error);
3439		}
3440		mp->m_flags &= ~M_PKTHDR;
3441	}
3442
3443	/* Loading new DMA maps complete, unload maps for received buffers. */
3444	if ((clean & IGB_CLEAN_HEADER) != 0 && rxbuf->m_head != NULL) {
3445		bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3446		    BUS_DMASYNC_POSTREAD);
3447		bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3448	}
3449	if ((clean & IGB_CLEAN_PAYLOAD) != 0 && rxbuf->m_pack != NULL) {
3450		bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3451		    BUS_DMASYNC_POSTREAD);
3452		bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3453	}
3454
3455	/* Reflect loaded dmamaps. */
3456	if ((clean & IGB_CLEAN_HEADER) != 0) {
3457		map = rxbuf->head_map;
3458		rxbuf->head_map = rxr->rx_hspare_map;
3459		rxr->rx_hspare_map = map;
3460		rxbuf->m_head = mh;
3461		bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3462		    BUS_DMASYNC_PREREAD);
3463		rxr->rx_base[i].read.hdr_addr = htole64(hseg[0].ds_addr);
3464	}
3465	if ((clean & IGB_CLEAN_PAYLOAD) != 0) {
3466		map = rxbuf->pack_map;
3467		rxbuf->pack_map = rxr->rx_pspare_map;
3468		rxr->rx_pspare_map = map;
3469		rxbuf->m_pack = mp;
3470		bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3471		    BUS_DMASYNC_PREREAD);
3472		rxr->rx_base[i].read.pkt_addr = htole64(pseg[0].ds_addr);
3473	}
3474
3475	return (0);
3476}
3477
3478/*********************************************************************
3479 *
3480 *  Allocate memory for rx_buffer structures. Since we use one
3481 *  rx_buffer per received packet, the maximum number of rx_buffer's
3482 *  that we'll need is equal to the number of receive descriptors
3483 *  that we've allocated.
3484 *
3485 **********************************************************************/
3486static int
3487igb_allocate_receive_buffers(struct rx_ring *rxr)
3488{
3489	struct	adapter 	*adapter = rxr->adapter;
3490	device_t 		dev = adapter->dev;
3491	struct igb_rx_buf	*rxbuf;
3492	int             	i, bsize, error;
3493
3494	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3495	if (!(rxr->rx_buffers =
3496	    (struct igb_rx_buf *) malloc(bsize,
3497	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3498		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3499		error = ENOMEM;
3500		goto fail;
3501	}
3502
3503	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3504				   1, 0,		/* alignment, bounds */
3505				   BUS_SPACE_MAXADDR,	/* lowaddr */
3506				   BUS_SPACE_MAXADDR,	/* highaddr */
3507				   NULL, NULL,		/* filter, filterarg */
3508				   MSIZE,		/* maxsize */
3509				   1,			/* nsegments */
3510				   MSIZE,		/* maxsegsize */
3511				   0,			/* flags */
3512				   NULL,		/* lockfunc */
3513				   NULL,		/* lockfuncarg */
3514				   &rxr->rx_htag))) {
3515		device_printf(dev, "Unable to create RX DMA tag\n");
3516		goto fail;
3517	}
3518
3519	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3520				   1, 0,		/* alignment, bounds */
3521				   BUS_SPACE_MAXADDR,	/* lowaddr */
3522				   BUS_SPACE_MAXADDR,	/* highaddr */
3523				   NULL, NULL,		/* filter, filterarg */
3524				   MJUMPAGESIZE,	/* maxsize */
3525				   1,			/* nsegments */
3526				   MJUMPAGESIZE,	/* maxsegsize */
3527				   0,			/* flags */
3528				   NULL,		/* lockfunc */
3529				   NULL,		/* lockfuncarg */
3530				   &rxr->rx_ptag))) {
3531		device_printf(dev, "Unable to create RX payload DMA tag\n");
3532		goto fail;
3533	}
3534
3535	/* Create the spare maps (used by getbuf) */
3536        error = bus_dmamap_create(rxr->rx_htag, BUS_DMA_NOWAIT,
3537	     &rxr->rx_hspare_map);
3538	if (error) {
3539		device_printf(dev,
3540		    "%s: bus_dmamap_create header spare failed: %d\n",
3541		    __func__, error);
3542		goto fail;
3543	}
3544        error = bus_dmamap_create(rxr->rx_ptag, BUS_DMA_NOWAIT,
3545	     &rxr->rx_pspare_map);
3546	if (error) {
3547		device_printf(dev,
3548		    "%s: bus_dmamap_create packet spare failed: %d\n",
3549		    __func__, error);
3550		goto fail;
3551	}
3552
3553	for (i = 0; i < adapter->num_rx_desc; i++) {
3554		rxbuf = &rxr->rx_buffers[i];
3555		error = bus_dmamap_create(rxr->rx_htag,
3556		    BUS_DMA_NOWAIT, &rxbuf->head_map);
3557		if (error) {
3558			device_printf(dev,
3559			    "Unable to create RX head DMA maps\n");
3560			goto fail;
3561		}
3562		error = bus_dmamap_create(rxr->rx_ptag,
3563		    BUS_DMA_NOWAIT, &rxbuf->pack_map);
3564		if (error) {
3565			device_printf(dev,
3566			    "Unable to create RX packet DMA maps\n");
3567			goto fail;
3568		}
3569	}
3570
3571	return (0);
3572
3573fail:
3574	/* Frees all, but can handle partial completion */
3575	igb_free_receive_structures(adapter);
3576	return (error);
3577}
3578
3579
3580static void
3581igb_free_receive_ring(struct rx_ring *rxr)
3582{
3583	struct	adapter		*adapter;
3584	struct igb_rx_buf	*rxbuf;
3585	int i;
3586
3587	adapter = rxr->adapter;
3588	for (i = 0; i < adapter->num_rx_desc; i++) {
3589		rxbuf = &rxr->rx_buffers[i];
3590		if (rxbuf->m_head != NULL) {
3591			bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3592			    BUS_DMASYNC_POSTREAD);
3593			bus_dmamap_unload(rxr->rx_htag, rxbuf->head_map);
3594			rxbuf->m_head->m_flags |= M_PKTHDR;
3595			m_freem(rxbuf->m_head);
3596		}
3597		if (rxbuf->m_pack != NULL) {
3598			bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3599			    BUS_DMASYNC_POSTREAD);
3600			bus_dmamap_unload(rxr->rx_ptag, rxbuf->pack_map);
3601			rxbuf->m_pack->m_flags |= M_PKTHDR;
3602			m_freem(rxbuf->m_pack);
3603		}
3604		rxbuf->m_head = NULL;
3605		rxbuf->m_pack = NULL;
3606	}
3607}
3608
3609
3610/*********************************************************************
3611 *
3612 *  Initialize a receive ring and its buffers.
3613 *
3614 **********************************************************************/
3615static int
3616igb_setup_receive_ring(struct rx_ring *rxr)
3617{
3618	struct	adapter		*adapter;
3619	struct  ifnet		*ifp;
3620	device_t		dev;
3621	struct lro_ctrl		*lro = &rxr->lro;
3622	int			j, rsize, error = 0;
3623
3624	adapter = rxr->adapter;
3625	dev = adapter->dev;
3626	ifp = adapter->ifp;
3627
3628	/* Clear the ring contents */
3629	IGB_RX_LOCK(rxr);
3630	rsize = roundup2(adapter->num_rx_desc *
3631	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3632	bzero((void *)rxr->rx_base, rsize);
3633
3634	/*
3635	** Free current RX buffer structures and their mbufs
3636	*/
3637	igb_free_receive_ring(rxr);
3638
3639	/* Now replenish the ring mbufs */
3640	for (j = 0; j < adapter->num_rx_desc; j++) {
3641		if ((error = igb_get_buf(rxr, j, IGB_CLEAN_BOTH)) != 0)
3642			goto fail;
3643	}
3644
3645	/* Setup our descriptor indices */
3646	rxr->next_to_check = 0;
3647	rxr->last_cleaned = 0;
3648	rxr->lro_enabled = FALSE;
3649
3650	if (igb_header_split)
3651		rxr->hdr_split = TRUE;
3652	else
3653		ifp->if_capabilities &= ~IFCAP_LRO;
3654
3655	rxr->fmp = NULL;
3656	rxr->lmp = NULL;
3657	rxr->discard = FALSE;
3658
3659	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3660	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3661
3662	/*
3663	** Now set up the LRO interface, we
3664	** also only do head split when LRO
3665	** is enabled, since so often they
3666	** are undesireable in similar setups.
3667	*/
3668	if (ifp->if_capenable & IFCAP_LRO) {
3669		int err = tcp_lro_init(lro);
3670		if (err) {
3671			device_printf(dev, "LRO Initialization failed!\n");
3672			goto fail;
3673		}
3674		INIT_DEBUGOUT("RX LRO Initialized\n");
3675		rxr->lro_enabled = TRUE;
3676		lro->ifp = adapter->ifp;
3677	}
3678
3679	IGB_RX_UNLOCK(rxr);
3680	return (0);
3681
3682fail:
3683	igb_free_receive_ring(rxr);
3684	IGB_RX_UNLOCK(rxr);
3685	return (error);
3686}
3687
3688/*********************************************************************
3689 *
3690 *  Initialize all receive rings.
3691 *
3692 **********************************************************************/
3693static int
3694igb_setup_receive_structures(struct adapter *adapter)
3695{
3696	struct rx_ring *rxr = adapter->rx_rings;
3697	int i, j;
3698
3699	for (i = 0; i < adapter->num_queues; i++, rxr++)
3700		if (igb_setup_receive_ring(rxr))
3701			goto fail;
3702
3703	return (0);
3704fail:
3705	/*
3706	 * Free RX buffers allocated so far, we will only handle
3707	 * the rings that completed, the failing case will have
3708	 * cleaned up for itself. The value of 'i' will be the
3709	 * failed ring so we must pre-decrement it.
3710	 */
3711	rxr = adapter->rx_rings;
3712	for (--i; i > 0; i--, rxr++) {
3713		for (j = 0; j < adapter->num_rx_desc; j++)
3714			igb_free_receive_ring(rxr);
3715	}
3716
3717	return (ENOBUFS);
3718}
3719
3720/*********************************************************************
3721 *
3722 *  Enable receive unit.
3723 *
3724 **********************************************************************/
3725static void
3726igb_initialize_receive_units(struct adapter *adapter)
3727{
3728	struct rx_ring	*rxr = adapter->rx_rings;
3729	struct ifnet	*ifp = adapter->ifp;
3730	struct e1000_hw *hw = &adapter->hw;
3731	u32		rctl, rxcsum, psize, srrctl = 0;
3732
3733	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3734
3735	/*
3736	 * Make sure receives are disabled while setting
3737	 * up the descriptor ring
3738	 */
3739	rctl = E1000_READ_REG(hw, E1000_RCTL);
3740	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3741
3742	/*
3743	** Set up for header split
3744	*/
3745	if (rxr->hdr_split) {
3746		/* Use a standard mbuf for the header */
3747		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3748		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3749	} else
3750		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3751
3752	/*
3753	** Set up for jumbo frames
3754	*/
3755	if (ifp->if_mtu > ETHERMTU) {
3756		rctl |= E1000_RCTL_LPE;
3757		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3758		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3759
3760		/* Set maximum packet len */
3761		psize = adapter->max_frame_size;
3762		/* are we on a vlan? */
3763		if (adapter->ifp->if_vlantrunk != NULL)
3764			psize += VLAN_TAG_SIZE;
3765		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3766	} else {
3767		rctl &= ~E1000_RCTL_LPE;
3768		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3769		rctl |= E1000_RCTL_SZ_2048;
3770	}
3771
3772	/* Setup the Base and Length of the Rx Descriptor Rings */
3773	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3774		u64 bus_addr = rxr->rxdma.dma_paddr;
3775		u32 rxdctl;
3776
3777		E1000_WRITE_REG(hw, E1000_RDLEN(i),
3778		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3779		E1000_WRITE_REG(hw, E1000_RDBAH(i),
3780		    (uint32_t)(bus_addr >> 32));
3781		E1000_WRITE_REG(hw, E1000_RDBAL(i),
3782		    (uint32_t)bus_addr);
3783		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
3784		/* Enable this Queue */
3785		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
3786		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3787		rxdctl &= 0xFFF00000;
3788		rxdctl |= IGB_RX_PTHRESH;
3789		rxdctl |= IGB_RX_HTHRESH << 8;
3790		rxdctl |= IGB_RX_WTHRESH << 16;
3791		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
3792	}
3793
3794	/*
3795	** Setup for RX MultiQueue
3796	*/
3797	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
3798	if (adapter->num_queues >1) {
3799		u32 random[10], mrqc, shift = 0;
3800		union igb_reta {
3801			u32 dword;
3802			u8  bytes[4];
3803		} reta;
3804
3805		arc4rand(&random, sizeof(random), 0);
3806		if (adapter->hw.mac.type == e1000_82575)
3807			shift = 6;
3808		/* Warning FM follows */
3809		for (int i = 0; i < 128; i++) {
3810			reta.bytes[i & 3] =
3811			    (i % adapter->num_queues) << shift;
3812			if ((i & 3) == 3)
3813				E1000_WRITE_REG(hw,
3814				    E1000_RETA(i >> 2), reta.dword);
3815		}
3816		/* Now fill in hash table */
3817		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3818		for (int i = 0; i < 10; i++)
3819			E1000_WRITE_REG_ARRAY(hw,
3820			    E1000_RSSRK(0), i, random[i]);
3821
3822		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3823		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3824		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3825		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3826		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
3827		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3828		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3829		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3830
3831		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
3832
3833		/*
3834		** NOTE: Receive Full-Packet Checksum Offload
3835		** is mutually exclusive with Multiqueue. However
3836		** this is not the same as TCP/IP checksums which
3837		** still work.
3838		*/
3839		rxcsum |= E1000_RXCSUM_PCSD;
3840#if __FreeBSD_version >= 800000
3841		/* For SCTP Offload */
3842		if ((hw->mac.type == e1000_82576)
3843		    && (ifp->if_capenable & IFCAP_RXCSUM))
3844			rxcsum |= E1000_RXCSUM_CRCOFL;
3845#endif
3846	} else {
3847		/* Non RSS setup */
3848		if (ifp->if_capenable & IFCAP_RXCSUM) {
3849			rxcsum |= E1000_RXCSUM_IPPCSE;
3850#if __FreeBSD_version >= 800000
3851			if (adapter->hw.mac.type == e1000_82576)
3852				rxcsum |= E1000_RXCSUM_CRCOFL;
3853#endif
3854		} else
3855			rxcsum &= ~E1000_RXCSUM_TUOFL;
3856	}
3857	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
3858
3859	/* Setup the Receive Control Register */
3860	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3861	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3862		   E1000_RCTL_RDMTS_HALF |
3863		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3864	/* Strip CRC bytes. */
3865	rctl |= E1000_RCTL_SECRC;
3866	/* Make sure VLAN Filters are off */
3867	rctl &= ~E1000_RCTL_VFE;
3868	/* Don't store bad packets */
3869	rctl &= ~E1000_RCTL_SBP;
3870
3871	/* Enable Receives */
3872	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
3873
3874	/*
3875	 * Setup the HW Rx Head and Tail Descriptor Pointers
3876	 *   - needs to be after enable
3877	 */
3878	for (int i = 0; i < adapter->num_queues; i++) {
3879		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
3880		E1000_WRITE_REG(hw, E1000_RDT(i),
3881		     adapter->num_rx_desc - 1);
3882	}
3883	return;
3884}
3885
3886/*********************************************************************
3887 *
3888 *  Free receive rings.
3889 *
3890 **********************************************************************/
3891static void
3892igb_free_receive_structures(struct adapter *adapter)
3893{
3894	struct rx_ring *rxr = adapter->rx_rings;
3895
3896	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3897		struct lro_ctrl	*lro = &rxr->lro;
3898		igb_free_receive_buffers(rxr);
3899		tcp_lro_free(lro);
3900		igb_dma_free(adapter, &rxr->rxdma);
3901	}
3902
3903	free(adapter->rx_rings, M_DEVBUF);
3904}
3905
3906/*********************************************************************
3907 *
3908 *  Free receive ring data structures.
3909 *
3910 **********************************************************************/
3911static void
3912igb_free_receive_buffers(struct rx_ring *rxr)
3913{
3914	struct adapter		*adapter = rxr->adapter;
3915	struct igb_rx_buf	*rxbuf;
3916	int i;
3917
3918	INIT_DEBUGOUT("free_receive_structures: begin");
3919
3920	if (rxr->rx_hspare_map != NULL) {
3921		bus_dmamap_destroy(rxr->rx_htag, rxr->rx_hspare_map);
3922		rxr->rx_hspare_map = NULL;
3923	}
3924
3925	if (rxr->rx_hspare_map != NULL) {
3926		bus_dmamap_destroy(rxr->rx_ptag, rxr->rx_pspare_map);
3927		rxr->rx_pspare_map = NULL;
3928	}
3929
3930	/* Cleanup any existing buffers */
3931	if (rxr->rx_buffers != NULL) {
3932		for (i = 0; i < adapter->num_rx_desc; i++) {
3933			rxbuf = &rxr->rx_buffers[i];
3934			if (rxbuf->m_head != NULL) {
3935				bus_dmamap_sync(rxr->rx_htag, rxbuf->head_map,
3936				    BUS_DMASYNC_POSTREAD);
3937				bus_dmamap_unload(rxr->rx_htag,
3938				    rxbuf->head_map);
3939				rxbuf->m_head->m_flags |= M_PKTHDR;
3940				m_freem(rxbuf->m_head);
3941			}
3942			if (rxbuf->m_pack != NULL) {
3943				bus_dmamap_sync(rxr->rx_ptag, rxbuf->pack_map,
3944				    BUS_DMASYNC_POSTREAD);
3945				bus_dmamap_unload(rxr->rx_ptag,
3946				    rxbuf->pack_map);
3947				rxbuf->m_pack->m_flags |= M_PKTHDR;
3948				m_freem(rxbuf->m_pack);
3949			}
3950			rxbuf->m_head = NULL;
3951			rxbuf->m_pack = NULL;
3952			if (rxbuf->head_map != NULL) {
3953				bus_dmamap_destroy(rxr->rx_htag,
3954				    rxbuf->head_map);
3955				rxbuf->head_map = NULL;
3956			}
3957			if (rxbuf->pack_map != NULL) {
3958				bus_dmamap_destroy(rxr->rx_ptag,
3959				    rxbuf->pack_map);
3960				rxbuf->pack_map = NULL;
3961			}
3962		}
3963		if (rxr->rx_buffers != NULL) {
3964			free(rxr->rx_buffers, M_DEVBUF);
3965			rxr->rx_buffers = NULL;
3966		}
3967	}
3968
3969	if (rxr->rx_htag != NULL) {
3970		bus_dma_tag_destroy(rxr->rx_htag);
3971		rxr->rx_htag = NULL;
3972	}
3973	if (rxr->rx_ptag != NULL) {
3974		bus_dma_tag_destroy(rxr->rx_ptag);
3975		rxr->rx_ptag = NULL;
3976	}
3977}
3978
3979static __inline void
3980igb_rx_discard(struct rx_ring *rxr, union e1000_adv_rx_desc *cur, int i)
3981{
3982
3983	if (rxr->fmp != NULL) {
3984		rxr->fmp->m_flags |= M_PKTHDR;
3985		m_freem(rxr->fmp);
3986		rxr->fmp = NULL;
3987		rxr->lmp = NULL;
3988	}
3989}
3990
3991static __inline void
3992igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
3993{
3994
3995	/*
3996	 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet
3997	 * should be computed by hardware. Also it should not have VLAN tag in
3998	 * ethernet header.
3999	 */
4000	if (rxr->lro_enabled &&
4001	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4002	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4003	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4004	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4005	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4006	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4007		/*
4008		 * Send to the stack if:
4009		 **  - LRO not enabled, or
4010		 **  - no LRO resources, or
4011		 **  - lro enqueue fails
4012		 */
4013		if (rxr->lro.lro_cnt != 0)
4014			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4015				return;
4016	}
4017	(*ifp->if_input)(ifp, m);
4018}
4019
4020/*********************************************************************
4021 *
4022 *  This routine executes in interrupt context. It replenishes
4023 *  the mbufs in the descriptor and sends data which has been
4024 *  dma'ed into host memory to upper layer.
4025 *
4026 *  We loop at most count times if count is > 0, or until done if
4027 *  count < 0.
4028 *
4029 *  Return TRUE if more to clean, FALSE otherwise
4030 *********************************************************************/
4031static bool
4032igb_rxeof(struct rx_ring *rxr, int count)
4033{
4034	struct adapter		*adapter = rxr->adapter;
4035	struct ifnet		*ifp = adapter->ifp;
4036	struct lro_ctrl		*lro = &rxr->lro;
4037	struct lro_entry	*queued;
4038	int			i, prog = 0;
4039	u32			ptype, staterr = 0;
4040	union e1000_adv_rx_desc	*cur;
4041
4042	IGB_RX_LOCK(rxr);
4043
4044	/* Main clean loop */
4045	for (i = rxr->next_to_check; count > 0; prog++) {
4046		struct mbuf *sendmp, *mh, *mp;
4047		u16 hlen, plen, hdr, vtag;
4048		bool eop = FALSE;
4049		u8 dopayload;
4050
4051		/* Sync the ring. */
4052		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4053		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4054		cur = &rxr->rx_base[i];
4055		staterr = le32toh(cur->wb.upper.status_error);
4056		if ((staterr & E1000_RXD_STAT_DD) == 0)
4057			break;
4058		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4059			break;
4060		count--;
4061		sendmp = mh = mp = NULL;
4062		cur->wb.upper.status_error = 0;
4063		plen = le16toh(cur->wb.upper.length);
4064		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4065		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4066		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4067
4068		/* Make sure all segments of a bad packet are discarded */
4069		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4070		    (rxr->discard)) {
4071			ifp->if_ierrors++;
4072			++rxr->rx_discarded;
4073			if (!eop) /* Catch subsequent segs */
4074				rxr->discard = TRUE;
4075			else
4076				rxr->discard = FALSE;
4077			igb_rx_discard(rxr, cur, i);
4078			goto next_desc;
4079		}
4080
4081		/*
4082		** The way the hardware is configured to
4083		** split, it will ONLY use the header buffer
4084		** when header split is enabled, otherwise we
4085		** get normal behavior, ie, both header and
4086		** payload are DMA'd into the payload buffer.
4087		**
4088		** The fmp test is to catch the case where a
4089		** packet spans multiple descriptors, in that
4090		** case only the first header is valid.
4091		*/
4092		if (rxr->hdr_split && rxr->fmp == NULL) {
4093			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4094			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4095			if (hlen > IGB_HDR_BUF)
4096				hlen = IGB_HDR_BUF;
4097			/* Handle the header mbuf */
4098			mh = rxr->rx_buffers[i].m_head;
4099			mh->m_len = hlen;
4100			dopayload = IGB_CLEAN_HEADER;
4101			/*
4102			** Get the payload length, this
4103			** could be zero if its a small
4104			** packet.
4105			*/
4106			if (plen > 0) {
4107				mp = rxr->rx_buffers[i].m_pack;
4108				mp->m_len = plen;
4109				mh->m_next = mp;
4110				dopayload = IGB_CLEAN_BOTH;
4111				rxr->rx_split_packets++;
4112			}
4113		} else {
4114			/*
4115			** Either no header split, or a
4116			** secondary piece of a fragmented
4117			** split packet.
4118			*/
4119			mh = rxr->rx_buffers[i].m_pack;
4120			mh->m_len = plen;
4121			dopayload = IGB_CLEAN_PAYLOAD;
4122		}
4123
4124		/*
4125		** get_buf will overwrite the writeback
4126		** descriptor so save the VLAN tag now.
4127		*/
4128		vtag = le16toh(cur->wb.upper.vlan);
4129		if (igb_get_buf(rxr, i, dopayload) != 0) {
4130			ifp->if_iqdrops++;
4131			/*
4132			 * We've dropped a frame due to lack of resources
4133			 * so we should drop entire multi-segmented
4134			 * frames until we encounter EOP.
4135			 */
4136			if ((staterr & E1000_RXD_STAT_EOP) != 0)
4137				rxr->discard = TRUE;
4138			igb_rx_discard(rxr, cur, i);
4139			goto next_desc;
4140		}
4141
4142		/* Initial frame - setup */
4143		if (rxr->fmp == NULL) {
4144			mh->m_pkthdr.len = mh->m_len;
4145			/* Store the first mbuf */
4146			rxr->fmp = mh;
4147			rxr->lmp = mh;
4148			if (mp != NULL) {
4149				/* Add payload if split */
4150				mh->m_pkthdr.len += mp->m_len;
4151				rxr->lmp = mh->m_next;
4152			}
4153		} else {
4154			/* Chain mbuf's together */
4155			rxr->lmp->m_next = mh;
4156			rxr->lmp = rxr->lmp->m_next;
4157			rxr->fmp->m_pkthdr.len += mh->m_len;
4158		}
4159
4160		if (eop) {
4161			rxr->fmp->m_pkthdr.rcvif = ifp;
4162			ifp->if_ipackets++;
4163			rxr->rx_packets++;
4164			/* capture data for AIM */
4165			rxr->packets++;
4166			rxr->bytes += rxr->fmp->m_pkthdr.len;
4167			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4168
4169			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4170				igb_rx_checksum(staterr, rxr->fmp, ptype);
4171			/* XXX igb(4) always strips VLAN. */
4172			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4173			    (staterr & E1000_RXD_STAT_VP) != 0) {
4174				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4175				rxr->fmp->m_flags |= M_VLANTAG;
4176			}
4177#if __FreeBSD_version >= 800000
4178			rxr->fmp->m_pkthdr.flowid = curcpu;
4179			rxr->fmp->m_flags |= M_FLOWID;
4180#endif
4181			sendmp = rxr->fmp;
4182			/* Make sure to set M_PKTHDR. */
4183			sendmp->m_flags |= M_PKTHDR;
4184			rxr->fmp = NULL;
4185			rxr->lmp = NULL;
4186		}
4187
4188next_desc:
4189		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4190		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4191
4192		rxr->last_cleaned = i; /* For updating tail */
4193
4194		/* Advance our pointers to the next descriptor. */
4195		if (++i == adapter->num_rx_desc)
4196			i = 0;
4197
4198		/*
4199		** Note that we hold the RX lock thru
4200		** the following call so this ring's
4201		** next_to_check is not gonna change.
4202		*/
4203		if (sendmp != NULL)
4204			igb_rx_input(rxr, ifp, sendmp, ptype);
4205	}
4206
4207	if (prog == 0) {
4208		IGB_RX_UNLOCK(rxr);
4209		return (FALSE);
4210	}
4211
4212	rxr->next_to_check = i;
4213
4214	/* Advance the E1000's Receive Queue "Tail Pointer". */
4215	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4216
4217	/*
4218	 * Flush any outstanding LRO work
4219	 */
4220	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4221		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4222		tcp_lro_flush(lro, queued);
4223	}
4224
4225	IGB_RX_UNLOCK(rxr);
4226
4227	/*
4228	** We still have cleaning to do?
4229	** Schedule another interrupt if so.
4230	*/
4231	if ((staterr & E1000_RXD_STAT_DD) != 0)
4232		return (TRUE);
4233
4234	return (FALSE);
4235}
4236
4237/*********************************************************************
4238 *
4239 *  Verify that the hardware indicated that the checksum is valid.
4240 *  Inform the stack about the status of checksum so that stack
4241 *  doesn't spend time verifying the checksum.
4242 *
4243 *********************************************************************/
4244static void
4245igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4246{
4247	u16 status = (u16)staterr;
4248	u8  errors = (u8) (staterr >> 24);
4249	int sctp;
4250
4251	/* Ignore Checksum bit is set */
4252	if (status & E1000_RXD_STAT_IXSM) {
4253		mp->m_pkthdr.csum_flags = 0;
4254		return;
4255	}
4256
4257	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4258	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4259		sctp = 1;
4260	else
4261		sctp = 0;
4262	if (status & E1000_RXD_STAT_IPCS) {
4263		/* Did it pass? */
4264		if (!(errors & E1000_RXD_ERR_IPE)) {
4265			/* IP Checksum Good */
4266			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4267			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4268		} else
4269			mp->m_pkthdr.csum_flags = 0;
4270	}
4271
4272	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4273		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4274#if __FreeBSD_version >= 800000
4275		if (sctp) /* reassign */
4276			type = CSUM_SCTP_VALID;
4277#endif
4278		/* Did it pass? */
4279		if (!(errors & E1000_RXD_ERR_TCPE)) {
4280			mp->m_pkthdr.csum_flags |= type;
4281			if (sctp == 0)
4282				mp->m_pkthdr.csum_data = htons(0xffff);
4283		}
4284	}
4285	return;
4286}
4287
4288/*
4289 * This routine is run via an vlan
4290 * config EVENT
4291 */
4292static void
4293igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4294{
4295	struct adapter	*adapter = ifp->if_softc;
4296	u32		index, bit;
4297
4298	if (ifp->if_softc !=  arg)   /* Not our event */
4299		return;
4300
4301	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4302                return;
4303
4304	index = (vtag >> 5) & 0x7F;
4305	bit = vtag & 0x1F;
4306	igb_shadow_vfta[index] |= (1 << bit);
4307	++adapter->num_vlans;
4308	/* Re-init to load the changes */
4309	igb_init(adapter);
4310}
4311
4312/*
4313 * This routine is run via an vlan
4314 * unconfig EVENT
4315 */
4316static void
4317igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4318{
4319	struct adapter	*adapter = ifp->if_softc;
4320	u32		index, bit;
4321
4322	if (ifp->if_softc !=  arg)
4323		return;
4324
4325	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4326                return;
4327
4328	index = (vtag >> 5) & 0x7F;
4329	bit = vtag & 0x1F;
4330	igb_shadow_vfta[index] &= ~(1 << bit);
4331	--adapter->num_vlans;
4332	/* Re-init to load the changes */
4333	igb_init(adapter);
4334}
4335
4336static void
4337igb_setup_vlan_hw_support(struct adapter *adapter)
4338{
4339	struct e1000_hw *hw = &adapter->hw;
4340	u32             reg;
4341
4342	/*
4343	** We get here thru init_locked, meaning
4344	** a soft reset, this has already cleared
4345	** the VFTA and other state, so if there
4346	** have been no vlan's registered do nothing.
4347	*/
4348	if (adapter->num_vlans == 0)
4349                return;
4350
4351	/*
4352	** A soft reset zero's out the VFTA, so
4353	** we need to repopulate it now.
4354	*/
4355	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4356                if (igb_shadow_vfta[i] != 0)
4357			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4358                            i, igb_shadow_vfta[i]);
4359
4360	reg = E1000_READ_REG(hw, E1000_CTRL);
4361	reg |= E1000_CTRL_VME;
4362	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4363
4364	/* Enable the Filter Table */
4365	reg = E1000_READ_REG(hw, E1000_RCTL);
4366	reg &= ~E1000_RCTL_CFIEN;
4367	reg |= E1000_RCTL_VFE;
4368	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4369
4370	/* Update the frame size */
4371	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4372	    adapter->max_frame_size + VLAN_TAG_SIZE);
4373}
4374
4375static void
4376igb_enable_intr(struct adapter *adapter)
4377{
4378	/* With RSS set up what to auto clear */
4379	if (adapter->msix_mem) {
4380		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4381		    adapter->eims_mask);
4382		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4383		    adapter->eims_mask);
4384		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4385		    adapter->eims_mask);
4386		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4387		    E1000_IMS_LSC);
4388	} else {
4389		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4390		    IMS_ENABLE_MASK);
4391	}
4392	E1000_WRITE_FLUSH(&adapter->hw);
4393
4394	return;
4395}
4396
4397static void
4398igb_disable_intr(struct adapter *adapter)
4399{
4400	if (adapter->msix_mem) {
4401		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4402		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4403	}
4404	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4405	E1000_WRITE_FLUSH(&adapter->hw);
4406	return;
4407}
4408
4409/*
4410 * Bit of a misnomer, what this really means is
4411 * to enable OS management of the system... aka
4412 * to disable special hardware management features
4413 */
4414static void
4415igb_init_manageability(struct adapter *adapter)
4416{
4417	if (adapter->has_manage) {
4418		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4419		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4420
4421		/* disable hardware interception of ARP */
4422		manc &= ~(E1000_MANC_ARP_EN);
4423
4424                /* enable receiving management packets to the host */
4425		manc |= E1000_MANC_EN_MNG2HOST;
4426		manc2h |= 1 << 5;  /* Mng Port 623 */
4427		manc2h |= 1 << 6;  /* Mng Port 664 */
4428		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4429		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4430	}
4431}
4432
4433/*
4434 * Give control back to hardware management
4435 * controller if there is one.
4436 */
4437static void
4438igb_release_manageability(struct adapter *adapter)
4439{
4440	if (adapter->has_manage) {
4441		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4442
4443		/* re-enable hardware interception of ARP */
4444		manc |= E1000_MANC_ARP_EN;
4445		manc &= ~E1000_MANC_EN_MNG2HOST;
4446
4447		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4448	}
4449}
4450
4451/*
4452 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4453 * For ASF and Pass Through versions of f/w this means that
4454 * the driver is loaded.
4455 *
4456 */
4457static void
4458igb_get_hw_control(struct adapter *adapter)
4459{
4460	u32 ctrl_ext;
4461
4462	/* Let firmware know the driver has taken over */
4463	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4464	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4465	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4466}
4467
4468/*
4469 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4470 * For ASF and Pass Through versions of f/w this means that the
4471 * driver is no longer loaded.
4472 *
4473 */
4474static void
4475igb_release_hw_control(struct adapter *adapter)
4476{
4477	u32 ctrl_ext;
4478
4479	/* Let firmware taken over control of h/w */
4480	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4481	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4482	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4483}
4484
4485static int
4486igb_is_valid_ether_addr(uint8_t *addr)
4487{
4488	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4489
4490	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4491		return (FALSE);
4492	}
4493
4494	return (TRUE);
4495}
4496
4497
4498/*
4499 * Enable PCI Wake On Lan capability
4500 */
4501void
4502igb_enable_wakeup(device_t dev)
4503{
4504	u16     cap, status;
4505	u8      id;
4506
4507	/* First find the capabilities pointer*/
4508	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4509	/* Read the PM Capabilities */
4510	id = pci_read_config(dev, cap, 1);
4511	if (id != PCIY_PMG)     /* Something wrong */
4512		return;
4513	/* OK, we have the power capabilities, so
4514	   now get the status register */
4515	cap += PCIR_POWER_STATUS;
4516	status = pci_read_config(dev, cap, 2);
4517	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4518	pci_write_config(dev, cap, status, 2);
4519	return;
4520}
4521
4522
4523/**********************************************************************
4524 *
4525 *  Update the board statistics counters.
4526 *
4527 **********************************************************************/
4528static void
4529igb_update_stats_counters(struct adapter *adapter)
4530{
4531	struct ifnet   *ifp;
4532
4533	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4534	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4535		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4536		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4537	}
4538	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4539	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4540	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4541	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4542
4543	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4544	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4545	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4546	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4547	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4548	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4549	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4550	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4551	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4552	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4553	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4554	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4555	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4556	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4557	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4558	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4559	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4560	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4561	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4562	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4563
4564	/* For the 64-bit byte counters the low dword must be read first. */
4565	/* Both registers clear on the read of the high dword */
4566
4567	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4568	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4569
4570	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4571	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4572	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4573	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4574	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4575
4576	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4577	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4578
4579	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4580	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4581	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4582	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4583	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4584	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4585	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4586	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4587	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4588	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4589
4590	adapter->stats.algnerrc +=
4591		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4592	adapter->stats.rxerrc +=
4593		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4594	adapter->stats.tncrs +=
4595		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4596	adapter->stats.cexterr +=
4597		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4598	adapter->stats.tsctc +=
4599		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4600	adapter->stats.tsctfc +=
4601		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4602	ifp = adapter->ifp;
4603
4604	ifp->if_collisions = adapter->stats.colc;
4605
4606	/* Rx Errors */
4607	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4608	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4609	    adapter->stats.ruc + adapter->stats.roc +
4610	    adapter->stats.mpc + adapter->stats.cexterr;
4611
4612	/* Tx Errors */
4613	ifp->if_oerrors = adapter->stats.ecol +
4614	    adapter->stats.latecol + adapter->watchdog_events;
4615}
4616
4617
4618/**********************************************************************
4619 *
4620 *  This routine is called only when igb_display_debug_stats is enabled.
4621 *  This routine provides a way to take a look at important statistics
4622 *  maintained by the driver and hardware.
4623 *
4624 **********************************************************************/
4625static void
4626igb_print_debug_info(struct adapter *adapter)
4627{
4628	device_t dev = adapter->dev;
4629	struct igb_queue *que = adapter->queues;
4630	struct rx_ring *rxr = adapter->rx_rings;
4631	struct tx_ring *txr = adapter->tx_rings;
4632	uint8_t *hw_addr = adapter->hw.hw_addr;
4633
4634	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4635	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4636	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4637	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4638
4639#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4640	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4641	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4642	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4643#endif
4644
4645	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4646	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4647	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4648	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4649	    adapter->hw.fc.high_water,
4650	    adapter->hw.fc.low_water);
4651
4652	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
4653		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d  ", i,
4654		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4655		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4656		device_printf(dev, "rdh = %d, rdt = %d\n",
4657		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4658		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4659		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4660		    txr->me, (long long)txr->no_desc_avail);
4661		device_printf(dev, "TX(%d) Packets sent = %lld\n",
4662		    txr->me, (long long)txr->tx_packets);
4663		device_printf(dev, "RX(%d) Packets received = %lld  ",
4664		    rxr->me, (long long)rxr->rx_packets);
4665	}
4666
4667	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4668		struct lro_ctrl *lro = &rxr->lro;
4669		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4670		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4671		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4672		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4673		    (long long)rxr->rx_packets);
4674		device_printf(dev, " Split Packets = %lld ",
4675		    (long long)rxr->rx_split_packets);
4676		device_printf(dev, " Byte count = %lld\n",
4677		    (long long)rxr->rx_bytes);
4678		device_printf(dev,"RX(%d) LRO Queued= %d  ",
4679		    i, lro->lro_queued);
4680		device_printf(dev,"LRO Flushed= %d\n",lro->lro_flushed);
4681	}
4682
4683	for (int i = 0; i < adapter->num_queues; i++, que++)
4684		device_printf(dev,"QUE(%d) IRQs = %llx\n",
4685		    i, (long long)que->irqs);
4686
4687	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4688	device_printf(dev, "Mbuf defrag failed = %ld\n",
4689	    adapter->mbuf_defrag_failed);
4690	device_printf(dev, "Std mbuf header failed = %ld\n",
4691	    adapter->mbuf_header_failed);
4692	device_printf(dev, "Std mbuf packet failed = %ld\n",
4693	    adapter->mbuf_packet_failed);
4694	device_printf(dev, "Driver dropped packets = %ld\n",
4695	    adapter->dropped_pkts);
4696	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4697		adapter->no_tx_dma_setup);
4698}
4699
4700static void
4701igb_print_hw_stats(struct adapter *adapter)
4702{
4703	device_t dev = adapter->dev;
4704
4705	device_printf(dev, "Excessive collisions = %lld\n",
4706	    (long long)adapter->stats.ecol);
4707#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4708	device_printf(dev, "Symbol errors = %lld\n",
4709	    (long long)adapter->stats.symerrs);
4710#endif
4711	device_printf(dev, "Sequence errors = %lld\n",
4712	    (long long)adapter->stats.sec);
4713	device_printf(dev, "Defer count = %lld\n",
4714	    (long long)adapter->stats.dc);
4715	device_printf(dev, "Missed Packets = %lld\n",
4716	    (long long)adapter->stats.mpc);
4717	device_printf(dev, "Receive No Buffers = %lld\n",
4718	    (long long)adapter->stats.rnbc);
4719	/* RLEC is inaccurate on some hardware, calculate our own. */
4720	device_printf(dev, "Receive Length Errors = %lld\n",
4721	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4722	device_printf(dev, "Receive errors = %lld\n",
4723	    (long long)adapter->stats.rxerrc);
4724	device_printf(dev, "Crc errors = %lld\n",
4725	    (long long)adapter->stats.crcerrs);
4726	device_printf(dev, "Alignment errors = %lld\n",
4727	    (long long)adapter->stats.algnerrc);
4728	/* On 82575 these are collision counts */
4729	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4730	    (long long)adapter->stats.cexterr);
4731	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4732	device_printf(dev, "watchdog timeouts = %ld\n",
4733	    adapter->watchdog_events);
4734	device_printf(dev, "XON Rcvd = %lld\n",
4735	    (long long)adapter->stats.xonrxc);
4736	device_printf(dev, "XON Xmtd = %lld\n",
4737	    (long long)adapter->stats.xontxc);
4738	device_printf(dev, "XOFF Rcvd = %lld\n",
4739	    (long long)adapter->stats.xoffrxc);
4740	device_printf(dev, "XOFF Xmtd = %lld\n",
4741	    (long long)adapter->stats.xofftxc);
4742	device_printf(dev, "Good Packets Rcvd = %lld\n",
4743	    (long long)adapter->stats.gprc);
4744	device_printf(dev, "Good Packets Xmtd = %lld\n",
4745	    (long long)adapter->stats.gptc);
4746	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4747	    (long long)adapter->stats.tsctc);
4748	device_printf(dev, "TSO Contexts Failed = %lld\n",
4749	    (long long)adapter->stats.tsctfc);
4750}
4751
4752/**********************************************************************
4753 *
4754 *  This routine provides a way to dump out the adapter eeprom,
4755 *  often a useful debug/service tool. This only dumps the first
4756 *  32 words, stuff that matters is in that extent.
4757 *
4758 **********************************************************************/
4759static void
4760igb_print_nvm_info(struct adapter *adapter)
4761{
4762	u16	eeprom_data;
4763	int	i, j, row = 0;
4764
4765	/* Its a bit crude, but it gets the job done */
4766	printf("\nInterface EEPROM Dump:\n");
4767	printf("Offset\n0x0000  ");
4768	for (i = 0, j = 0; i < 32; i++, j++) {
4769		if (j == 8) { /* Make the offset block */
4770			j = 0; ++row;
4771			printf("\n0x00%x0  ",row);
4772		}
4773		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4774		printf("%04x ", eeprom_data);
4775	}
4776	printf("\n");
4777}
4778
4779static int
4780igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4781{
4782	struct adapter *adapter;
4783	int error;
4784	int result;
4785
4786	result = -1;
4787	error = sysctl_handle_int(oidp, &result, 0, req);
4788
4789	if (error || !req->newptr)
4790		return (error);
4791
4792	if (result == 1) {
4793		adapter = (struct adapter *)arg1;
4794		igb_print_debug_info(adapter);
4795	}
4796	/*
4797	 * This value will cause a hex dump of the
4798	 * first 32 16-bit words of the EEPROM to
4799	 * the screen.
4800	 */
4801	if (result == 2) {
4802		adapter = (struct adapter *)arg1;
4803		igb_print_nvm_info(adapter);
4804        }
4805
4806	return (error);
4807}
4808
4809
4810static int
4811igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4812{
4813	struct adapter *adapter;
4814	int error;
4815	int result;
4816
4817	result = -1;
4818	error = sysctl_handle_int(oidp, &result, 0, req);
4819
4820	if (error || !req->newptr)
4821		return (error);
4822
4823	if (result == 1) {
4824		adapter = (struct adapter *)arg1;
4825		igb_print_hw_stats(adapter);
4826	}
4827
4828	return (error);
4829}
4830
4831static void
4832igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4833	const char *description, int *limit, int value)
4834{
4835	*limit = value;
4836	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4837	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4838	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4839}
4840