/******************************************************************************

  Copyright (c) 2001-2008, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/igb/if_igb.c 176685 2008-03-01 08:23:35Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/pcpu.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "1.1.4";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	igb_fixup_rx(struct rx_ring *);
#endif
static void	igb_rx_checksum(u32, struct mbuf *);
static bool	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int);
static void	igb_enable_hw_vlans(struct adapter *);
static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	igb_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct igb_int_delay_info *, int, int);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);


static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define IGB_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define IGB_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
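/*
 * The hardware counts these delays in 1.024 usec ticks, hence the
 * rounded conversions above.  A quick sanity check of the arithmetic:
 * IGB_USECS_TO_TICKS(100) = (1000 * 100 + 512) / 1024 = 98 ticks, and
 * IGB_TICKS_TO_USECS(98) = (1024 * 98 + 500) / 1000 = 100, so a value
 * survives the round trip.
 */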
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int igb_tx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TIDV);
static int igb_rx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RDTR);
static int igb_tx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TADV);
static int igb_rx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RADV);
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
static int igb_smart_pwr_down = FALSE;
TUNABLE_INT("hw.igb.tx_int_delay", &igb_tx_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_int_delay", &igb_rx_int_delay_dflt);
TUNABLE_INT("hw.igb.tx_abs_int_delay", &igb_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_abs_int_delay", &igb_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.smart_pwr_down", &igb_smart_pwr_down);

/* These auto configure if set to 0, based on number of cpus */
extern int mp_ncpus;
static int igb_tx_queues = 1;
static int igb_rx_queues = 1;
TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
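/*
 * All of the TUNABLE_INT hooks above are read from the kernel
 * environment at module load, so they can be set before boot from
 * loader.conf, for example:
 *
 *	hw.igb.rx_process_limit="200"
 */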

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	igb_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), igb_rx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), igb_tx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    igb_rx_abs_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    igb_tx_abs_int_delay_dflt);
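	/*
	 * These show up under the adapter's sysctl tree, so on the
	 * first unit the receive delay would typically be tuned with
	 * something like:
	 *
	 *	sysctl dev.igb.0.rx_int_delay=<usecs>
	 */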

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;
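	/*
	 * A worked example of the check above: each descriptor is 16
	 * bytes, so 256 descriptors occupy 4096 bytes, a multiple of
	 * IGB_DBA_ALIGN (assuming the usual 128-byte alignment), while
	 * a count such as 250 (4000 bytes) would fail the modulo test
	 * and fall back to the default.
	 */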

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
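	/*
	 * With the standard 1500-byte MTU these work out to the
	 * classic Ethernet limits: 1500 + 14 + 4 = 1518 bytes maximum
	 * and 60 + 4 = 64 bytes minimum.
	 */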

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_hw_init;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_hw_init:
	e1000_remove_device(&adapter->hw);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

        igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

        if (adapter->wol) {
                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
                igb_enable_wakeup(dev);
        }

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}

static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	u32		queue = 0;

	/*
	** This is really just here for testing
	** TX multiqueue, ultimately what is
	** needed is the flow support in the stack
	** and appropriate logic here to deal with
	** it. -jfv
	*/
	if (adapter->num_tx_queues > 1)
		queue = (curcpu % adapter->num_tx_queues);

	txr = &adapter->tx_rings[queue];
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we
			 * initialize the hardware only when it is
			 * absolutely required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
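		/*
		 * This cap allows an MTU of up to
		 * 9234 - ETHER_HDR_LEN - ETHER_CRC_LEN = 9216 bytes,
		 * the jumbo frame ceiling this driver enforces.
		 */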
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
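		/* FALLTHROUGH */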
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }


	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	bool		tx_hang = FALSE;

	IGB_CORE_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start() queues a packet.
	** Then txeof keeps resetting it as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** With TX Multiqueue we need to check every queue's timer,
	** if any time out we do the reset.
	*/
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		if (txr->watchdog_timer == 0 ||
		    (--txr->watchdog_timer))
			continue;
		else {
			tx_hang = TRUE;
			break;
		}
	}
	if (tx_hang == FALSE)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		txr = adapter->tx_rings; /* reset pointer */
		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
			txr->watchdog_timer = IGB_TX_TIMEOUT;
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
		    txr->next_to_clean);
	}

	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba = 0;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest mac address, User can use a LAA */
        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_enable_hw_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		E1000_READ_REG(&adapter->hw, E1000_ICR);
		igb_enable_intr(adapter);
	}


	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = adapter->rx_rings;
	struct tx_ring *txr = adapter->tx_rings;
	uint32_t reg_icr;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			e1000_check_for_link(&adapter->hw);
			igb_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    igb_local_timer, adapter);
		}
	}
	igb_rxeof(rxr, count);
	IGB_CORE_UNLOCK(adapter);

	/* With polling we cannot do multiqueue */
	IGB_TX_LOCK(txr);
	igb_txeof(txr);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
	IGB_TX_UNLOCK(txr);
}
#endif /* DEVICE_POLLING */


static void
igb_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	IGB_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	adapter->hw.mac.get_link_status = 1;
	e1000_check_for_link(&adapter->hw);
	igb_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	IGB_CORE_UNLOCK(adapter);
	/* Rearm this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, E1000_EIMS_OTHER);
}

static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			/* More to clean, schedule another task */
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);

}

static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}


/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp;
	uint32_t	reg_icr;

	ifp = adapter->ifp;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}


/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		IGB_TX_UNLOCK(txr);
		taskqueue_enqueue(adapter->tq, &txr->tx_task);
	}
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++rxr->rx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	eicr, icr;

	++adapter->link_irq;
	eicr = E1000_READ_REG(&adapter->hw, E1000_EICR);
	if (eicr & E1000_EIMS_OTHER) {
		icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (!(icr & E1000_ICR_LSC))
			goto spurious;
	}
	taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

spurious:
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, E1000_EIMS_OTHER);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	e1000_check_for_link(&adapter->hw);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed, we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to the Advanced TX descriptors
 *  used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

        /*
         * Force a cleanup if number of TX descriptors
         * available hits the threshold
         */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Now do we at least have a minimal? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
         * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
        if (nsegs > (txr->tx_avail - 2)) {
                txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
        }
	m_head = *m_headp;

        /*
         * Set up the context descriptor:
         * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
         */
        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else
		/* Do all other context descriptor setup */
	if (igb_tx_ctx_setup(txr, m_head))
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

        tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);

}

static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &=  (~E1000_RCTL_UPE);
	reg_rctl &=  (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever multicast address list is updated.
 *
 **********************************************************************/

static void
igb_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[512]; /* Largest MTA is 4096 bits */
1692	int mcnt = 0;
1693
1694	IOCTL_DEBUGOUT("igb_set_multi: begin");
1695
1696	IF_ADDR_LOCK(ifp);
1697	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1698		if (ifma->ifma_addr->sa_family != AF_LINK)
1699			continue;
1700
1701		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1702			break;
1703
1704		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1705		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1706		mcnt++;
1707	}
1708	IF_ADDR_UNLOCK(ifp);
1709
1710	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1711		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1712		reg_rctl |= E1000_RCTL_MPE;
1713		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1714	} else
1715		e1000_update_mc_addr_list(&adapter->hw, mta,
1716		    mcnt, 1, adapter->hw.mac.rar_entry_count);
1717}
1718
1719
1720/*********************************************************************
1721 *  Timer routine
1722 *
1723 *  This routine checks for link status and updates statistics.
1724 *
1725 **********************************************************************/
1726
1727static void
1728igb_local_timer(void *arg)
1729{
1730	struct adapter	*adapter = arg;
1731	struct ifnet	*ifp = adapter->ifp;
1732
1733	IGB_CORE_LOCK_ASSERT(adapter);
1734
1735	e1000_check_for_link(&adapter->hw);
1736	igb_update_link_status(adapter);
1737	igb_update_stats_counters(adapter);
1738
1739	if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1740		igb_print_hw_stats(adapter);
1741
1742	/*
1743	 * Each second we check the watchdog to
1744	 * protect against hardware hangs.
1745	 */
1746	igb_watchdog(adapter);
1747
1748	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1749
1750}
1751
1752static void
1753igb_update_link_status(struct adapter *adapter)
1754{
1755	struct ifnet *ifp = adapter->ifp;
1756	device_t dev = adapter->dev;
1757	struct tx_ring *txr = adapter->tx_rings;
1758
1759	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1760	    E1000_STATUS_LU) {
1761		if (adapter->link_active == 0) {
1762			e1000_get_speed_and_duplex(&adapter->hw,
1763			    &adapter->link_speed, &adapter->link_duplex);
1764			if (bootverbose)
1765				device_printf(dev, "Link is up %d Mbps %s\n",
1766				    adapter->link_speed,
1767				    ((adapter->link_duplex == FULL_DUPLEX) ?
1768				    "Full Duplex" : "Half Duplex"));
1769			adapter->link_active = 1;
1770			ifp->if_baudrate = adapter->link_speed * 1000000;
1771			if_link_state_change(ifp, LINK_STATE_UP);
1772		}
1773	} else {
1774		if (adapter->link_active == 1) {
1775			ifp->if_baudrate = adapter->link_speed = 0;
1776			adapter->link_duplex = 0;
1777			if (bootverbose)
1778				device_printf(dev, "Link is Down\n");
1779			adapter->link_active = 0;
1780			if_link_state_change(ifp, LINK_STATE_DOWN);
1781			/* Turn off watchdogs */
1782			for (int i = 0; i < adapter->num_tx_queues;
1783			    i++, txr++)
1784				txr->watchdog_timer = FALSE;
1785		}
1786	}
1787}
1788
1789/*********************************************************************
1790 *
1791 *  This routine disables all traffic on the adapter by issuing a
1792 *  global reset on the MAC and deallocates TX/RX buffers.
1793 *
1794 **********************************************************************/
1795
1796static void
1797igb_stop(void *arg)
1798{
1799	struct adapter	*adapter = arg;
1800	struct ifnet	*ifp = adapter->ifp;
1801
1802	IGB_CORE_LOCK_ASSERT(adapter);
1803
1804	INIT_DEBUGOUT("igb_stop: begin");
1805
1806	igb_disable_intr(adapter);
1807
1808	callout_stop(&adapter->timer);
1809
1810	/* Tell the stack that the interface is no longer active */
1811	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1812
1814	e1000_reset_hw(&adapter->hw);
1815	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1816}
1817
1818
1819/*********************************************************************
1820 *
1821 *  Determine hardware revision.
1822 *
1823 **********************************************************************/
1824static void
1825igb_identify_hardware(struct adapter *adapter)
1826{
1827	device_t dev = adapter->dev;
1828
1829	/* Make sure our PCI config space has the necessary stuff set */
1830	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1831	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1832	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1833		device_printf(dev, "Memory Access and/or Bus Master bits "
1834		    "were not set!\n");
1835		adapter->hw.bus.pci_cmd_word |=
1836		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1837		pci_write_config(dev, PCIR_COMMAND,
1838		    adapter->hw.bus.pci_cmd_word, 2);
1839	}
1840
1841	/* Save off the information about this board */
1842	adapter->hw.vendor_id = pci_get_vendor(dev);
1843	adapter->hw.device_id = pci_get_device(dev);
1844	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1845	adapter->hw.subsystem_vendor_id =
1846	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
1847	adapter->hw.subsystem_device_id =
1848	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
1849
1850	/* Do Shared Code Init and Setup */
1851	if (e1000_set_mac_type(&adapter->hw)) {
1852		device_printf(dev, "Setup init failure\n");
1853		return;
1854	}
1855}
1856
1857static int
1858igb_allocate_pci_resources(struct adapter *adapter)
1859{
1860	device_t	dev = adapter->dev;
1861	int		rid, error = 0;
1862
1863	rid = PCIR_BAR(0);
1864	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1865	    &rid, RF_ACTIVE);
1866	if (adapter->pci_mem == NULL) {
1867		device_printf(dev, "Unable to allocate bus resource: memory\n");
1868		return (ENXIO);
1869	}
1870	adapter->osdep.mem_bus_space_tag =
1871	    rman_get_bustag(adapter->pci_mem);
1872	adapter->osdep.mem_bus_space_handle =
1873	    rman_get_bushandle(adapter->pci_mem);
1874	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
1875
1876	/*
1877	** Init the resource arrays
1878	*/
1879	for (int i = 0; i < IGB_MSIX_VEC; i++) {
1880		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
1881		adapter->tag[i] = NULL;
1882		adapter->res[i] = NULL;
1883	}
1884
1885	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
1886	adapter->num_rx_queues = 1;
1887
1888	/* This will setup either MSI/X or MSI */
1889	adapter->msix = igb_setup_msix(adapter);
1890
1891	adapter->hw.back = &adapter->osdep;
1892
1893	return (error);
1894}
1895
1896/*********************************************************************
1897 *
1898 *  Setup the Legacy or MSI Interrupt handler
1899 *
1900 **********************************************************************/
1901static int
1902igb_allocate_legacy(struct adapter *adapter)
1903{
1904	device_t dev = adapter->dev;
1905	int error;
1906
1907	/* Turn off all interrupts */
1908	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
1909
1910	/* Legacy RID at 0 */
1911	if (adapter->msix == 0)
1912		adapter->rid[0] = 0;
1913
1914	/* We allocate a single interrupt resource */
1915	adapter->res[0] = bus_alloc_resource_any(dev,
1916	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
1917	if (adapter->res[0] == NULL) {
1918		device_printf(dev, "Unable to allocate bus resource: "
1919		    "interrupt\n");
1920		return (ENXIO);
1921	}
1922
1923	/*
1924	 * Try allocating a fast interrupt and the associated deferred
1925	 * processing contexts.
1926	 */
1927	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
1928	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
1929	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
1930	    taskqueue_thread_enqueue, &adapter->tq);
1931	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
1932	    device_get_nameunit(adapter->dev));
1933	if ((error = bus_setup_intr(dev, adapter->res[0],
1934	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
1935	    &adapter->tag[0])) != 0) {
1936		device_printf(dev, "Failed to register fast interrupt "
1937			    "handler: %d\n", error);
1938		taskqueue_free(adapter->tq);
1939		adapter->tq = NULL;
1940		return (error);
1941	}
1942
1943	return (0);
1944}
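/*
 * Sketch of the fast-interrupt pattern registered above (illustrative
 * only; example_irq_filter is a hypothetical name, the real handler is
 * igb_irq_fast elsewhere in this file): the filter runs in interrupt
 * context, does minimal work, and defers the heavy lifting to the
 * taskqueue created above.
 */
#if 0
static int
example_irq_filter(void *arg)
{
	struct adapter *adapter = arg;

	/* Do only what must happen in interrupt context, then defer */
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
	return (FILTER_HANDLED);
}
#endif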
1945
1946
1947/*********************************************************************
1948 *
1949 *  Setup the MSIX Interrupt handlers:
1950 *
1951 **********************************************************************/
1952static int
1953igb_allocate_msix(struct adapter *adapter)
1954{
1955	device_t dev = adapter->dev;
1956	struct tx_ring *txr = adapter->tx_rings;
1957	struct rx_ring *rxr = adapter->rx_rings;
1958	int error, vector = 0;
1959
1960	/*
1961	 * Setup the interrupt handlers
1962	 */
1963
1964	/* TX Setup */
1965	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
1966		adapter->res[vector] = bus_alloc_resource_any(dev,
1967		    SYS_RES_IRQ, &adapter->rid[vector],
1968		    RF_SHAREABLE | RF_ACTIVE);
1969		if (adapter->res[vector] == NULL) {
1970			device_printf(dev,
1971			    "Unable to allocate bus resource: "
1972			    "MSIX TX Interrupt\n");
1973			return (ENXIO);
1974		}
1975		error = bus_setup_intr(dev, adapter->res[vector],
1976	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
1977		    txr, &adapter->tag[vector]);
1978		if (error) {
1979			adapter->res[vector] = NULL;
1980			device_printf(dev, "Failed to register TX handler");
1981			return (error);
1982		}
1983		/* Make tasklet for deferred handling - one per queue */
1984		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
1985		if (adapter->hw.mac.type == e1000_82575) {
1986			txr->eims = E1000_EICR_TX_QUEUE0 << i;
1987			/* MSIXBM registers start at 0 */
1988			txr->msix = adapter->rid[vector] - 1;
1989		} else {
1990			txr->eims = 1 << vector;
1991			txr->msix = adapter->rid[vector];
1992		}
1993	}
1994
1995	/* RX Setup */
1996	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
1997		adapter->res[vector] = bus_alloc_resource_any(dev,
1998		    SYS_RES_IRQ, &adapter->rid[vector],
1999		    RF_SHAREABLE | RF_ACTIVE);
2000		if (adapter->res[vector] == NULL) {
2001			device_printf(dev,
2002			    "Unable to allocate bus resource: "
2003			    "MSIX RX Interrupt\n");
2004			return (ENXIO);
2005		}
2006		error = bus_setup_intr(dev, adapter->res[vector],
2007	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
2008		    rxr, &adapter->tag[vector]);
2009		if (error) {
2010			adapter->res[vector] = NULL;
2011			device_printf(dev, "Failed to register RX handler");
2012			return (error);
2013		}
2014		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2015		if (adapter->hw.mac.type == e1000_82575) {
2016			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2017			rxr->msix = adapter->rid[vector] - 1;
2018		} else {
2019			rxr->eims = 1 << vector;
2020			rxr->msix = adapter->rid[vector];
2021		}
2022	}
2023
2024	/* And Link */
2025	adapter->res[vector] = bus_alloc_resource_any(dev,
2026	    SYS_RES_IRQ, &adapter->rid[vector],
2027		    RF_SHAREABLE | RF_ACTIVE);
2028	if (adapter->res[vector] == NULL) {
2029		device_printf(dev,
2030		    "Unable to allocate bus resource: "
2031		    "MSIX Link Interrupt\n");
2032		return (ENXIO);
2033	}
2034	if ((error = bus_setup_intr(dev, adapter->res[vector],
2035	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2036	    adapter, &adapter->tag[vector])) != 0) {
2037		device_printf(dev, "Failed to register Link handler");
2038		return (error);
2039	}
2040	if (adapter->hw.mac.type == e1000_82575)
2041		adapter->linkvec = adapter->rid[vector] - 1;
2042	else
2043		adapter->linkvec = adapter->rid[vector];
2044
2045	/* Make tasklet for deferred link interrupt handling */
2046	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2047
2048	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2049	    taskqueue_thread_enqueue, &adapter->tq);
2050	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2051	    device_get_nameunit(adapter->dev));
2052
2053	return (0);
2054}
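/*
 * Vector layout example for the allocation above (illustrative): with
 * two TX and two RX queues, vectors 0-1 service the TX rings, vectors
 * 2-3 the RX rings, and vector 4 the link interrupt, so rid[] holds
 * { 1, 2, 3, 4, 5 } since MSI-X RIDs start at 1.
 */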
2055
2056static void
2057igb_configure_queues(struct adapter *adapter)
2058{
2059	struct	e1000_hw *hw = &adapter->hw;
2060	struct	tx_ring	*txr;
2061	struct	rx_ring	*rxr;
2062
2063	/* Turn on MSIX */
2064	{ /* 82575: program the per-vector MSI-X bitmap registers */
2065		int tmp;
2066
2067		/* Enable MSI-X PBA support */
2068		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2069		tmp |= E1000_CTRL_EXT_PBA_CLR;
2070		/* Auto-Mask interrupts upon ICR read. */
2071		tmp |= E1000_CTRL_EXT_EIAME;
2072		tmp |= E1000_CTRL_EXT_IRCA;
2073		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2074
2075		/* Set the interrupt throttling rate. */
2076		for (int i = 0; i < 10; i++)
2077			E1000_WRITE_REG(&adapter->hw,
2078			    E1000_EITR(i), DEFAULT_ITR);
2079
2080		/* TX */
2081		for (int i = 0; i < adapter->num_tx_queues; i++) {
2082			txr = &adapter->tx_rings[i];
2083			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2084			    txr->eims);
2085			adapter->eims_mask |= txr->eims;
2086		}
2087
2088		/* RX */
2089		for (int i = 0; i < adapter->num_rx_queues; i++) {
2090			rxr = &adapter->rx_rings[i];
2091			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2092			    rxr->eims);
2093			adapter->eims_mask |= rxr->eims;
2094		}
2095
2096		/* Link */
2097		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2098		    E1000_EIMS_OTHER);
2099		adapter->eims_mask |= E1000_EIMS_OTHER;
2100	}
2101	return;
2102}
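/*
 * Worked example of the 82575 mapping above: with one TX and one RX
 * queue, txr->eims is E1000_EICR_TX_QUEUE0 and rxr->eims is
 * E1000_EICR_RX_QUEUE0, so eims_mask ends up as the OR of those two
 * bits plus E1000_EIMS_OTHER for the link vector.
 */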
2103
2104
2105static void
2106igb_free_pci_resources(struct adapter *adapter)
2107{
2108	device_t dev = adapter->dev;
2109
2110	/* Make sure the for loop below runs once */
2111	if (adapter->msix == 0)
2112		adapter->msix = 1;
2113
2114	/*
2115	 * First release all the interrupt resources:
2116	 *      notice that since these are just kept
2117	 *      in an array we can do the same logic
2118	 *      whether it's MSIX or just legacy.
2119	 */
2120	for (int i = 0; i < adapter->msix; i++) {
2121		if (adapter->tag[i] != NULL) {
2122			bus_teardown_intr(dev, adapter->res[i],
2123			    adapter->tag[i]);
2124			adapter->tag[i] = NULL;
2125		}
2126		if (adapter->res[i] != NULL) {
2127			bus_release_resource(dev, SYS_RES_IRQ,
2128			    adapter->rid[i], adapter->res[i]);
2129		}
2130	}
2131
2132	if (adapter->msix)
2133		pci_release_msi(dev);
2134
2135	if (adapter->msix_mem != NULL)
2136		bus_release_resource(dev, SYS_RES_MEMORY,
2137		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2138
2139	if (adapter->pci_mem != NULL)
2140		bus_release_resource(dev, SYS_RES_MEMORY,
2141		    PCIR_BAR(0), adapter->pci_mem);
2142
2143}
2144
2145/*
2146 * Setup Either MSI/X or MSI
2147 */
2148static int
2149igb_setup_msix(struct adapter *adapter)
2150{
2151	device_t dev = adapter->dev;
2152	int rid, want, queues, msgs;
2153
2154	/* First try MSI/X */
2155	rid = PCIR_BAR(IGB_MSIX_BAR);
2156	adapter->msix_mem = bus_alloc_resource_any(dev,
2157	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2158	if (!adapter->msix_mem) {
2159		/* May not be enabled */
2160		device_printf(adapter->dev,
2161		    "Unable to map MSIX table \n");
2162		goto msi;
2163	}
2164
2165	msgs = pci_msix_count(dev);
2166	if (msgs == 0) { /* system has msix disabled */
2167		bus_release_resource(dev, SYS_RES_MEMORY,
2168		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2169		adapter->msix_mem = NULL;
2170		goto msi;
2171	}
2172
2173	/* Figure out a reasonable auto config value */
2174	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2175
2176	if (igb_tx_queues == 0)
2177		igb_tx_queues = queues;
2178	if (igb_rx_queues == 0)
2179		igb_rx_queues = queues;
2180	want = igb_tx_queues + igb_rx_queues + 1;
2181	if (msgs >= want)
2182		msgs = want;
2183	else {
2184		device_printf(adapter->dev,
2185		    "MSIX Configuration Problem, "
2186		    "%d vectors but %d queues wanted!\n",
2187		    msgs, want);
2188		return (ENXIO);
2189	}
2190	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
2191		device_printf(adapter->dev,
2192		    "Using MSIX interrupts with %d vectors\n", msgs);
2193		adapter->num_tx_queues = igb_tx_queues;
2194		adapter->num_rx_queues = igb_rx_queues;
2195		return (msgs);
2196	}
2197msi:
2198	msgs = pci_msi_count(dev);
2199	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2200		device_printf(adapter->dev, "Using MSI interrupt\n");
2201	return (msgs);
2202}
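/*
 * Budgeting example for the logic above (illustrative numbers): with
 * msgs = 10 vectors and mp_ncpus = 4, queues = min(4, (10 - 1) / 2) = 4,
 * so want = 4 + 4 + 1 = 9 and nine vectors are allocated. Had the
 * igb_tx_queues/igb_rx_queues tunables forced 4 + 4 queues with only 6
 * vectors available, want (9) would exceed msgs (6) and the MSIX
 * configuration error above would be returned.
 */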
2203
2204/*********************************************************************
2205 *
2206 *  Initialize the hardware to a configuration
2207 *  as specified by the adapter structure.
2208 *
2209 **********************************************************************/
2210static int
2211igb_hardware_init(struct adapter *adapter)
2212{
2213	device_t	dev = adapter->dev;
2214	u32		rx_buffer_size;
2215
2216	INIT_DEBUGOUT("igb_hardware_init: begin");
2217
2218	/* Issue a global reset */
2219	e1000_reset_hw(&adapter->hw);
2220
2221	/* Let the firmware know the OS is in control */
2222	igb_get_hw_control(adapter);
2223
2224	/*
2225	 * These parameters control the automatic generation (Tx) and
2226	 * response (Rx) to Ethernet PAUSE frames.
2227	 * - High water mark should allow for at least two frames to be
2228	 *   received after sending an XOFF.
2229	 * - Low water mark works best when it is very near the high water mark.
2230	 *   This allows the receiver to restart by sending XON when it has
2231	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2232	 *   restart after one full frame is pulled from the buffer. There
2233	 *   could be several smaller frames in the buffer and if so they will
2234	 *   not trigger the XON until their total number reduces the buffer
2235	 *   by 1500.
2236	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2237	 */
2238	rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2239	    E1000_PBA) & 0xffff) << 10);
2240
2241	adapter->hw.fc.high_water = rx_buffer_size -
2242	    roundup2(adapter->max_frame_size, 1024);
2243	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2244
2245	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2246	adapter->hw.fc.send_xon = TRUE;
2247	adapter->hw.fc.type = e1000_fc_full;
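	/*
	 * Worked example (register value illustrative): if the low 16
	 * bits of E1000_PBA read 0x30, rx_buffer_size = 0x30 << 10 =
	 * 49152 bytes; with a 1518-byte max frame, high_water =
	 * 49152 - roundup2(1518, 1024) = 47104 and low_water =
	 * 47104 - 1500 = 45604.
	 */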
2248
2249	if (e1000_init_hw(&adapter->hw) < 0) {
2250		device_printf(dev, "Hardware Initialization Failed\n");
2251		return (EIO);
2252	}
2253
2254	e1000_check_for_link(&adapter->hw);
2255
2256	return (0);
2257}
2258
2259/*********************************************************************
2260 *
2261 *  Setup networking device structure and register an interface.
2262 *
2263 **********************************************************************/
2264static void
2265igb_setup_interface(device_t dev, struct adapter *adapter)
2266{
2267	struct ifnet   *ifp;
2268
2269	INIT_DEBUGOUT("igb_setup_interface: begin");
2270
2271	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2272	if (ifp == NULL)
2273		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2274	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2275	ifp->if_mtu = ETHERMTU;
2276	ifp->if_init =  igb_init;
2277	ifp->if_softc = adapter;
2278	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2279	ifp->if_ioctl = igb_ioctl;
2280	ifp->if_start = igb_start;
2281	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2282	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2283	IFQ_SET_READY(&ifp->if_snd);
2284
2285	ether_ifattach(ifp, adapter->hw.mac.addr);
2286
2287	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2288	ifp->if_capabilities |= IFCAP_TSO4;
2289	ifp->if_capenable = ifp->if_capabilities;
2292
2293	/*
2294	 * Tell the upper layer(s) we support long frames.
2295	 */
2296	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2297	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2298	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2299
2300#ifdef DEVICE_POLLING
2301	ifp->if_capabilities |= IFCAP_POLLING;
2302#endif
2303
2304	/*
2305	 * Specify the media types supported by this adapter and register
2306	 * callbacks to update media and link information
2307	 */
2308	ifmedia_init(&adapter->media, IFM_IMASK,
2309	    igb_media_change, igb_media_status);
2310	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2311	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2312		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2313			    0, NULL);
2314		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2315	} else {
2316		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2317		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2318			    0, NULL);
2319		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2320			    0, NULL);
2321		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2322			    0, NULL);
2323		if (adapter->hw.phy.type != e1000_phy_ife) {
2324			ifmedia_add(&adapter->media,
2325				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2326			ifmedia_add(&adapter->media,
2327				IFM_ETHER | IFM_1000_T, 0, NULL);
2328		}
2329	}
2330	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2331	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2332}
2333
2334
2335/*
2336 * Manage DMA'able memory.
2337 */
2338static void
2339igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2340{
2341	if (error)
2342		return;
2343	*(bus_addr_t *) arg = segs[0].ds_addr;
2344}
2345
2346static int
2347igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2348        struct igb_dma_alloc *dma, int mapflags)
2349{
2350	int error;
2351
2352	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2353				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2354				BUS_SPACE_MAXADDR,	/* lowaddr */
2355				BUS_SPACE_MAXADDR,	/* highaddr */
2356				NULL, NULL,		/* filter, filterarg */
2357				size,			/* maxsize */
2358				1,			/* nsegments */
2359				size,			/* maxsegsize */
2360				0,			/* flags */
2361				NULL,			/* lockfunc */
2362				NULL,			/* lockarg */
2363				&dma->dma_tag);
2364	if (error) {
2365		device_printf(adapter->dev,
2366		    "%s: bus_dma_tag_create failed: %d\n",
2367		    __func__, error);
2368		goto fail_0;
2369	}
2370
2371	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2372	    BUS_DMA_NOWAIT, &dma->dma_map);
2373	if (error) {
2374		device_printf(adapter->dev,
2375		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2376		    __func__, (uintmax_t)size, error);
2377		goto fail_2;
2378	}
2379
2380	dma->dma_paddr = 0;
2381	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2382	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2383	if (error || dma->dma_paddr == 0) {
2384		device_printf(adapter->dev,
2385		    "%s: bus_dmamap_load failed: %d\n",
2386		    __func__, error);
2387		goto fail_3;
2388	}
2389
2390	return (0);
2391
2392fail_3:
2393	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2394fail_2:
2395	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2396	bus_dma_tag_destroy(dma->dma_tag);
2397fail_0:
2398	dma->dma_map = NULL;
2399	dma->dma_tag = NULL;
2400
2401	return (error);
2402}
2403
2404static void
2405igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2406{
2407	if (dma->dma_tag == NULL)
2408		return;
2409	if (dma->dma_map != NULL) {
2410		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2411		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2412		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2413		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2414		dma->dma_map = NULL;
2415	}
2416	bus_dma_tag_destroy(dma->dma_tag);
2417	dma->dma_tag = NULL;
2418}
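/*
 * Usage sketch for the pair above (illustrative only; the real callers
 * are igb_allocate_queues() and friends):
 */
#if 0
	struct igb_dma_alloc dma;

	if (igb_dma_malloc(adapter, 4096, &dma, BUS_DMA_NOWAIT) == 0) {
		/* dma.dma_vaddr and dma.dma_paddr are now valid */
		igb_dma_free(adapter, &dma);
	}
#endif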
2419
2420
2421/*********************************************************************
2422 *
2423 *  Allocate memory for the transmit and receive rings, and then
2424 *  the descriptors associated with each, called only once at attach.
2425 *
2426 **********************************************************************/
2427static int
2428igb_allocate_queues(struct adapter *adapter)
2429{
2430	device_t dev = adapter->dev;
2431	struct tx_ring *txr;
2432	struct rx_ring *rxr;
2433	int rsize, tsize, error = E1000_SUCCESS;
2434	int txconf = 0, rxconf = 0;
2435	char	name_string[16];
2436
2437	/* First allocate the TX ring struct memory */
2438	if (!(adapter->tx_rings =
2439	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2440	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2441		device_printf(dev, "Unable to allocate TX ring memory\n");
2442		error = ENOMEM;
2443		goto fail;
2444	}
2445	txr = adapter->tx_rings;
2446
2447	/* Next allocate the RX */
2448	if (!(adapter->rx_rings =
2449	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2450	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2451		device_printf(dev, "Unable to allocate RX ring memory\n");
2452		error = ENOMEM;
2453		goto rx_fail;
2454	}
2455	rxr = adapter->rx_rings;
2456
2457	tsize = roundup2(adapter->num_tx_desc *
2458	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2459	/*
2460	 * Now set up the TX queues, txconf is needed to handle the
2461	 * possibility that things fail midcourse and we need to
2462	 * undo memory gracefully
2463	 */
2464	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2465		/* Set up some basics */
2466		txr = &adapter->tx_rings[i];
2467		txr->adapter = adapter;
2468		txr->me = i;
2469
2470		/* Initialize the TX lock */
2471		snprintf(name_string, sizeof(name_string), "%s:tx(%d)",
2472		    device_get_nameunit(dev), txr->me);
2473		mtx_init(&txr->tx_mtx, name_string, NULL, MTX_DEF);
2474
2475		if (igb_dma_malloc(adapter, tsize,
2476			&txr->txdma, BUS_DMA_NOWAIT)) {
2477			device_printf(dev,
2478			    "Unable to allocate TX Descriptor memory\n");
2479			error = ENOMEM;
2480			goto err_tx_desc;
2481		}
2482		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2483		bzero((void *)txr->tx_base, tsize);
2484
2485		/* Now allocate transmit buffers for the ring */
2486		if (igb_allocate_transmit_buffers(txr)) {
2487			device_printf(dev,
2488			    "Critical Failure setting up transmit buffers\n");
2489			error = ENOMEM;
2490			goto err_tx_desc;
2491		}
2492
2493	}
2494
2495	/*
2496	 * Next the RX queues...
2497	 */
2498	rsize = roundup2(adapter->num_rx_desc *
2499	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2500	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2501		rxr = &adapter->rx_rings[i];
2502		rxr->adapter = adapter;
2503		rxr->me = i;
2504
2505		/* Initialize the RX lock */
2506		snprintf(name_string, sizeof(name_string), "%s:rx(%d)",
2507		    device_get_nameunit(dev), rxr->me);
2508		mtx_init(&rxr->rx_mtx, name_string, NULL, MTX_DEF);
2509
2510		if (igb_dma_malloc(adapter, rsize,
2511			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2512			device_printf(dev,
2513			    "Unable to allocate RxDescriptor memory\n");
2514			error = ENOMEM;
2515			goto err_rx_desc;
2516		}
2517		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2518		bzero((void *)rxr->rx_base, rsize);
2519
2520		/* Allocate receive buffers for the ring */
2521		if (igb_allocate_receive_buffers(rxr)) {
2522			device_printf(dev,
2523			    "Critical Failure setting up receive buffers\n");
2524			error = ENOMEM;
2525			goto err_rx_desc;
2526		}
2527	}
2528
2529	return (0);
2530
2531err_rx_desc:
2532	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2533		igb_dma_free(adapter, &rxr->rxdma);
2534err_tx_desc:
2535	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2536		igb_dma_free(adapter, &txr->txdma);
2537	free(adapter->rx_rings, M_DEVBUF);
2538rx_fail:
2539	free(adapter->tx_rings, M_DEVBUF);
2540fail:
2541	return (error);
2542}
2543
2544/*********************************************************************
2545 *
2546 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2547 *  the information needed to transmit a packet on the wire. This is
2548 *  called only once at attach, setup is done every reset.
2549 *
2550 **********************************************************************/
2551static int
2552igb_allocate_transmit_buffers(struct tx_ring *txr)
2553{
2554	struct adapter *adapter = txr->adapter;
2555	device_t dev = adapter->dev;
2556	struct igb_buffer *txbuf;
2557	int error, i;
2558
2559	/*
2560	 * Setup DMA descriptor areas.
2561	 */
2562	if ((error = bus_dma_tag_create(NULL,		/* parent */
2563			       PAGE_SIZE, 0,		/* alignment, bounds */
2564			       BUS_SPACE_MAXADDR,	/* lowaddr */
2565			       BUS_SPACE_MAXADDR,	/* highaddr */
2566			       NULL, NULL,		/* filter, filterarg */
2567			       IGB_TSO_SIZE,		/* maxsize */
2568			       IGB_MAX_SCATTER,		/* nsegments */
2569			       PAGE_SIZE,		/* maxsegsize */
2570			       0,			/* flags */
2571			       NULL,			/* lockfunc */
2572			       NULL,			/* lockfuncarg */
2573			       &txr->txtag))) {
2574		device_printf(dev,"Unable to allocate TX DMA tag\n");
2575		goto fail;
2576	}
2577
2578	if (!(txr->tx_buffers =
2579	    (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2580	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2581		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2582		error = ENOMEM;
2583		goto fail;
2584	}
2585
2586        /* Create the descriptor buffer dma maps */
2587	txbuf = txr->tx_buffers;
2588	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2589		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2590		if (error != 0) {
2591			device_printf(dev, "Unable to create TX DMA map\n");
2592			goto fail;
2593		}
2594	}
2595
2596	return 0;
2597fail:
2598	/* Free everything; this handles the case where we failed part way */
2599	igb_free_transmit_structures(adapter);
2600	return (error);
2601}
2602
2603/*********************************************************************
2604 *
2605 *  Initialize a transmit ring.
2606 *
2607 **********************************************************************/
2608static void
2609igb_setup_transmit_ring(struct tx_ring *txr)
2610{
2611	struct adapter *adapter = txr->adapter;
2612	struct igb_buffer *txbuf;
2613	int i;
2614
2615	/* Clear the old ring contents */
2616	bzero((void *)txr->tx_base,
2617	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2618	/* Reset indices */
2619	txr->next_avail_desc = 0;
2620	txr->next_to_clean = 0;
2621
2622	/* Free any existing tx buffers. */
2623	txbuf = txr->tx_buffers;
2624	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2625		if (txbuf->m_head != NULL) {
2626			bus_dmamap_sync(txr->txtag, txbuf->map,
2627			    BUS_DMASYNC_POSTWRITE);
2628			bus_dmamap_unload(txr->txtag, txbuf->map);
2629			m_freem(txbuf->m_head);
2630			txbuf->m_head = NULL;
2631		}
2632		/* clear the watch index */
2633		txbuf->next_eop = -1;
2634	}
2635
2636	/* Set number of descriptors available */
2637	txr->tx_avail = adapter->num_tx_desc;
2638
2639	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2640	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2641
2642}
2643
2644/*********************************************************************
2645 *
2646 *  Initialize all transmit rings.
2647 *
2648 **********************************************************************/
2649static void
2650igb_setup_transmit_structures(struct adapter *adapter)
2651{
2652	struct tx_ring *txr = adapter->tx_rings;
2653
2654	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2655		igb_setup_transmit_ring(txr);
2656
2657	return;
2658}
2659
2660/*********************************************************************
2661 *
2662 *  Enable transmit unit.
2663 *
2664 **********************************************************************/
2665static void
2666igb_initialize_transmit_units(struct adapter *adapter)
2667{
2668	struct tx_ring	*txr = adapter->tx_rings;
2669	u32		tctl, txdctl, tipg = 0;
2670
2671	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2672
2673	/* Setup the Base and Length of the Tx Descriptor Rings */
2674	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2675		u64 bus_addr = txr->txdma.dma_paddr;
2676
2677		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2678		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2679		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2680		    (uint32_t)(bus_addr >> 32));
2681		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2682		    (uint32_t)bus_addr);
2683
2684		/* Setup the HW Tx Head and Tail descriptor pointers */
2685		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2686		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2687
2688		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2689		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2690		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2691
2692		/* Setup Transmit Descriptor Base Settings */
2693		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2694
2695		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2696		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2697		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2698	}
2699
2700	/* Set the default values for the Tx Inter Packet Gap timer */
2701	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2702	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
2703		tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2704	else
2705		tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2706
2707	tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2708	tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2709
2710	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
2711	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
2712	E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value);
2713
2714	/* Program the Transmit Control Register */
2715	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2716	tctl &= ~E1000_TCTL_CT;
2717	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2718		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2719
2720	/* This write will effectively turn on the transmit unit. */
2721	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2722
2723}
2724
2725/*********************************************************************
2726 *
2727 *  Free all transmit rings.
2728 *
2729 **********************************************************************/
2730static void
2731igb_free_transmit_structures(struct adapter *adapter)
2732{
2733	struct tx_ring *txr = adapter->tx_rings;
2734
2735	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2736		IGB_TX_LOCK(txr);
2737		igb_free_transmit_buffers(txr);
2738		igb_dma_free(adapter, &txr->txdma);
2739		IGB_TX_UNLOCK(txr);
2740		IGB_TX_LOCK_DESTROY(txr);
2741	}
2742	free(adapter->tx_rings, M_DEVBUF);
2743}
2744
2745/*********************************************************************
2746 *
2747 *  Free transmit ring related data structures.
2748 *
2749 **********************************************************************/
2750static void
2751igb_free_transmit_buffers(struct tx_ring *txr)
2752{
2753	struct adapter *adapter = txr->adapter;
2754	struct igb_buffer *tx_buffer;
2755	int             i;
2756
2757	INIT_DEBUGOUT("free_transmit_ring: begin");
2758
2759	if (txr->tx_buffers == NULL)
2760		return;
2761
2762	tx_buffer = txr->tx_buffers;
2763	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2764		if (tx_buffer->m_head != NULL) {
2765			bus_dmamap_sync(txr->txtag, tx_buffer->map,
2766			    BUS_DMASYNC_POSTWRITE);
2767			bus_dmamap_unload(txr->txtag,
2768			    tx_buffer->map);
2769			m_freem(tx_buffer->m_head);
2770			tx_buffer->m_head = NULL;
2771			if (tx_buffer->map != NULL) {
2772				bus_dmamap_destroy(txr->txtag,
2773				    tx_buffer->map);
2774				tx_buffer->map = NULL;
2775			}
2776		} else if (tx_buffer->map != NULL) {
2777			bus_dmamap_unload(txr->txtag,
2778			    tx_buffer->map);
2779			bus_dmamap_destroy(txr->txtag,
2780			    tx_buffer->map);
2781			tx_buffer->map = NULL;
2782		}
2783	}
2784
2785	if (txr->tx_buffers != NULL) {
2786		free(txr->tx_buffers, M_DEVBUF);
2787		txr->tx_buffers = NULL;
2788	}
2789	if (txr->txtag != NULL) {
2790		bus_dma_tag_destroy(txr->txtag);
2791		txr->txtag = NULL;
2792	}
2793	return;
2794}
2795
2796/**********************************************************************
2797 *
2798 *  Setup work for hardware segmentation offload (TSO) on
2799 *  adapters using advanced tx descriptors (82575)
2800 *
2801 **********************************************************************/
2802static boolean_t
2803igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
2804{
2805	struct adapter *adapter = txr->adapter;
2806	struct e1000_adv_tx_context_desc *TXD;
2807	struct igb_buffer        *tx_buffer;
2808	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2809	u32 mss_l4len_idx = 0;
2810	u16 vtag = 0;
2811	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2812	struct ether_vlan_header *eh;
2813	struct ip *ip;
2814	struct tcphdr *th;
2815
2816
2817	/*
2818	 * Determine where frame payload starts.
2819	 * Jump over vlan headers if already present
2820	 */
2821	eh = mtod(mp, struct ether_vlan_header *);
2822	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
2823		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2824	else
2825		ehdrlen = ETHER_HDR_LEN;
2826
2827	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2828	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2829		return FALSE;
2830
2831	/* Only supports IPV4 for now */
2832	ctxd = txr->next_avail_desc;
2833	tx_buffer = &txr->tx_buffers[ctxd];
2834	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2835
2836	ip = (struct ip *)(mp->m_data + ehdrlen);
2837	if (ip->ip_p != IPPROTO_TCP)
2838		return FALSE;
2839	ip->ip_len = 0;
2840	ip->ip_sum = 0;
2841	ip_hlen = ip->ip_hl << 2;
2842	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2843	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2844	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
2845	tcp_hlen = th->th_off << 2;
2846	/*
2847	 * Calculate the header length; this is used
2848	 * in the transmit descriptor in igb_xmit().
2849	 */
2850	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
2851
2852	/* VLAN MACLEN IPLEN */
2853	if (mp->m_flags & M_VLANTAG) {
2854		vtag = htole16(mp->m_pkthdr.ether_vtag);
2855		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2856	}
2857
2858	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
2859	vlan_macip_lens |= ip_hlen;
2860	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
2861
2862	/* ADV DTYPE TUCMD */
2863	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2864	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2865	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2866	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
2867
2868	/* MSS L4LEN IDX */
2869	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
2870	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
2871	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2872
2873	TXD->seqnum_seed = htole32(0);
2874	tx_buffer->m_head = NULL;
2875	tx_buffer->next_eop = -1;
2876
2877	if (++ctxd == adapter->num_tx_desc)
2878		ctxd = 0;
2879
2880	txr->tx_avail--;
2881	txr->next_avail_desc = ctxd;
2882	return TRUE;
2883}
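/*
 * For illustration: on an untagged IPv4/TCP frame with no IP or TCP
 * options, the value computed above is *hdrlen = 14 + 20 + 20 = 54
 * bytes, which igb_xmit then uses when filling in the transmit
 * descriptor.
 */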
2884
2885
2886/*********************************************************************
2887 *
2888 *  Context Descriptor setup for VLAN or CSUM
2889 *
2890 **********************************************************************/
2891
2892static boolean_t
2893igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
2894{
2895	struct adapter *adapter = txr->adapter;
2896	struct e1000_adv_tx_context_desc *TXD;
2897	struct igb_buffer        *tx_buffer;
2898	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2899	struct ether_vlan_header *eh;
2900	struct ip *ip = NULL;
2901	struct ip6_hdr *ip6;
2902	int  ehdrlen, ip_hlen = 0;
2903	u16	etype;
2904	u8	ipproto = 0;
2905	bool	offload = TRUE;
2906	u16 vtag = 0;
2907
2908	int ctxd = txr->next_avail_desc;
2909	tx_buffer = &txr->tx_buffers[ctxd];
2910	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2911
2912	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2913		offload = FALSE; /* Only here to handle VLANs */
2914	/*
2915	** In advanced descriptors the vlan tag must
2916	** be placed into the descriptor itself.
2917	*/
2918	if (mp->m_flags & M_VLANTAG) {
2919		vtag = htole16(mp->m_pkthdr.ether_vtag);
2920		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2921	} else if (offload == FALSE)
2922		return FALSE;
2923	/*
2924	 * Determine where frame payload starts.
2925	 * Jump over vlan headers if already present,
2926	 * helpful for QinQ too.
2927	 */
2928	eh = mtod(mp, struct ether_vlan_header *);
2929	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2930		etype = ntohs(eh->evl_proto);
2931		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2932	} else {
2933		etype = ntohs(eh->evl_encap_proto);
2934		ehdrlen = ETHER_HDR_LEN;
2935	}
2936
2937	/* Set the ether header length */
2938	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
2939
2940	switch (etype) {
2941		case ETHERTYPE_IP:
2942			ip = (struct ip *)(mp->m_data + ehdrlen);
2943			ip_hlen = ip->ip_hl << 2;
2944			if (mp->m_len < ehdrlen + ip_hlen) {
2945				offload = FALSE;
2946				break;
2947			}
2948			ipproto = ip->ip_p;
2949			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2950			break;
2951		case ETHERTYPE_IPV6:
2952			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2953			ip_hlen = sizeof(struct ip6_hdr);
2954			if (mp->m_len < ehdrlen + ip_hlen)
2955				return FALSE; /* failure */
2956			ipproto = ip6->ip6_nxt;
2957			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2958			break;
2959		default:
2960			offload = FALSE;
2961			break;
2962	}
2963
2964	vlan_macip_lens |= ip_hlen;
2965	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2966
2967	switch (ipproto) {
2968		case IPPROTO_TCP:
2969			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
2970				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2971			break;
2972		case IPPROTO_UDP:
2973		{
2974			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
2975				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
2976			break;
2977		}
2978		default:
2979			offload = FALSE;
2980			break;
2981	}
2982
2983	/* Now copy bits into descriptor */
2984	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
2985	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
2986	TXD->seqnum_seed = htole32(0);
2987	TXD->mss_l4len_idx = htole32(0);
2988
2989	tx_buffer->m_head = NULL;
2990	tx_buffer->next_eop = -1;
2991
2992	/* We've consumed the first desc, adjust counters */
2993	if (++ctxd == adapter->num_tx_desc)
2994		ctxd = 0;
2995	txr->next_avail_desc = ctxd;
2996	--txr->tx_avail;
2997
2998        return (offload);
2999}
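/*
 * Bit-packing example for the context descriptor above (values
 * illustrative, shift macros from the e1000 shared code): an untagged
 * IPv4 frame with a 14-byte Ethernet header and a 20-byte IP header
 * yields vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20.
 */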
3000
3001
3002/**********************************************************************
3003 *
3004 *  Examine each tx_buffer in the used queue. If the hardware is done
3005 *  processing the packet then free associated resources. The
3006 *  tx_buffer is put back on the free queue.
3007 *
3008 *  TRUE return means there's work in the ring to clean, FALSE means it's empty.
3009 **********************************************************************/
3010static bool
3011igb_txeof(struct tx_ring *txr)
3012{
3013	struct adapter	*adapter = txr->adapter;
3014        int first, last, done, num_avail;
3015        struct igb_buffer *tx_buffer;
3016        struct e1000_tx_desc   *tx_desc, *eop_desc;
3017	struct ifnet   *ifp = adapter->ifp;
3018
3019	IGB_TX_LOCK_ASSERT(txr);
3020
3021        if (txr->tx_avail == adapter->num_tx_desc)
3022                return FALSE;
3023
3024        num_avail = txr->tx_avail;
3025        first = txr->next_to_clean;
3026        tx_desc = &txr->tx_base[first];
3027        tx_buffer = &txr->tx_buffers[first];
3028	last = tx_buffer->next_eop;
3029        eop_desc = &txr->tx_base[last];
3030
3031	/*
3032	 * What this does is get the index of the
3033	 * first descriptor AFTER the EOP of the
3034	 * first packet, that way we can do the
3035	 * simple comparison on the inner while loop.
3036	 */
3037	if (++last == adapter->num_tx_desc)
3038		last = 0;
3039	done = last;
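	/*
	 * Example: in a 256-descriptor ring with first = 250 and
	 * next_eop = 255, 'done' wraps to 0, so the inner loop below
	 * cleans descriptors 250 through 255 and stops once 'first'
	 * wraps to 0 as well.
	 */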
3040
3041        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3042            BUS_DMASYNC_POSTREAD);
3043
3044        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3045		/* We clean the range of the packet */
3046		while (first != done) {
3047                	tx_desc->upper.data = 0;
3048                	tx_desc->lower.data = 0;
3049                	tx_desc->buffer_addr = 0;
3050                	num_avail++;
3051
3052			if (tx_buffer->m_head) {
3053				ifp->if_opackets++;
3054				bus_dmamap_sync(txr->txtag,
3055				    tx_buffer->map,
3056				    BUS_DMASYNC_POSTWRITE);
3057				bus_dmamap_unload(txr->txtag,
3058				    tx_buffer->map);
3059
3060                        	m_freem(tx_buffer->m_head);
3061                        	tx_buffer->m_head = NULL;
3062                	}
3063			tx_buffer->next_eop = -1;
3064
3065	                if (++first == adapter->num_tx_desc)
3066				first = 0;
3067
3068	                tx_buffer = &txr->tx_buffers[first];
3069			tx_desc = &txr->tx_base[first];
3070		}
3071		/* See if we can continue to the next packet */
3072		last = tx_buffer->next_eop;
3073		if (last != -1) {
3074        		eop_desc = &txr->tx_base[last];
3075			/* Get new done point */
3076			if (++last == adapter->num_tx_desc) last = 0;
3077			done = last;
3078		} else
3079			break;
3080        }
3081        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3082            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3083
3084        txr->next_to_clean = first;
3085
3086        /*
3087         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3088         * that it is OK to send packets.
3089         * If there are no pending descriptors, clear the timeout. Otherwise,
3090         * if some descriptors have been freed, restart the timeout.
3091         */
3092        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3093                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3094		/* All clean, turn off the timer */
3095                if (num_avail == adapter->num_tx_desc) {
3096			txr->watchdog_timer = 0;
3097        		txr->tx_avail = num_avail;
3098			return FALSE;
3099		}
3100		/* Some cleaned, reset the timer */
3101                else if (num_avail != txr->tx_avail)
3102			txr->watchdog_timer = IGB_TX_TIMEOUT;
3103        }
3104        txr->tx_avail = num_avail;
3105        return TRUE;
3106}
3107
3108
3109/*********************************************************************
3110 *
3111 *  Get a buffer from system mbuf buffer pool.
3112 *
3113 **********************************************************************/
3114static int
3115igb_get_buf(struct rx_ring *rxr, int i)
3116{
3117	struct adapter		*adapter = rxr->adapter;
3118	struct mbuf		*m;
3119	bus_dma_segment_t	segs[1];
3120	bus_dmamap_t		map;
3121	struct igb_buffer	*rx_buffer;
3122	int			error, nsegs;
3123
3124	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3125	if (m == NULL) {
3126		adapter->mbuf_cluster_failed++;
3127		return (ENOBUFS);
3128	}
3129	m->m_len = m->m_pkthdr.len = MCLBYTES;
3130
3131	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3132		m_adj(m, ETHER_ALIGN);
3133
3134	/*
3135	 * Using memory from the mbuf cluster pool, invoke the
3136	 * bus_dma machinery to arrange the memory mapping.
3137	 */
3138	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3139	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
3140	if (error != 0) {
3141		m_free(m);
3142		return (error);
3143	}
3144
3145	/* If nsegs is wrong then the stack is corrupt. */
3146	KASSERT(nsegs == 1, ("Too many segments returned!"));
3147
3148	rx_buffer = &rxr->rx_buffers[i];
3149	if (rx_buffer->m_head != NULL)
3150		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3151
3152	map = rx_buffer->map;
3153	rx_buffer->map = rxr->rx_spare_map;
3154	rxr->rx_spare_map = map;
3155	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3156	rx_buffer->m_head = m;
3157
3158	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
3159	return (0);
3160}
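/*
 * Note on the map swap above (a sketch of the idea, not extra driver
 * code): the new mbuf is always loaded into rx_spare_map first, so if
 * bus_dmamap_load_mbuf_sg() fails the previously loaded buffer and its
 * map are left intact; only on success are the ring slot's map and the
 * spare map exchanged.
 */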
3161
3162
3163/*********************************************************************
3164 *
3165 *  Allocate memory for rx_buffer structures. Since we use one
3166 *  rx_buffer per received packet, the maximum number of rx_buffers
3167 *  that we'll need is equal to the number of receive descriptors
3168 *  that we've allocated.
3169 *
3170 **********************************************************************/
3171static int
3172igb_allocate_receive_buffers(struct rx_ring *rxr)
3173{
3174	struct	adapter 	*adapter = rxr->adapter;
3175	device_t 		dev = adapter->dev;
3176	struct igb_buffer 	*rxbuf;
3177	int             	i, bsize, error;
3178
3179	bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3180	if (!(rxr->rx_buffers =
3181	    (struct igb_buffer *) malloc(bsize,
3182	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3183		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3184		error = ENOMEM;
3185		goto fail;
3186	}
3187
3188	if ((error = bus_dma_tag_create(NULL,		/* parent */
3189				   PAGE_SIZE, 0,	/* alignment, bounds */
3190				   BUS_SPACE_MAXADDR,	/* lowaddr */
3191				   BUS_SPACE_MAXADDR,	/* highaddr */
3192				   NULL, NULL,		/* filter, filterarg */
3193				   MCLBYTES,		/* maxsize */
3194				   1,			/* nsegments */
3195				   MCLBYTES,		/* maxsegsize */
3196				   0,			/* flags */
3197				   NULL,		/* lockfunc */
3198				   NULL,		/* lockfuncarg */
3199				   &rxr->rxtag))) {
3200		device_printf(dev, "Unable to create RX Small DMA tag\n");
3201		goto fail;
3202	}
3203
3204	/* Create the spare map (used by getbuf) */
3205	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3206	    &rxr->rx_spare_map);
3207	if (error) {
3208		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3209		    __func__, error);
3210		goto fail;
3211	}
3212
3213	for (i = 0; i < adapter->num_rx_desc; i++) {
3214		rxbuf = &rxr->rx_buffers[i];
3215		error = bus_dmamap_create(rxr->rxtag,
3216		    BUS_DMA_NOWAIT, &rxbuf->map);
3217		if (error) {
3218			device_printf(dev, "Unable to create Small RX DMA map\n");
3219			goto fail;
3220		}
3221	}
3222
3223	return (0);
3224
3225fail:
3226	/* Frees all, but can handle partial completion */
3227	igb_free_receive_structures(adapter);
3228	return (error);
3229}
3230
3231/*********************************************************************
3232 *
3233 *  Initialize a receive ring and its buffers.
3234 *
3235 **********************************************************************/
3236static int
3237igb_setup_receive_ring(struct rx_ring *rxr)
3238{
3239	struct	adapter	*adapter;
3240	struct igb_buffer *rxbuf;
3241	int j, rsize;
3242
3243	adapter = rxr->adapter;
3244	rsize = roundup2(adapter->num_rx_desc *
3245	    sizeof(union e1000_adv_rx_desc), 4096);
3246	/* Clear the ring contents */
3247	bzero((void *)rxr->rx_base, rsize);
3248
3249	/*
3250	** Free any mbufs currently loaded in the
3251	** ring before fresh receive buffers are
3252	** allocated below.
3253	*/
3254	for (int i = 0; i < adapter->num_rx_desc; i++) {
3255		rxbuf = &rxr->rx_buffers[i];
3256		if (rxbuf->m_head != NULL) {
3257			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3258			    BUS_DMASYNC_POSTREAD);
3259			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3260			m_freem(rxbuf->m_head);
3261			rxbuf->m_head = NULL;
3262		}
3263	}
3264
3265	for (j = 0; j < adapter->num_rx_desc; j++) {
3266		if (igb_get_buf(rxr, j) == ENOBUFS) {
3267			rxr->rx_buffers[j].m_head = NULL;
3268			rxr->rx_base[j].read.pkt_addr = 0;
3269			goto fail;
3270		}
3271	}
3272
3273	/* Setup our descriptor indices */
3274	rxr->next_to_check = 0;
3275	rxr->last_cleaned = 0;
3276
3277	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3278	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3279
3280	return (0);
3281fail:
3282	/*
3283	 * We need to clean up any buffers allocated so far
3284	 * 'j' is the failing index, decrement it to get the
3285	 * last success.
3286	 */
3287	for (--j; j >= 0; j--) {
3288		rxbuf = &rxr->rx_buffers[j];
3289		if (rxbuf->m_head != NULL) {
3290			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3291			    BUS_DMASYNC_POSTREAD);
3292			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3293			m_freem(rxbuf->m_head);
3294			rxbuf->m_head = NULL;
3295		}
3296	}
3297	return (ENOBUFS);
3298}
3299
3300/*********************************************************************
3301 *
3302 *  Initialize all receive rings.
3303 *
3304 **********************************************************************/
3305static int
3306igb_setup_receive_structures(struct adapter *adapter)
3307{
3308	struct rx_ring *rxr = adapter->rx_rings;
3309	int i, j;
3310
3311	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3312		if (igb_setup_receive_ring(rxr))
3313			goto fail;
3314
3315	return (0);
3316fail:
3317	/*
3318	 * Free RX buffers allocated so far, we will only handle
3319	 * the rings that completed, the failing case will have
3320	 * cleaned up for itself. The value of 'i' will be the
3321	 * failed ring so we must pre-decrement it.
3322	 */
3323	rxr = adapter->rx_rings;
3324	for (--i; i >= 0; i--, rxr++) {
3325		for (j = 0; j < adapter->num_rx_desc; j++) {
3326			struct igb_buffer *rxbuf;
3327			rxbuf = &rxr->rx_buffers[j];
3328			if (rxbuf->m_head != NULL) {
3329				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3330			  	  BUS_DMASYNC_POSTREAD);
3331				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3332				m_freem(rxbuf->m_head);
3333				rxbuf->m_head = NULL;
3334			}
3335		}
3336	}
3337
3338	return (ENOBUFS);
3339}
3340
3341/*********************************************************************
3342 *
3343 *  Enable receive unit.
3344 *
3345 **********************************************************************/
3346static void
3347igb_initialize_receive_units(struct adapter *adapter)
3348{
3349	struct rx_ring	*rxr = adapter->rx_rings;
3350	struct ifnet	*ifp = adapter->ifp;
3351	u32		rctl, rxcsum, psize;
3352
3353	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3354
3355	/*
3356	 * Make sure receives are disabled while setting
3357	 * up the descriptor ring
3358	 */
3359	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3360	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3361
3362	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3363	    adapter->rx_abs_int_delay.value);
3364
3365	/* Setup the Base and Length of the Rx Descriptor Rings */
3366	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3367		u64 bus_addr = rxr->rxdma.dma_paddr;
3368		u32 rxdctl, srrctl;
3369
3370		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3371		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3372		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3373		    (uint32_t)(bus_addr >> 32));
3374		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3375		    (uint32_t)bus_addr);
3376		/* Use Advanced Descriptor type */
3377		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
3378		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3379		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3380		/* Enable this Queue */
3381		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3382		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3383		rxdctl &= 0xFFF00000;
3384		rxdctl |= IGB_RX_PTHRESH;
3385		rxdctl |= IGB_RX_HTHRESH << 8;
3386		rxdctl |= IGB_RX_WTHRESH << 16;
3387		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3388	}
3389
3390	/*
3391	** Setup for RX MultiQueue
3392	*/
3393	if (adapter->num_rx_queues > 1) {
3394		u32 random[10], mrqc, shift = 0;
3395		union igb_reta {
3396			u32 dword;
3397			u8  bytes[4];
3398		} reta;
3399
3400		arc4rand(&random, sizeof(random), 0);
3401		if (adapter->hw.mac.type == e1000_82575)
3402			shift = 6;
3403		/* Fill the 128-entry redirection table, 4 bytes per write */
3404		for (int i = 0; i < 128; i++) {
3405			reta.bytes[i & 3] =
3406			    (i % adapter->num_rx_queues) << shift;
3407			if ((i & 3) == 3)
3408				E1000_WRITE_REG(&adapter->hw,
3409				    E1000_RETA(i & ~3), reta.dword);
3410		}
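		/*
		 * RETA packing example: with two RX queues each 32-bit
		 * write above covers four table entries, e.g. entries
		 * 0-3 become bytes { 0, 1, 0, 1 }, each shifted left
		 * by 6 on the 82575.
		 */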
3411		/* Now fill in hash table */
3412		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3413		for (int i = 0; i < 10; i++)
3414			E1000_WRITE_REG_ARRAY(&adapter->hw,
3415			    E1000_RSSRK(0), i, random[i]);
3416
3417		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3418		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3419		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3420		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3421		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3422		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3423		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3424		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3425
3426		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3427
3428		/*
3429		** NOTE: Receive Full-Packet Checksum Offload
3430		** is mutually exclusive with Multiqueue. However
3431		** this is not the same as TCP/IP checksums which
3432		** still work.
3433		*/
3434		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3435		rxcsum |= E1000_RXCSUM_PCSD;
3436		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3437	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
3438		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3439		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3440		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3441	}
3442
3443	/* Setup the Receive Control Register */
3444	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3445	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3446		   E1000_RCTL_RDMTS_HALF |
3447		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3448
3449	/* Make sure VLAN Filters are off */
3450	rctl &= ~E1000_RCTL_VFE;
3451
3452	rctl &= ~E1000_RCTL_SBP;
3453
3454	switch (adapter->rx_buffer_len) {
3455	default:
3456	case 2048:
3457		rctl |= E1000_RCTL_SZ_2048;
3458		break;
3459	case 4096:
3460		rctl |= E1000_RCTL_SZ_4096 |
3461		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3462		break;
3463	case 8192:
3464		rctl |= E1000_RCTL_SZ_8192 |
3465		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3466		break;
3467	case 16384:
3468		rctl |= E1000_RCTL_SZ_16384 |
3469		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3470		break;
3471	}
3472
3473	if (ifp->if_mtu > ETHERMTU) {
3474		/* Set maximum packet len */
3475		psize = adapter->max_frame_size;
3476		/* are we on a vlan? */
3477		if (adapter->ifp->if_vlantrunk != NULL)
3478			psize += VLAN_TAG_SIZE;
3479		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3480		rctl |= E1000_RCTL_LPE;
3481	} else
3482		rctl &= ~E1000_RCTL_LPE;
3483
3484	/* Enable Receives */
3485	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3486
3487	/*
3488	 * Setup the HW Rx Head and Tail Descriptor Pointers
3489	 *   - needs to be after enable
3490	 */
3491	for (int i = 0; i < adapter->num_rx_queues; i++) {
3492		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3493		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3494		     adapter->num_rx_desc - 1);
3495	}
3496	return;
3497}
3498
3499/*********************************************************************
3500 *
3501 *  Free receive rings.
3502 *
3503 **********************************************************************/
3504static void
3505igb_free_receive_structures(struct adapter *adapter)
3506{
3507	struct rx_ring *rxr = adapter->rx_rings;
3508
3509	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3510		igb_free_receive_buffers(rxr);
3511		igb_dma_free(adapter, &rxr->rxdma);
3512	}
3513
3514	free(adapter->rx_rings, M_DEVBUF);
3515}
3516
3517/*********************************************************************
3518 *
3519 *  Free receive ring data structures.
3520 *
3521 **********************************************************************/
3522static void
3523igb_free_receive_buffers(struct rx_ring *rxr)
3524{
3525	struct adapter	*adapter = rxr->adapter;
3526	struct igb_buffer *rx_buffer;
3527
3528	INIT_DEBUGOUT("free_receive_structures: begin");
3529
3530	if (rxr->rx_spare_map) {
3531		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3532		rxr->rx_spare_map = NULL;
3533	}
3534
3535	/* Cleanup any existing buffers */
3536	if (rxr->rx_buffers != NULL) {
3537		rx_buffer = &rxr->rx_buffers[0];
3538		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3539			if (rx_buffer->m_head != NULL) {
3540				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3541				    BUS_DMASYNC_POSTREAD);
3542				bus_dmamap_unload(rxr->rxtag,
3543				    rx_buffer->map);
3544				m_freem(rx_buffer->m_head);
3545				rx_buffer->m_head = NULL;
3546			} else if (rx_buffer->map != NULL)
3547				bus_dmamap_unload(rxr->rxtag,
3548				    rx_buffer->map);
3549			if (rx_buffer->map != NULL) {
3550				bus_dmamap_destroy(rxr->rxtag,
3551				    rx_buffer->map);
3552				rx_buffer->map = NULL;
3553			}
3554		}
3555	}
3556
3557	if (rxr->rx_buffers != NULL) {
3558		free(rxr->rx_buffers, M_DEVBUF);
3559		rxr->rx_buffers = NULL;
3560	}
3561
3562	if (rxr->rxtag != NULL) {
3563		bus_dma_tag_destroy(rxr->rxtag);
3564		rxr->rxtag = NULL;
3565	}
3566}
3567/*********************************************************************
3568 *
3569 *  This routine executes in interrupt context. It replenishes
3570 *  the mbufs in the descriptors and passes data which has been
3571 *  DMA'd into host memory up to the upper layer.
3572 *
3573 *  We loop at most count times if count is > 0, or until done if
3574 *  count < 0.
3575 *
3576 *  Return TRUE if all clean, FALSE otherwise
3577 *********************************************************************/
3578static bool
3579igb_rxeof(struct rx_ring *rxr, int count)
3580{
3581	struct adapter	*adapter = rxr->adapter;
3582	struct ifnet	*ifp;
3583	struct mbuf	*mp;
3584	uint8_t		accept_frame = 0;
3585	uint8_t		eop = 0;
3586	uint16_t 	len, desc_len, prev_len_adj;
3587	int		i;
3588	union e1000_adv_rx_desc   *cur;
3589	u32		staterr;
3590
3591	IGB_RX_LOCK(rxr);
3592	ifp = adapter->ifp;
3593	i = rxr->next_to_check;
3594	cur = &rxr->rx_base[i];
3595	staterr = cur->wb.upper.status_error;
3596
3597	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3598	    BUS_DMASYNC_POSTREAD);
3599
3600	if (!(staterr & E1000_RXD_STAT_DD)) {
3601		IGB_RX_UNLOCK(rxr);
3602		return FALSE;
3603	}
3604
3605	while ((staterr & E1000_RXD_STAT_DD) &&
3606	    (count != 0) &&
3607	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3608		struct mbuf *m = NULL;
3609
3610		mp = rxr->rx_buffers[i].m_head;
3611		/*
3612		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3613		 * needs to access the last received byte in the mbuf.
3614		 */
3615		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
3616		    BUS_DMASYNC_POSTREAD);
3617
3618		accept_frame = 1;
3619		prev_len_adj = 0;
3620		desc_len = le16toh(cur->wb.upper.length);
3621		if (staterr & E1000_RXD_STAT_EOP) {
3622			count--;
3623			eop = 1;
3624			if (desc_len < ETHER_CRC_LEN) {
3625				len = 0;
3626				prev_len_adj = ETHER_CRC_LEN - desc_len;
3627			} else
3628				len = desc_len - ETHER_CRC_LEN;
3629		} else {
3630			eop = 0;
3631			len = desc_len;
3632		}
3633
3634		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
3635			accept_frame = 0;
3642
3643		if (accept_frame) {
3644			if (igb_get_buf(rxr, i) != 0) {
3645				ifp->if_iqdrops++;
3646				goto discard;
3647			}
3648
3649			/* Assign correct length to the current fragment */
3650			mp->m_len = len;
3651
3652			if (rxr->fmp == NULL) {
3653				mp->m_pkthdr.len = len;
3654				rxr->fmp = mp; /* Store the first mbuf */
3655				rxr->lmp = mp;
3656			} else {
3657				/* Chain mbuf's together */
3658				mp->m_flags &= ~M_PKTHDR;
3659				/*
3660				 * Adjust length of previous mbuf in chain if
3661				 * we received less than 4 bytes in the last
3662				 * descriptor.
3663				 */
3664				if (prev_len_adj > 0) {
3665					rxr->lmp->m_len -= prev_len_adj;
3666					rxr->fmp->m_pkthdr.len -=
3667					    prev_len_adj;
3668				}
3669				rxr->lmp->m_next = mp;
3670				rxr->lmp = rxr->lmp->m_next;
3671				rxr->fmp->m_pkthdr.len += len;
3672			}
3673
3674			if (eop) {
3675				rxr->fmp->m_pkthdr.rcvif = ifp;
3676				ifp->if_ipackets++;
3677				rxr->rx_packets++;
3678				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
3679
3680				igb_rx_checksum(staterr, rxr->fmp);
3681#ifndef __NO_STRICT_ALIGNMENT
3682				if (adapter->max_frame_size >
3683				    (MCLBYTES - ETHER_ALIGN) &&
3684				    igb_fixup_rx(rxr) != 0)
3685					goto skip;
3686#endif
3687				if (staterr & E1000_RXD_STAT_VP) {
3688					rxr->fmp->m_pkthdr.ether_vtag =
3689					    le16toh(cur->wb.upper.vlan);
3690					rxr->fmp->m_flags |= M_VLANTAG;
3691				}
3692#ifndef __NO_STRICT_ALIGNMENT
3693skip:
3694#endif
3695				m = rxr->fmp;
3696				rxr->fmp = NULL;
3697				rxr->lmp = NULL;
3698			}
3699		} else {
3700			ifp->if_ierrors++;
3701discard:
3702			/* Reuse loaded DMA map and just update mbuf chain */
3703			mp = rxr->rx_buffers[i].m_head;
3704			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3705			mp->m_data = mp->m_ext.ext_buf;
3706			mp->m_next = NULL;
3707			if (adapter->max_frame_size <=
3708			    (MCLBYTES - ETHER_ALIGN))
3709				m_adj(mp, ETHER_ALIGN);
3710			if (rxr->fmp != NULL) {
3711				m_freem(rxr->fmp);
3712				rxr->fmp = NULL;
3713				rxr->lmp = NULL;
3714			}
3715			m = NULL;
3716		}
3717
3718		/* Zero out the receive descriptors status. */
3719		cur->wb.upper.status_error = 0;
3720		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3721		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3722
3723		rxr->last_cleaned = i; /* For updating tail */
3724
3725		/* Advance our pointers to the next descriptor. */
3726		if (++i == adapter->num_rx_desc)
3727			i = 0;
3728
3729		if (m != NULL) {
3730			rxr->next_to_check = i;
3731			/* Pass up to the stack */
3732			IGB_RX_UNLOCK(rxr);
3733			(*ifp->if_input)(ifp, m);
3734			IGB_RX_LOCK(rxr);
3735			i = rxr->next_to_check;
3736		}
3737		/* Get the next descriptor */
3738		cur = &rxr->rx_base[i];
3739		staterr = cur->wb.upper.status_error;
3740	}
3741	rxr->next_to_check = i;
3745
3746	/* Advance this receive queue's "Tail Pointer". */
3747	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
3748	IGB_RX_UNLOCK(rxr);
3749
3750	if (!(staterr & E1000_RXD_STAT_DD))
3751		return FALSE;
3752
3753	return TRUE;
3754}
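
/*
 * A minimal caller sketch (disabled), assuming deferred processing
 * through a taskqueue; igb_rx_task_example and the tq, rx_task and
 * rx_process_limit fields are assumed names, not taken from this
 * file.  It illustrates the count argument and the TRUE/FALSE
 * return contract documented above.
 */
#if 0
static void
igb_rx_task_example(void *arg, int pending)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;

	/* FALSE means descriptors remain to be cleaned; requeue. */
	if (igb_rxeof(rxr, adapter->rx_process_limit) == FALSE)
		taskqueue_enqueue(adapter->tq, &rxr->rx_task);
}
#endif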
3755
3756#ifndef __NO_STRICT_ALIGNMENT
3757/*
3758 * When jumbo frames are enabled we should realign the entire payload on
3759 * architectures with strict alignment.  This is a serious design flaw of
3760 * the 8254x, as it undermines the benefit of DMA.  The 8254x only allows
3761 * the RX buffer size to be 2048/4096/8192/16384; what we really want is
3762 * 2048 - ETHER_ALIGN, which would align the payload.  On architectures
3763 * without strict alignment restrictions the 8254x still performs
3764 * unaligned memory accesses, which also reduces performance.  To avoid
3765 * copying an entire frame to realign it, we allocate a new mbuf, copy the
3766 * ethernet header into it, and prepend the new mbuf to the existing chain.
3767 *
3768 * Be aware that the 8254x performs best when jumbo frames are not used at
3769 * all on architectures with strict alignment.
3770 */
3771static int
3772igb_fixup_rx(struct rx_ring *rxr)
3773{
3774	struct adapter *adapter = rxr->adapter;
3775	struct mbuf *m, *n;
3776	int error;
3777
3778	error = 0;
3779	m = rxr->fmp;
3780	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3781		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3782		m->m_data += ETHER_HDR_LEN;
3783	} else {
3784		MGETHDR(n, M_DONTWAIT, MT_DATA);
3785		if (n != NULL) {
3786			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3787			m->m_data += ETHER_HDR_LEN;
3788			m->m_len -= ETHER_HDR_LEN;
3789			n->m_len = ETHER_HDR_LEN;
3790			M_MOVE_PKTHDR(n, m);
3791			n->m_next = m;
3792			rxr->fmp = n;
3793		} else {
3794			adapter->dropped_pkts++;
3795			m_freem(rxr->fmp);
3796			rxr->fmp = NULL;
3797			error = ENOMEM;
3798		}
3799	}
3800
3801	return (error);
3802}
3803#endif
3804
3805/*********************************************************************
3806 *
3807 *  Verify that the hardware indicated that the checksum is valid.
3808 *  Inform the stack of the checksum status so that it
3809 *  doesn't spend time verifying the checksum.
3810 *
3811 *********************************************************************/
3812static void
3813igb_rx_checksum(u32 staterr, struct mbuf *mp)
3814{
3815	u16 status = (u16)staterr;
3816	u8  errors = (u8) (staterr >> 24);
3817
3818	/* Ignore Checksum bit is set */
3819	if (status & E1000_RXD_STAT_IXSM) {
3820		mp->m_pkthdr.csum_flags = 0;
3821		return;
3822	}
3823
3824	if (status & E1000_RXD_STAT_IPCS) {
3825		/* Did it pass? */
3826		if (!(errors & E1000_RXD_ERR_IPE)) {
3827			/* IP Checksum Good */
3828			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3829			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3830
3831		} else
3832			mp->m_pkthdr.csum_flags = 0;
3833	}
3834
3835	if (status & E1000_RXD_STAT_TCPCS) {
3836		/* Did it pass? */
3837		if (!(errors & E1000_RXD_ERR_TCPE)) {
3838			mp->m_pkthdr.csum_flags |=
3839			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3840			mp->m_pkthdr.csum_data = htons(0xffff);
3841		}
3842	}
3843	return;
3844}
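
/*
 * A minimal sketch (disabled) of how an upper layer consumes the
 * flags set above; igb_csum_example is an assumed name.  When
 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR are set with csum_data = 0xffff,
 * the stack treats the TCP/UDP checksum as already verified.
 */
#if 0
static int
igb_csum_example(struct mbuf *m)
{
	int flags = m->m_pkthdr.csum_flags;

	/* Did hardware verify the IP header checksum? */
	if ((flags & (CSUM_IP_CHECKED | CSUM_IP_VALID)) !=
	    (CSUM_IP_CHECKED | CSUM_IP_VALID))
		return (0);
	/* Did hardware verify the TCP/UDP checksum? */
	return ((flags & CSUM_DATA_VALID) != 0);
}
#endif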
3845
3846/*
3847 * This turns on hardware offload of VLAN
3848 * tag insertion and stripping.
3849 */
3850static void
3851igb_enable_hw_vlans(struct adapter *adapter)
3852{
3853	uint32_t ctrl;
3854
3855	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
3856	ctrl |= E1000_CTRL_VME;
3857	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
3858}
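
/*
 * The complementary disable path clears the same bit with the same
 * read-modify-write pattern; a sketch (disabled, assumed name, not
 * part of this driver):
 */
#if 0
static void
igb_disable_hw_vlans_example(struct adapter *adapter)
{
	uint32_t ctrl;

	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl &= ~E1000_CTRL_VME;
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
}
#endif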
3859
3860static void
3861igb_enable_intr(struct adapter *adapter)
3862{
3863	/* With RSS set up, select what to auto-clear */
3864	if (adapter->msix_mem) {
3865		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
3866		    adapter->eims_mask);
3867		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
3868		    adapter->eims_mask);
3869		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3870		    E1000_IMS_LSC);
3871	} else {
3872		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3873		    IMS_ENABLE_MASK);
3874	}
3875	E1000_WRITE_FLUSH(&adapter->hw);
3876
3877	return;
3878}
3879
3880static void
3881igb_disable_intr(struct adapter *adapter)
3882{
3883	if (adapter->msix_mem) {
3884		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
3885		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
3886	}
3887	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
3888	E1000_WRITE_FLUSH(&adapter->hw);
3889	return;
3890}
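
/*
 * EIMC masks the per-queue MSI-X interrupt causes, while IMC masks
 * the legacy and link-state causes; EIAC is cleared as well so the
 * hardware stops auto-clearing any EIMS bits while the driver is
 * reconfiguring.
 */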
3891
3892/*
3893 * Bit of a misnomer: what this really means is
3894 * to enable OS management of the system, i.e.
3895 * to disable special hardware management features.
3896 */
3897static void
3898igb_init_manageability(struct adapter *adapter)
3899{
3900	/* A shared code workaround */
3901#define E1000_82542_MANC2H E1000_MANC2H
3902	if (adapter->has_manage) {
3903		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
3904		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3905
3906		/* disable hardware interception of ARP */
3907		manc &= ~(E1000_MANC_ARP_EN);
3908
3909		/* enable receiving management packets to the host */
3910		manc |= E1000_MANC_EN_MNG2HOST;
3911#define E1000_MNG2HOST_PORT_623 (1 << 5)
3912#define E1000_MNG2HOST_PORT_664 (1 << 6)
3913		manc2h |= E1000_MNG2HOST_PORT_623;
3914		manc2h |= E1000_MNG2HOST_PORT_664;
3915		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
3916
3917		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3918	}
3919}
3920
3921/*
3922 * Give control back to hardware management
3923 * controller if there is one.
3924 */
3925static void
3926igb_release_manageability(struct adapter *adapter)
3927{
3928	if (adapter->has_manage) {
3929		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3930
3931		/* re-enable hardware interception of ARP */
3932		manc |= E1000_MANC_ARP_EN;
3933		manc &= ~E1000_MANC_EN_MNG2HOST;
3934
3935		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3936	}
3937}
3938
3939/*
3940 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
3941 * For ASF and Pass Through versions of f/w this means that
3942 * the driver is loaded.
3943 *
3944 */
3945static void
3946igb_get_hw_control(struct adapter *adapter)
3947{
3948	u32 ctrl_ext;
3949
3950	/* Let firmware know the driver has taken over */
3951	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3952	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3953	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
3954}
3955
3956/*
3957 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
3958 * For ASF and Pass Through versions of f/w this means that the
3959 * driver is no longer loaded.
3960 *
3961 */
3962static void
3963igb_release_hw_control(struct adapter *adapter)
3964{
3965	u32 ctrl_ext;
3966
3967	/* Let firmware take over control of the h/w */
3968	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3969	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3970	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
3971}
3972
3973static int
3974igb_is_valid_ether_addr(uint8_t *addr)
3975{
3976	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3977
3978	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
3979		return (FALSE);
3980	}
3981
3982	return (TRUE);
3983}
3984
3985/*
3986 * NOTE: the following routines use the e1000
3987 *	naming style; they are provided to the shared
3988 *	code, which expects those names rather than 'em'
3989 */
3990
3991void
3992e1000_write_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
3993{
3994	pci_write_config(((struct e1000_osdep *)hw->back)->dev, reg, *value, 2);
3995}
3996
3997void
3998e1000_read_pci_cfg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
3999{
4000	*value = pci_read_config(((struct e1000_osdep *)hw->back)->dev, reg, 2);
4001}
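
/*
 * Usage sketch (disabled, assumed name): fetching the 16-bit PCI
 * Device ID through the helper above; PCIR_DEVICE is the standard
 * config offset 0x02.
 */
#if 0
static u16
igb_read_devid_example(struct e1000_hw *hw)
{
	u16 devid;

	e1000_read_pci_cfg(hw, PCIR_DEVICE, &devid);
	return (devid);
}
#endif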
4002
4003void
4004e1000_pci_set_mwi(struct e1000_hw *hw)
4005{
4006	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4007	    (hw->bus.pci_cmd_word | CMD_MEM_WRT_INVALIDATE), 2);
4008}
4009
4010void
4011e1000_pci_clear_mwi(struct e1000_hw *hw)
4012{
4013	pci_write_config(((struct e1000_osdep *)hw->back)->dev, PCIR_COMMAND,
4014	    (hw->bus.pci_cmd_word & ~CMD_MEM_WRT_INVALIDATE), 2);
4015}
4016
4017/*
4018 * Read the PCI Express capabilities
4019 */
4020int32_t
4021e1000_read_pcie_cap_reg(struct e1000_hw *hw, uint32_t reg, uint16_t *value)
4022{
4023	u32	result;
4024
4025	pci_find_extcap(((struct e1000_osdep *)hw->back)->dev,
4026	    reg, &result);
4027	*value = (u16)result;
4028	return (E1000_SUCCESS);
4029}
4030
4031int32_t
4032e1000_alloc_zeroed_dev_spec_struct(struct e1000_hw *hw, uint32_t size)
4033{
4034	int32_t error = 0;
4035
4036	hw->dev_spec = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
4037	if (hw->dev_spec == NULL)
4038		error = ENOMEM;
4039
4040	return (error);
4041}
4042
4043void
4044e1000_free_dev_spec_struct(struct e1000_hw *hw)
4045{
4046	if (hw->dev_spec != NULL)
4047		free(hw->dev_spec, M_DEVBUF);
4048	return;
4049}
4050
4051/*
4052 * Enable PCI Wake On Lan capability
4053 */
4054void
4055igb_enable_wakeup(device_t dev)
4056{
4057	u16     cap, status;
4058	u8      id;
4059
4060	/* First find the capabilities pointer */
4061	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4062	/* Read the PM Capabilities */
4063	id = pci_read_config(dev, cap, 1);
4064	if (id != PCIY_PMG)     /* Something wrong */
4065		return;
4066	/* OK, we have the power capabilities, so
4067	   now get the status register */
4068	cap += PCIR_POWER_STATUS;
4069	status = pci_read_config(dev, cap, 2);
4070	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4071	pci_write_config(dev, cap, status, 2);
4072	return;
4073}
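
/*
 * Note the routine above only inspects the first entry in the PCI
 * capability list.  A more general walk would follow the next
 * pointers; a sketch (disabled, assumed name):
 */
#if 0
static int
igb_find_pmg_cap_example(device_t dev, u16 *offset)
{
	u16 cap = pci_read_config(dev, PCIR_CAP_PTR, 2) & 0xff;

	while (cap != 0) {
		if (pci_read_config(dev, cap, 1) == PCIY_PMG) {
			*offset = cap;
			return (0);
		}
		/* The next pointer lives one byte past the cap ID. */
		cap = pci_read_config(dev, cap + 1, 1);
	}
	return (ENXIO);
}
#endif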
4074
4075
4076/**********************************************************************
4077 *
4078 *  Update the board statistics counters.
4079 *
4080 **********************************************************************/
4081static void
4082igb_update_stats_counters(struct adapter *adapter)
4083{
4084	struct ifnet   *ifp;
4085
4086	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4087	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4088		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4089		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4090	}
4091	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4092	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4093	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4094	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4095
4096	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4097	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4098	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4099	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4100	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4101	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4102	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4103	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4104	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4105	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4106	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4107	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4108	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4109	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4110	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4111	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4112	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4113	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4114	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4115	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4116
4117	/* For the 64-bit byte counters the low dword must be read first. */
4118	/* Both registers clear on the read of the high dword */
4119
4120	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
4121	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);
4122
4123	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4124	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4125	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4126	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4127	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4128
4129	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32);
4130	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32);
4131
4132	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4133	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4134	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4135	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4136	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4137	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4138	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4139	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4140	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4141	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4142
4143	adapter->stats.algnerrc +=
4144		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4145	adapter->stats.rxerrc +=
4146		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4147	adapter->stats.tncrs +=
4148		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4149	adapter->stats.cexterr +=
4150		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4151	adapter->stats.tsctc +=
4152		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4153	adapter->stats.tsctfc +=
4154		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4155	ifp = adapter->ifp;
4156
4157	ifp->if_collisions = adapter->stats.colc;
4158
4159	/* Rx Errors */
4160	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4161	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4162	    adapter->stats.ruc + adapter->stats.roc +
4163	    adapter->stats.mpc + adapter->stats.cexterr;
4164
4165	/* Tx Errors */
4166	ifp->if_oerrors = adapter->stats.ecol +
4167	    adapter->stats.latecol + adapter->watchdog_events;
4168}
4169
4170
4171/**********************************************************************
4172 *
4173 *  This routine is called only when igb_display_debug_stats is enabled.
4174 *  This routine provides a way to take a look at important statistics
4175 *  maintained by the driver and hardware.
4176 *
4177 **********************************************************************/
4178static void
4179igb_print_debug_info(struct adapter *adapter)
4180{
4181	device_t dev = adapter->dev;
4182	struct rx_ring *rxr = adapter->rx_rings;
4183	struct tx_ring *txr = adapter->tx_rings;
4184	uint8_t *hw_addr = adapter->hw.hw_addr;
4185
4186	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4187	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4188	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4189	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4190	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4191	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4192	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4193	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4194	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4195	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4196	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4197	    adapter->hw.fc.high_water,
4198	    adapter->hw.fc.low_water);
4199	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4200	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4201	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4202	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4203	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4204	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4205
4206	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4207		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4208		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4209		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4210		device_printf(dev, "no descriptors avail event = %lld\n",
4211		    (long long)txr->no_desc_avail);
4212		device_printf(dev, "TX(%d) IRQ Handled = %lld\n", txr->me,
4213		    (long long)txr->tx_irq);
4214		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4215		    (long long)txr->tx_packets);
4216	}
4217
4218	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4219		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4220		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4221		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4222		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4223		    (long long)rxr->rx_packets);
4224		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4225		    (long long)rxr->rx_bytes);
4226		device_printf(dev, "RX(%d) IRQ Handled = %lld\n", rxr->me,
4227		    (long long)rxr->rx_irq);
4228	}
4229	device_printf(dev, "LINK IRQ Handled = %u\n", adapter->link_irq);
4230
4231	device_printf(dev, "Std mbuf failed = %ld\n",
4232	    adapter->mbuf_alloc_failed);
4233	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4234	    adapter->mbuf_cluster_failed);
4235	device_printf(dev, "Driver dropped packets = %ld\n",
4236	    adapter->dropped_pkts);
4237	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4238	    adapter->no_tx_dma_setup);
4239}
4240
4241static void
4242igb_print_hw_stats(struct adapter *adapter)
4243{
4244	device_t dev = adapter->dev;
4245
4246	device_printf(dev, "Excessive collisions = %lld\n",
4247	    (long long)adapter->stats.ecol);
4248#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4249	device_printf(dev, "Symbol errors = %lld\n",
4250	    (long long)adapter->stats.symerrs);
4251#endif
4252	device_printf(dev, "Sequence errors = %lld\n",
4253	    (long long)adapter->stats.sec);
4254	device_printf(dev, "Defer count = %lld\n",
4255	    (long long)adapter->stats.dc);
4256	device_printf(dev, "Missed Packets = %lld\n",
4257	    (long long)adapter->stats.mpc);
4258	device_printf(dev, "Receive No Buffers = %lld\n",
4259	    (long long)adapter->stats.rnbc);
4260	/* RLEC is inaccurate on some hardware, so calculate our own. */
4261	device_printf(dev, "Receive Length Errors = %lld\n",
4262	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4263	device_printf(dev, "Receive errors = %lld\n",
4264	    (long long)adapter->stats.rxerrc);
4265	device_printf(dev, "Crc errors = %lld\n",
4266	    (long long)adapter->stats.crcerrs);
4267	device_printf(dev, "Alignment errors = %lld\n",
4268	    (long long)adapter->stats.algnerrc);
4269	/* On 82575 these are collision counts */
4270	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4271	    (long long)adapter->stats.cexterr);
4272	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4273	device_printf(dev, "watchdog timeouts = %ld\n",
4274	    adapter->watchdog_events);
4275	device_printf(dev, "XON Rcvd = %lld\n",
4276	    (long long)adapter->stats.xonrxc);
4277	device_printf(dev, "XON Xmtd = %lld\n",
4278	    (long long)adapter->stats.xontxc);
4279	device_printf(dev, "XOFF Rcvd = %lld\n",
4280	    (long long)adapter->stats.xoffrxc);
4281	device_printf(dev, "XOFF Xmtd = %lld\n",
4282	    (long long)adapter->stats.xofftxc);
4283	device_printf(dev, "Good Packets Rcvd = %lld\n",
4284	    (long long)adapter->stats.gprc);
4285	device_printf(dev, "Good Packets Xmtd = %lld\n",
4286	    (long long)adapter->stats.gptc);
4287	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4288	    (long long)adapter->stats.tsctc);
4289	device_printf(dev, "TSO Contexts Failed = %lld\n",
4290	    (long long)adapter->stats.tsctfc);
4291}
4292
4293/**********************************************************************
4294 *
4295 *  This routine provides a way to dump out the adapter eeprom,
4296 *  often a useful debug/service tool. This only dumps the first
4297 *  32 words; the data that matters lies within that range.
4298 *
4299 **********************************************************************/
4300static void
4301igb_print_nvm_info(struct adapter *adapter)
4302{
4303	u16	eeprom_data;
4304	int	i, j, row = 0;
4305
4306	/* It's a bit crude, but it gets the job done */
4307	printf("\nInterface EEPROM Dump:\n");
4308	printf("Offset\n0x0000  ");
4309	for (i = 0, j = 0; i < 32; i++, j++) {
4310		if (j == 8) { /* Make the offset block */
4311			j = 0; ++row;
4312			printf("\n0x00%x0  ", row);
4313		}
4314		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4315		printf("%04x ", eeprom_data);
4316	}
4317	printf("\n");
4318}
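
/*
 * Sample of the output shape (values illustrative only):
 *
 *	Interface EEPROM Dump:
 *	Offset
 *	0x0000  1100 22aa 3344 0dff 0000 0000 0000 0000
 *	0x0010  0000 0000 0000 0000 0000 0000 0000 0000
 */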
4319
4320static int
4321igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4322{
4323	struct adapter *adapter;
4324	int error;
4325	int result;
4326
4327	result = -1;
4328	error = sysctl_handle_int(oidp, &result, 0, req);
4329
4330	if (error || !req->newptr)
4331		return (error);
4332
4333	if (result == 1) {
4334		adapter = (struct adapter *)arg1;
4335		igb_print_debug_info(adapter);
4336	}
4337	/*
4338	 * This value will cause a hex dump of the
4339	 * first 32 16-bit words of the EEPROM to
4340	 * the screen.
4341	 */
4342	if (result == 2) {
4343		adapter = (struct adapter *)arg1;
4344		igb_print_nvm_info(adapter);
4345	}
4346
4347	return (error);
4348}
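
/*
 * Usage sketch, assuming the OID is attached under the device's
 * sysctl tree as "debug_info" (the attach code is not shown here):
 *
 *	sysctl dev.igb.0.debug_info=1	# dump driver/queue debug info
 *	sysctl dev.igb.0.debug_info=2	# dump the first 32 EEPROM words
 */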
4349
4350
4351static int
4352igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4353{
4354	struct adapter *adapter;
4355	int error;
4356	int result;
4357
4358	result = -1;
4359	error = sysctl_handle_int(oidp, &result, 0, req);
4360
4361	if (error || !req->newptr)
4362		return (error);
4363
4364	if (result == 1) {
4365		adapter = (struct adapter *)arg1;
4366		igb_print_hw_stats(adapter);
4367	}
4368
4369	return (error);
4370}
4371
4372static int
4373igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4374{
4375	struct igb_int_delay_info *info;
4376	struct adapter *adapter;
4377	uint32_t regval;
4378	int error;
4379	int usecs;
4380	int ticks;
4381
4382	info = (struct igb_int_delay_info *)arg1;
4383	usecs = info->value;
4384	error = sysctl_handle_int(oidp, &usecs, 0, req);
4385	if (error != 0 || req->newptr == NULL)
4386		return (error);
4387	if (usecs < 0 || usecs > IGB_TICKS_TO_USECS(65535))
4388		return (EINVAL);
4389	info->value = usecs;
4390	ticks = IGB_USECS_TO_TICKS(usecs);
4391
4392	adapter = info->adapter;
4393
4394	IGB_CORE_LOCK(adapter);
4395	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4396	regval = (regval & ~0xffff) | (ticks & 0xffff);
4397	/* Handle a few special cases. */
4398	switch (info->offset) {
4399	case E1000_RDTR:
4400		break;
4401	case E1000_TIDV:
4402		if (ticks == 0) {
4403			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4404			/* Don't write 0 into the TIDV register. */
4405			regval++;
4406		} else if (adapter->hw.mac.type < e1000_82575)
4407			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4409		break;
4410	}
4411	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4412	IGB_CORE_UNLOCK(adapter);
4413	return (0);
4414}
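
/*
 * The delay timers tick in 1.024 usec units, so the conversion
 * macros used above are assumed to be approximately:
 *
 *	IGB_USECS_TO_TICKS(u)	(((u) * 1000 + 512) / 1024)
 *	IGB_TICKS_TO_USECS(t)	(((t) * 1024 + 500) / 1000)
 *
 * e.g. a requested 90 usec delay programs about 88 ticks into the
 * low 16 bits of the register.
 */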
4415
4416static void
4417igb_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4418	const char *description, struct igb_int_delay_info *info,
4419	int offset, int value)
4420{
4421	info->adapter = adapter;
4422	info->offset = offset;
4423	info->value = value;
4424	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4425	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4426	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4427	    info, 0, igb_sysctl_int_delay, "I", description);
4428}
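
/*
 * Illustrative call from the attach path (field and default names
 * assumed, not taken from this file):
 *
 *	igb_add_int_delay_sysctl(adapter, "rx_int_delay",
 *	    "receive interrupt delay in usecs",
 *	    &adapter->rx_int_delay, E1000_RDTR, igb_rx_int_delay_dflt);
 */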
4429
4430static void
4431igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4432	const char *description, int *limit, int value)
4433{
4434	*limit = value;
4435	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4436	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4437	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4438}
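
/*
 * Illustrative call (default value assumed):
 *
 *	igb_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */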
4439
4440
4441