/******************************************************************************

  Copyright (c) 2001-2008, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/igb/if_igb.c 177868 2008-04-03 00:25:09Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/pcpu.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.1.6";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by igb_probe to select which devices to attach to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	igb_fixup_rx(struct rx_ring *);
#endif
static void	igb_rx_checksum(u32, struct mbuf *);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int);
static void	igb_enable_hw_vlans(struct adapter *);
static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	igb_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct igb_int_delay_info *, int, int);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);


static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX-only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
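
/*
 * Usage note (an aside, not from the original source): built as a module
 * this driver attaches under pci; assuming the conventional if_igb.ko
 * module name, it can be loaded at runtime with kldload(8):
 *
 *	kldload if_igb
 */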

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define IGB_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define IGB_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
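
/*
 * Worked example (an aside; the IGB_TIDV value of 64 is an assumption,
 * being a common e1000 default): the hardware counts these delays in
 * ~1.024 usec ticks, so the macros above round-convert, e.g.
 * IGB_TICKS_TO_USECS(64) = (1024 * 64 + 500) / 1000 = 66 usecs, and
 * IGB_USECS_TO_TICKS(66) = (1000 * 66 + 512) / 1024 = 64 ticks again.
 */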
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int igb_tx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TIDV);
static int igb_rx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RDTR);
static int igb_tx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TADV);
static int igb_rx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RADV);
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
static int igb_smart_pwr_down = FALSE;
TUNABLE_INT("hw.igb.tx_int_delay", &igb_tx_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_int_delay", &igb_rx_int_delay_dflt);
TUNABLE_INT("hw.igb.tx_abs_int_delay", &igb_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_abs_int_delay", &igb_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.smart_pwr_down", &igb_smart_pwr_down);

/* These auto-configure if set to 0, based on the number of CPUs */
extern int mp_ncpus;
static int igb_tx_queues = 1;
static int igb_rx_queues = 1;
TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
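
/*
 * Usage sketch (an aside, not from the original source): since these are
 * declared with TUNABLE_INT(), they can be set from loader.conf(5), e.g.
 *
 *	hw.igb.rxd="1024"
 *	hw.igb.txd="1024"
 *	hw.igb.rx_process_limit="200"
 *
 * The example values are arbitrary; the descriptor counts are validated
 * in igb_attach().
 */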

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines whether the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	igb_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), igb_rx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), igb_tx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    igb_rx_abs_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    igb_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum, and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard Ethernet-sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
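	/*
	 * With the usual values (ETHERMTU = 1500, ETHER_HDR_LEN = 14,
	 * ETHERNET_FCS_SIZE = 4, ETH_ZLEN = 60) this works out to a
	 * 1518-byte maximum and a 64-byte minimum frame.
	 */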

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_hw_init;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** and if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_hw_init:
	e1000_remove_device(&adapter->hw);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANs are not using the driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "VLAN in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	e1000_remove_device(&adapter->hw);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}
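
/*
 * Note (an aside, not from the original source): shutdown simply defers to
 * igb_suspend() so that the wakeup registers (WUC/WUFC) are programmed on
 * the way down whenever WOL is enabled.
 */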

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  If resources are not available, the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}

static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	u32		queue = 0;

	/*
	** This is really just here for testing
	** TX multiqueue, ultimately what is
	** needed is the flow support in the stack
	** and appropriate logic here to deal with
	** it. -jfv
	*/
	if (adapter->num_tx_queues > 1)
		queue = (curcpu % adapter->num_tx_queues);

	txr = &adapter->tx_rings[queue];
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:"
		    " SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    IFF_PROMISC) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd:"
		    " SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }


	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	bool		tx_hang = FALSE;

	IGB_CORE_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start() queues a packet.
	** Then txeof keeps resetting it as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** With TX Multiqueue we need to check every queue's timer,
	** if any time out we do the reset.
	*/
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		if (txr->watchdog_timer == 0 ||
		    (--txr->watchdog_timer)) {
			IGB_TX_UNLOCK(txr);
			continue;
		} else {
			tx_hang = TRUE;
			IGB_TX_UNLOCK(txr);
			break;
		}
	}
	if (tx_hang == FALSE)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		txr = adapter->tx_rings; /* reset pointer */
		for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			txr->watchdog_timer = IGB_TX_TIMEOUT;
			IGB_TX_UNLOCK(txr);
		}
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	txr = adapter->tx_rings; /* reset pointer */
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
		    txr->next_to_clean);
	}

	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba = 0;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest mac address, user can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_enable_hw_vlans(adapter);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		E1000_READ_REG(&adapter->hw, E1000_ICR);
		igb_enable_intr(adapter);
	}

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = adapter->rx_rings;
	struct tx_ring *txr = adapter->tx_rings;
	uint32_t reg_icr;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			igb_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    igb_local_timer, adapter);
		}
	}
	igb_rxeof(rxr, count);
	IGB_CORE_UNLOCK(adapter);

	/* With polling we cannot do multiqueue */
	IGB_TX_LOCK(txr);
	igb_txeof(txr);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
	IGB_TX_UNLOCK(txr);
}
#endif /* DEVICE_POLLING */
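
/*
 * Usage sketch (an aside, not from the original source): with
 * "options DEVICE_POLLING" compiled into the kernel, polling is toggled
 * per interface from userland, e.g.
 *
 *	ifconfig igb0 polling
 *	ifconfig igb0 -polling
 */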


static void
igb_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	IGB_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	igb_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			/* More to clean, schedule another task */
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
}

static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}


/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_icr;

	/* Should not happen, but... */
	if (ifp->if_capenable & IFCAP_POLLING)
		return FILTER_STRAY;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}


/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		IGB_TX_UNLOCK(txr);
		taskqueue_enqueue(adapter->tq, &txr->tx_task);
	}
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++rxr->rx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	adapter->hw.mac.get_link_status = 1;
	taskqueue_enqueue(taskqueue_fast, &adapter->link_task);

spurious:
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, E1000_EIMS_OTHER);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors, as
 *  used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0, offload = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Force a cleanup if the number of TX descriptors
	 * available hits the threshold
	 */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Do we at least have the minimum now? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else {
			/* TSO setup failed, drop the DMA mapping */
			bus_dmamap_unload(txr->txtag, map);
			return (ENXIO);
		}
	} else {
		/* Do all other context descriptor setup */
		offload = igb_tx_ctx_setup(txr, m_head);
	}
	if (offload == TRUE)
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);
}

static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &= (~E1000_RCTL_UPE);
	reg_rctl &= (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


/*********************************************************************
 *  Multicast Update
 *
 *  This routine is called whenever the multicast address list
 *  is updated.
 *
 **********************************************************************/

static void
igb_set_multi(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	struct ifmultiaddr *ifma;
	uint32_t reg_rctl = 0;
	uint8_t  mta[512]; /* Largest MTA is 4096 bits */
	int mcnt = 0;

	IOCTL_DEBUGOUT("igb_set_multi: begin");

	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
	IF_ADDR_UNLOCK(ifp);

	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta,
		    mcnt, 1, adapter->hw.mac.rar_entry_count);
}
1719
1720
1721/*********************************************************************
1722 *  Timer routine
1723 *
1724 *  This routine checks for link status and updates statistics.
1725 *
1726 **********************************************************************/
1727
1728static void
1729igb_local_timer(void *arg)
1730{
1731	struct adapter	*adapter = arg;
1732	struct ifnet	*ifp = adapter->ifp;
1733
1734	IGB_CORE_LOCK_ASSERT(adapter);
1735
1736	igb_update_link_status(adapter);
1737	igb_update_stats_counters(adapter);
1738
1739	if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1740		igb_print_hw_stats(adapter);
1741
1742	/*
1743	 * Each second we check the watchdog to
1744	 * protect against hardware hangs.
1745	 */
1746	igb_watchdog(adapter);
1747
1748	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1749
1750}
1751
1752static void
1753igb_update_link_status(struct adapter *adapter)
1754{
1755	struct e1000_hw *hw = &adapter->hw;
1756	struct ifnet *ifp = adapter->ifp;
1757	device_t dev = adapter->dev;
1758	struct tx_ring *txr = adapter->tx_rings;
1759	u32 link_check = 0;
1760
1761	/* Get the cached link value or read for real */
1762        switch (hw->phy.media_type) {
1763        case e1000_media_type_copper:
1764                if (hw->mac.get_link_status) {
1765			/* Do the work to read phy */
1766                        e1000_check_for_link(hw);
1767                        link_check = !hw->mac.get_link_status;
1768                } else
1769                        link_check = TRUE;
1770                break;
1771        case e1000_media_type_fiber:
1772                e1000_check_for_link(hw);
1773                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1774                                 E1000_STATUS_LU);
1775                break;
1776        case e1000_media_type_internal_serdes:
1777                e1000_check_for_link(hw);
1778                link_check = adapter->hw.mac.serdes_has_link;
1779                break;
1780        default:
1781        case e1000_media_type_unknown:
1782                break;
1783        }
1784
1785	/* Now we check if a transition has happened */
1786	if (link_check && (adapter->link_active == 0)) {
1787		e1000_get_speed_and_duplex(&adapter->hw,
1788		    &adapter->link_speed, &adapter->link_duplex);
1789		if (bootverbose)
1790			device_printf(dev, "Link is up %d Mbps %s\n",
1791			    adapter->link_speed,
1792			    ((adapter->link_duplex == FULL_DUPLEX) ?
1793			    "Full Duplex" : "Half Duplex"));
1794		adapter->link_active = 1;
1795		ifp->if_baudrate = adapter->link_speed * 1000000;
1796		if_link_state_change(ifp, LINK_STATE_UP);
1797	} else if (!link_check && (adapter->link_active == 1)) {
1798		ifp->if_baudrate = adapter->link_speed = 0;
1799		adapter->link_duplex = 0;
1800		if (bootverbose)
1801			device_printf(dev, "Link is Down\n");
1802		adapter->link_active = 0;
1803		if_link_state_change(ifp, LINK_STATE_DOWN);
1804		/* Turn off watchdogs */
1805		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
1806			txr->watchdog_timer = FALSE;
1807	}
1808}
1809
1810/*********************************************************************
1811 *
1812 *  This routine disables all traffic on the adapter by issuing a
1813 *  global reset on the MAC and deallocates TX/RX buffers.
1814 *
1815 **********************************************************************/
1816
1817static void
1818igb_stop(void *arg)
1819{
1820	struct adapter	*adapter = arg;
1821	struct ifnet	*ifp = adapter->ifp;
1822
1823	IGB_CORE_LOCK_ASSERT(adapter);
1824
1825	INIT_DEBUGOUT("igb_stop: begin");
1826
1827	igb_disable_intr(adapter);
1828
1829	callout_stop(&adapter->timer);
1830
1831	/* Tell the stack that the interface is no longer active */
1832	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1833
1835	e1000_reset_hw(&adapter->hw);
1836	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1837}
1838
1839
1840/*********************************************************************
1841 *
1842 *  Determine hardware revision.
1843 *
1844 **********************************************************************/
1845static void
1846igb_identify_hardware(struct adapter *adapter)
1847{
1848	device_t dev = adapter->dev;
1849
1850	/* Make sure our PCI config space has the necessary stuff set */
1851	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1852	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1853	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1854		device_printf(dev, "Memory Access and/or Bus Master bits "
1855		    "were not set!\n");
		adapter->hw.bus.pci_cmd_word |=
		    (PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1858		pci_write_config(dev, PCIR_COMMAND,
1859		    adapter->hw.bus.pci_cmd_word, 2);
1860	}
1861
1862	/* Save off the information about this board */
1863	adapter->hw.vendor_id = pci_get_vendor(dev);
1864	adapter->hw.device_id = pci_get_device(dev);
1865	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1866	adapter->hw.subsystem_vendor_id =
1867	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
1868	adapter->hw.subsystem_device_id =
1869	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
1870
1871	/* Do Shared Code Init and Setup */
1872	if (e1000_set_mac_type(&adapter->hw)) {
1873		device_printf(dev, "Setup init failure\n");
1874		return;
1875	}
1876}
1877
1878static int
1879igb_allocate_pci_resources(struct adapter *adapter)
1880{
1881	device_t	dev = adapter->dev;
1882	int		rid, error = 0;
1883
1884	rid = PCIR_BAR(0);
1885	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1886	    &rid, RF_ACTIVE);
1887	if (adapter->pci_mem == NULL) {
1888		device_printf(dev, "Unable to allocate bus resource: memory\n");
1889		return (ENXIO);
1890	}
1891	adapter->osdep.mem_bus_space_tag =
1892	    rman_get_bustag(adapter->pci_mem);
1893	adapter->osdep.mem_bus_space_handle =
1894	    rman_get_bushandle(adapter->pci_mem);
1895	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
1896
1897	/*
1898	** Init the resource arrays
1899	*/
1900	for (int i = 0; i < IGB_MSIX_VEC; i++) {
1901		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
1902		adapter->tag[i] = NULL;
1903		adapter->res[i] = NULL;
1904	}
1905
1906	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
1907	adapter->num_rx_queues = 1;
1908
1909	/* This will setup either MSI/X or MSI */
1910	adapter->msix = igb_setup_msix(adapter);
1911
1912	adapter->hw.back = &adapter->osdep;
1913
1914	return (error);
1915}
1916
1917/*********************************************************************
1918 *
1919 *  Setup the Legacy or MSI Interrupt handler
1920 *
1921 **********************************************************************/
1922static int
1923igb_allocate_legacy(struct adapter *adapter)
1924{
1925	device_t dev = adapter->dev;
1926	int error;
1927
1928	/* Turn off all interrupts */
1929	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
1930
1931	/* Legacy RID at 0 */
1932	if (adapter->msix == 0)
1933		adapter->rid[0] = 0;
1934
1935	/* We allocate a single interrupt resource */
1936	adapter->res[0] = bus_alloc_resource_any(dev,
1937	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
1938	if (adapter->res[0] == NULL) {
1939		device_printf(dev, "Unable to allocate bus resource: "
1940		    "interrupt\n");
1941		return (ENXIO);
1942	}
1943
1944	/*
1945	 * Try allocating a fast interrupt and the associated deferred
1946	 * processing contexts.
1947	 */
1948	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
1949	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
1950	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
1951	    taskqueue_thread_enqueue, &adapter->tq);
1952	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
1953	    device_get_nameunit(adapter->dev));
1954	if ((error = bus_setup_intr(dev, adapter->res[0],
1955	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
1956	    &adapter->tag[0])) != 0) {
1957		device_printf(dev, "Failed to register fast interrupt "
1958			    "handler: %d\n", error);
1959		taskqueue_free(adapter->tq);
1960		adapter->tq = NULL;
1961		return (error);
1962	}
1963
1964	return (0);
1965}
1966
1967
1968/*********************************************************************
1969 *
1970 *  Setup the MSIX Interrupt handlers:
1971 *
1972 **********************************************************************/
1973static int
1974igb_allocate_msix(struct adapter *adapter)
1975{
1976	device_t dev = adapter->dev;
1977	struct tx_ring *txr = adapter->tx_rings;
1978	struct rx_ring *rxr = adapter->rx_rings;
1979	int error, vector = 0;
1980
1981	/*
1982	 * Setup the interrupt handlers
1983	 */
1984
1985	/* TX Setup */
1986	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
1987		adapter->res[vector] = bus_alloc_resource_any(dev,
1988		    SYS_RES_IRQ, &adapter->rid[vector],
1989		    RF_SHAREABLE | RF_ACTIVE);
1990		if (adapter->res[vector] == NULL) {
1991			device_printf(dev,
1992			    "Unable to allocate bus resource: "
1993			    "MSIX TX Interrupt\n");
1994			return (ENXIO);
1995		}
		error = bus_setup_intr(dev, adapter->res[vector],
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
		    txr, &adapter->tag[vector]);
		if (error) {
			adapter->res[vector] = NULL;
			device_printf(dev, "Failed to register TX handler\n");
			return (error);
		}
2004		/* Make tasklet for deferred handling - one per queue */
2005		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2006		if (adapter->hw.mac.type == e1000_82575) {
2007			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2008			/* MSIXBM registers start at 0 */
2009			txr->msix = adapter->rid[vector] - 1;
2010		} else {
2011			txr->eims = 1 << vector;
2012			txr->msix = vector;
2013		}
2014	}
2015
2016	/* RX Setup */
2017	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
2018		adapter->res[vector] = bus_alloc_resource_any(dev,
2019		    SYS_RES_IRQ, &adapter->rid[vector],
2020		    RF_SHAREABLE | RF_ACTIVE);
2021		if (adapter->res[vector] == NULL) {
2022			device_printf(dev,
2023			    "Unable to allocate bus resource: "
2024			    "MSIX RX Interrupt\n");
2025			return (ENXIO);
2026		}
		error = bus_setup_intr(dev, adapter->res[vector],
		    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
		    rxr, &adapter->tag[vector]);
		if (error) {
			adapter->res[vector] = NULL;
			device_printf(dev, "Failed to register RX handler\n");
			return (error);
		}
2035		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2036		if (adapter->hw.mac.type == e1000_82575) {
2037			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2038			rxr->msix = adapter->rid[vector] - 1;
2039		} else {
2040			rxr->eims = 1 << vector;
2041			rxr->msix = vector;
2042		}
2043	}
2044
2045	/* And Link */
	adapter->res[vector] = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &adapter->rid[vector],
	    RF_SHAREABLE | RF_ACTIVE);
2049	if (adapter->res[vector] == NULL) {
2050		device_printf(dev,
2051		    "Unable to allocate bus resource: "
2052		    "MSIX Link Interrupt\n");
2053		return (ENXIO);
2054	}
2055	if ((error = bus_setup_intr(dev, adapter->res[vector],
2056	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2057	    adapter, &adapter->tag[vector])) != 0) {
		device_printf(dev, "Failed to register Link handler\n");
2059		return (error);
2060	}
2061	if (adapter->hw.mac.type == e1000_82575)
2062		adapter->linkvec = adapter->rid[vector] - 1;
2063	else
2064		adapter->linkvec = vector;
2065
2066	/* Make tasklet for deferred link interrupt handling */
2067	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2068
2069	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2070	    taskqueue_thread_enqueue, &adapter->tq);
2071	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2072	    device_get_nameunit(adapter->dev));
2073
2074	return (0);
2075}
2076
2077static void
2078igb_configure_queues(struct adapter *adapter)
2079{
2080	struct	e1000_hw *hw = &adapter->hw;
2081	struct	tx_ring	*txr;
2082	struct	rx_ring	*rxr;
2083
2084	/* Turn on MSIX */
2085	{ /* 82575 */
2086		int tmp;
2087
		/* Enable MSI-X PBA support */
		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
		tmp |= E1000_CTRL_EXT_PBA_CLR;
		/* Auto-Mask interrupts upon ICR read. */
		tmp |= E1000_CTRL_EXT_EIAME;
		tmp |= E1000_CTRL_EXT_IRCA;
		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2095
		/* Set the interrupt throttling rate. */
2097		for (int i = 0; i < 10; i++)
2098			E1000_WRITE_REG(&adapter->hw,
2099			    E1000_EITR(i), DEFAULT_ITR);
2100
2101		/* TX */
2102		for (int i = 0; i < adapter->num_tx_queues; i++) {
2103			txr = &adapter->tx_rings[i];
2104			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2105			    txr->eims);
2106			adapter->eims_mask |= txr->eims;
2107		}
2108
2109		/* RX */
2110		for (int i = 0; i < adapter->num_rx_queues; i++) {
2111			rxr = &adapter->rx_rings[i];
2112			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2113			    rxr->eims);
2114			adapter->eims_mask |= rxr->eims;
2115		}
2116
2117		/* Link */
2118		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2119		    E1000_EIMS_OTHER);
2120		adapter->eims_mask |= E1000_EIMS_OTHER;
2121	}
2122	return;
2123}
2124
2125
2126static void
2127igb_free_pci_resources(struct adapter *adapter)
2128{
2129	device_t dev = adapter->dev;
2130
2131	/* Make sure the for loop below runs once */
2132	if (adapter->msix == 0)
2133		adapter->msix = 1;
2134
	/*
	 * First release all the interrupt resources:
	 * notice that since these are just kept in an
	 * array we can use the same logic whether it's
	 * MSI-X or just legacy.
	 */
2141	for (int i = 0; i < adapter->msix; i++) {
2142		if (adapter->tag[i] != NULL) {
2143			bus_teardown_intr(dev, adapter->res[i],
2144			    adapter->tag[i]);
2145			adapter->tag[i] = NULL;
2146		}
2147		if (adapter->res[i] != NULL) {
2148			bus_release_resource(dev, SYS_RES_IRQ,
2149			    adapter->rid[i], adapter->res[i]);
2150		}
2151	}
2152
2153	if (adapter->msix)
2154		pci_release_msi(dev);
2155
2156	if (adapter->msix_mem != NULL)
2157		bus_release_resource(dev, SYS_RES_MEMORY,
2158		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2159
2160	if (adapter->pci_mem != NULL)
2161		bus_release_resource(dev, SYS_RES_MEMORY,
2162		    PCIR_BAR(0), adapter->pci_mem);
2163
2164}
2165
2166/*
2167 * Setup Either MSI/X or MSI
2168 */
2169static int
2170igb_setup_msix(struct adapter *adapter)
2171{
2172	device_t dev = adapter->dev;
2173	int rid, want, queues, msgs;
2174
2175	/* First try MSI/X */
2176	rid = PCIR_BAR(IGB_MSIX_BAR);
2177	adapter->msix_mem = bus_alloc_resource_any(dev,
2178	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	if (adapter->msix_mem == NULL) {
		/* May not be enabled */
		device_printf(adapter->dev,
		    "Unable to map MSIX table\n");
		goto msi;
	}
2185
2186	msgs = pci_msix_count(dev);
2187	if (msgs == 0) { /* system has msix disabled */
2188		bus_release_resource(dev, SYS_RES_MEMORY,
2189		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2190		adapter->msix_mem = NULL;
2191		goto msi;
2192	}
2193
2194	/* Figure out a reasonable auto config value */
2195	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
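	/*
	 * Illustrative: with 8 CPUs and 10 MSI-X messages available,
	 * (msgs - 1) / 2 == 4, so the default becomes 4 TX and 4 RX
	 * queues, reserving the final vector for link interrupts.
	 */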
2196
2197	if (igb_tx_queues == 0)
2198		igb_tx_queues = queues;
2199	if (igb_rx_queues == 0)
2200		igb_rx_queues = queues;
2201	want = igb_tx_queues + igb_rx_queues + 1;
2202	if (msgs >= want)
2203		msgs = want;
	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors but %d queues wanted!\n",
		    msgs, want);
		return (ENXIO);
	}
	if ((msgs != 0) && (pci_alloc_msix(dev, &msgs) == 0)) {
		device_printf(adapter->dev,
		    "Using MSIX interrupts with %d vectors\n", msgs);
		adapter->num_tx_queues = igb_tx_queues;
		adapter->num_rx_queues = igb_rx_queues;
		return (msgs);
	}
msi:
	msgs = pci_msi_count(dev);
	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
		device_printf(adapter->dev, "Using MSI interrupt\n");
	return (msgs);
2223}
2224
2225/*********************************************************************
2226 *
2227 *  Initialize the hardware to a configuration
2228 *  as specified by the adapter structure.
2229 *
2230 **********************************************************************/
2231static int
2232igb_hardware_init(struct adapter *adapter)
2233{
2234	device_t	dev = adapter->dev;
2235	u32		rx_buffer_size;
2236
2237	INIT_DEBUGOUT("igb_hardware_init: begin");
2238
2239	/* Issue a global reset */
2240	e1000_reset_hw(&adapter->hw);
2241
2242	/* Let the firmware know the OS is in control */
2243	igb_get_hw_control(adapter);
2244
2245	/*
2246	 * These parameters control the automatic generation (Tx) and
2247	 * response (Rx) to Ethernet PAUSE frames.
2248	 * - High water mark should allow for at least two frames to be
2249	 *   received after sending an XOFF.
2250	 * - Low water mark works best when it is very near the high water mark.
2251	 *   This allows the receiver to restart by sending XON when it has
	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2253	 *   restart after one full frame is pulled from the buffer. There
2254	 *   could be several smaller frames in the buffer and if so they will
2255	 *   not trigger the XON until their total number reduces the buffer
2256	 *   by 1500.
2257	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2258	 */
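	/*
	 * Worked example (illustrative): a PBA readback of 0x0030
	 * yields rx_buffer_size = 0x30 << 10 = 49152 bytes; with a
	 * 1518 byte max frame rounded up to 2048, high_water is
	 * 49152 - 2048 = 47104 and low_water 47104 - 1500 = 45604.
	 */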
	rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
	    E1000_PBA) & 0xffff) << 10);
2261
2262	adapter->hw.fc.high_water = rx_buffer_size -
2263	    roundup2(adapter->max_frame_size, 1024);
2264	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2265
2266	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2267	adapter->hw.fc.send_xon = TRUE;
2268	adapter->hw.fc.type = e1000_fc_full;
2269
2270	if (e1000_init_hw(&adapter->hw) < 0) {
2271		device_printf(dev, "Hardware Initialization Failed\n");
2272		return (EIO);
2273	}
2274
2275	e1000_check_for_link(&adapter->hw);
2276
2277	return (0);
2278}
2279
2280/*********************************************************************
2281 *
2282 *  Setup networking device structure and register an interface.
2283 *
2284 **********************************************************************/
2285static void
2286igb_setup_interface(device_t dev, struct adapter *adapter)
2287{
2288	struct ifnet   *ifp;
2289
2290	INIT_DEBUGOUT("igb_setup_interface: begin");
2291
2292	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2293	if (ifp == NULL)
2294		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2295	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2296	ifp->if_mtu = ETHERMTU;
2297	ifp->if_init =  igb_init;
2298	ifp->if_softc = adapter;
2299	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2300	ifp->if_ioctl = igb_ioctl;
2301	ifp->if_start = igb_start;
2302	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2303	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2304	IFQ_SET_READY(&ifp->if_snd);
2305
2306	ether_ifattach(ifp, adapter->hw.mac.addr);
2307
2308	ifp->if_capabilities = ifp->if_capenable = 0;
2309
2310	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2311	ifp->if_capabilities |= IFCAP_TSO4;
2312	ifp->if_capenable = ifp->if_capabilities;
2313
2314	/*
2315	 * Tell the upper layer(s) we support long frames.
2316	 */
2317	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2318	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2319	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2320
2321#ifdef DEVICE_POLLING
2322	if (adapter->msix > 1)
2323		device_printf(adapter->dev, "POLLING not supported with MSIX\n");
2324	else
2325		ifp->if_capabilities |= IFCAP_POLLING;
2326#endif
2327
2328	/*
2329	 * Specify the media types supported by this adapter and register
2330	 * callbacks to update media and link information
2331	 */
2332	ifmedia_init(&adapter->media, IFM_IMASK,
2333	    igb_media_change, igb_media_status);
2334	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2335	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2336		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2337			    0, NULL);
2338		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2339	} else {
2340		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2341		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2342			    0, NULL);
2343		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2344			    0, NULL);
2345		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2346			    0, NULL);
2347		if (adapter->hw.phy.type != e1000_phy_ife) {
2348			ifmedia_add(&adapter->media,
2349				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2350			ifmedia_add(&adapter->media,
2351				IFM_ETHER | IFM_1000_T, 0, NULL);
2352		}
2353	}
2354	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2355	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2356}
2357
2358
2359/*
2360 * Manage DMA'able memory.
2361 */
2362static void
2363igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2364{
2365	if (error)
2366		return;
2367	*(bus_addr_t *) arg = segs[0].ds_addr;
2368}
2369
2370static int
2371igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2372        struct igb_dma_alloc *dma, int mapflags)
2373{
2374	int error;
2375
2376	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2377				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2378				BUS_SPACE_MAXADDR,	/* lowaddr */
2379				BUS_SPACE_MAXADDR,	/* highaddr */
2380				NULL, NULL,		/* filter, filterarg */
2381				size,			/* maxsize */
2382				1,			/* nsegments */
2383				size,			/* maxsegsize */
2384				0,			/* flags */
2385				NULL,			/* lockfunc */
2386				NULL,			/* lockarg */
2387				&dma->dma_tag);
2388	if (error) {
2389		device_printf(adapter->dev,
2390		    "%s: bus_dma_tag_create failed: %d\n",
2391		    __func__, error);
2392		goto fail_0;
2393	}
2394
2395	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2396	    BUS_DMA_NOWAIT, &dma->dma_map);
2397	if (error) {
2398		device_printf(adapter->dev,
2399		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2400		    __func__, (uintmax_t)size, error);
		bus_dma_tag_destroy(dma->dma_tag);
		goto fail_0;
2402	}
2403
2404	dma->dma_paddr = 0;
2405	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2406	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2407	if (error || dma->dma_paddr == 0) {
2408		device_printf(adapter->dev,
2409		    "%s: bus_dmamap_load failed: %d\n",
2410		    __func__, error);
2411		goto fail_3;
2412	}
2413
2414	return (0);
2415
2416fail_3:
2417	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2418fail_2:
2419	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2420	bus_dma_tag_destroy(dma->dma_tag);
2421fail_0:
2422	dma->dma_map = NULL;
2423	dma->dma_tag = NULL;
2424
2425	return (error);
2426}
2427
2428static void
2429igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2430{
2431	if (dma->dma_tag == NULL)
2432		return;
2433	if (dma->dma_map != NULL) {
2434		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2435		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2436		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2437		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2438		dma->dma_map = NULL;
2439	}
2440	bus_dma_tag_destroy(dma->dma_tag);
2441	dma->dma_tag = NULL;
2442}
2443
2444
2445/*********************************************************************
2446 *
2447 *  Allocate memory for the transmit and receive rings, and then
2448 *  the descriptors associated with each, called only once at attach.
2449 *
2450 **********************************************************************/
2451static int
2452igb_allocate_queues(struct adapter *adapter)
2453{
2454	device_t dev = adapter->dev;
2455	struct tx_ring *txr;
2456	struct rx_ring *rxr;
2457	int rsize, tsize, error = E1000_SUCCESS;
2458	int txconf = 0, rxconf = 0;
2459	char	name_string[16];
2460
2461	/* First allocate the TX ring struct memory */
2462	if (!(adapter->tx_rings =
2463	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2464	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2465		device_printf(dev, "Unable to allocate TX ring memory\n");
2466		error = ENOMEM;
2467		goto fail;
2468	}
2469	txr = adapter->tx_rings;
2470
2471	/* Next allocate the RX */
2472	if (!(adapter->rx_rings =
2473	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2474	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2475		device_printf(dev, "Unable to allocate RX ring memory\n");
2476		error = ENOMEM;
2477		goto rx_fail;
2478	}
2479	rxr = adapter->rx_rings;
2480
2481	tsize = roundup2(adapter->num_tx_desc *
2482	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2483	/*
2484	 * Now set up the TX queues, txconf is needed to handle the
2485	 * possibility that things fail midcourse and we need to
2486	 * undo memory gracefully
2487	 */
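	/*
	 * Illustrative: if setup fails on queue 2, txconf == 2 and the
	 * err_tx_desc unwind below releases the descriptor DMA areas
	 * of queues 0 and 1.
	 */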
2488	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2489		/* Set up some basics */
2490		txr = &adapter->tx_rings[i];
2491		txr->adapter = adapter;
2492		txr->me = i;
2493
2494		/* Initialize the TX lock */
2495		snprintf(name_string, sizeof(name_string), "%s:tx(%d)",
2496		    device_get_nameunit(dev), txr->me);
2497		mtx_init(&txr->tx_mtx, name_string, NULL, MTX_DEF);
2498
2499		if (igb_dma_malloc(adapter, tsize,
2500			&txr->txdma, BUS_DMA_NOWAIT)) {
2501			device_printf(dev,
2502			    "Unable to allocate TX Descriptor memory\n");
2503			error = ENOMEM;
2504			goto err_tx_desc;
2505		}
2506		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2507		bzero((void *)txr->tx_base, tsize);
2508
		/* Now allocate transmit buffers for the ring */
		if (igb_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
	}
2518
2519	/*
2520	 * Next the RX queues...
2521	 */
2522	rsize = roundup2(adapter->num_rx_desc *
2523	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2524	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2525		rxr = &adapter->rx_rings[i];
2526		rxr->adapter = adapter;
2527		rxr->me = i;
2528
		/* Initialize the RX lock */
		snprintf(name_string, sizeof(name_string), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
2532		mtx_init(&rxr->rx_mtx, name_string, NULL, MTX_DEF);
2533
2534		if (igb_dma_malloc(adapter, rsize,
2535			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2536			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
2538			error = ENOMEM;
2539			goto err_rx_desc;
2540		}
2541		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2542		bzero((void *)rxr->rx_base, rsize);
2543
		/* Allocate receive buffers for the ring */
2545		if (igb_allocate_receive_buffers(rxr)) {
2546			device_printf(dev,
2547			    "Critical Failure setting up receive buffers\n");
2548			error = ENOMEM;
2549			goto err_rx_desc;
2550		}
2551	}
2552
2553	return (0);
2554
2555err_rx_desc:
2556	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2557		igb_dma_free(adapter, &rxr->rxdma);
2558err_tx_desc:
2559	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2560		igb_dma_free(adapter, &txr->txdma);
2561	free(adapter->rx_rings, M_DEVBUF);
2562rx_fail:
2563	free(adapter->tx_rings, M_DEVBUF);
2564fail:
2565	return (error);
2566}
2567
2568/*********************************************************************
2569 *
2570 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2571 *  the information needed to transmit a packet on the wire. This is
2572 *  called only once at attach, setup is done every reset.
2573 *
2574 **********************************************************************/
2575static int
2576igb_allocate_transmit_buffers(struct tx_ring *txr)
2577{
2578	struct adapter *adapter = txr->adapter;
2579	device_t dev = adapter->dev;
2580	struct igb_buffer *txbuf;
2581	int error, i;
2582
2583	/*
2584	 * Setup DMA descriptor areas.
2585	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
2587			       PAGE_SIZE, 0,		/* alignment, bounds */
2588			       BUS_SPACE_MAXADDR,	/* lowaddr */
2589			       BUS_SPACE_MAXADDR,	/* highaddr */
2590			       NULL, NULL,		/* filter, filterarg */
2591			       IGB_TSO_SIZE,		/* maxsize */
2592			       IGB_MAX_SCATTER,		/* nsegments */
2593			       PAGE_SIZE,		/* maxsegsize */
2594			       0,			/* flags */
2595			       NULL,			/* lockfunc */
2596			       NULL,			/* lockfuncarg */
2597			       &txr->txtag))) {
2598		device_printf(dev,"Unable to allocate TX DMA tag\n");
2599		goto fail;
2600	}
2601
2602	if (!(txr->tx_buffers =
2603	    (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2604	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2605		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2606		error = ENOMEM;
2607		goto fail;
2608	}
2609
2610        /* Create the descriptor buffer dma maps */
2611	txbuf = txr->tx_buffers;
2612	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2613		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2614		if (error != 0) {
2615			device_printf(dev, "Unable to create TX DMA map\n");
2616			goto fail;
2617		}
2618	}
2619
	return (0);
2621fail:
2622	/* We free all, it handles case where we are in the middle */
2623	igb_free_transmit_structures(adapter);
2624	return (error);
2625}
2626
2627/*********************************************************************
2628 *
2629 *  Initialize a transmit ring.
2630 *
2631 **********************************************************************/
2632static void
2633igb_setup_transmit_ring(struct tx_ring *txr)
2634{
2635	struct adapter *adapter = txr->adapter;
2636	struct igb_buffer *txbuf;
2637	int i;
2638
2639	/* Clear the old ring contents */
2640	bzero((void *)txr->tx_base,
2641	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2642	/* Reset indices */
2643	txr->next_avail_desc = 0;
2644	txr->next_to_clean = 0;
2645
2646	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
2648	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2649		if (txbuf->m_head != NULL) {
2650			bus_dmamap_sync(txr->txtag, txbuf->map,
2651			    BUS_DMASYNC_POSTWRITE);
2652			bus_dmamap_unload(txr->txtag, txbuf->map);
2653			m_freem(txbuf->m_head);
2654			txbuf->m_head = NULL;
2655		}
2656		/* clear the watch index */
2657		txbuf->next_eop = -1;
	}
2659
2660	/* Set number of descriptors available */
2661	txr->tx_avail = adapter->num_tx_desc;
2662
2663	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
}
2667
2668/*********************************************************************
2669 *
2670 *  Initialize all transmit rings.
2671 *
2672 **********************************************************************/
2673static void
2674igb_setup_transmit_structures(struct adapter *adapter)
2675{
2676	struct tx_ring *txr = adapter->tx_rings;
2677
2678	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2679		igb_setup_transmit_ring(txr);
2680
2681	return;
2682}
2683
2684/*********************************************************************
2685 *
2686 *  Enable transmit unit.
2687 *
2688 **********************************************************************/
2689static void
2690igb_initialize_transmit_units(struct adapter *adapter)
2691{
2692	struct tx_ring	*txr = adapter->tx_rings;
2693	u32		tctl, txdctl, tipg = 0;
2694
	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2696
2697	/* Setup the Base and Length of the Tx Descriptor Rings */
2698	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2699		u64 bus_addr = txr->txdma.dma_paddr;
2700
2701		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2702		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2703		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2704		    (uint32_t)(bus_addr >> 32));
2705		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2706		    (uint32_t)bus_addr);
2707
2708		/* Setup the HW Tx Head and Tail descriptor pointers */
2709		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2710		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2711
2712		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2713		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2714		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2715
2716		/* Setup Transmit Descriptor Base Settings */
2717		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2718
2719		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2720		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2721		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2722	}
2723
2724	/* Set the default values for the Tx Inter Packet Gap timer */
2725	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2726	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
2727		tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2728	else
2729		tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2730
2731	tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2732	tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2733
2734	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV,
	    adapter->tx_int_delay.value);
	E1000_WRITE_REG(&adapter->hw, E1000_TADV,
	    adapter->tx_abs_int_delay.value);
2737
2738	/* Program the Transmit Control Register */
2739	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2740	tctl &= ~E1000_TCTL_CT;
2741	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2742		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2743
2744	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
}
2748
2749/*********************************************************************
2750 *
2751 *  Free all transmit rings.
2752 *
2753 **********************************************************************/
2754static void
2755igb_free_transmit_structures(struct adapter *adapter)
2756{
2757	struct tx_ring *txr = adapter->tx_rings;
2758
2759	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2760		IGB_TX_LOCK(txr);
2761		igb_free_transmit_buffers(txr);
2762		igb_dma_free(adapter, &txr->txdma);
2763		IGB_TX_UNLOCK(txr);
2764		IGB_TX_LOCK_DESTROY(txr);
2765	}
2766	free(adapter->tx_rings, M_DEVBUF);
2767}
2768
2769/*********************************************************************
2770 *
2771 *  Free transmit ring related data structures.
2772 *
2773 **********************************************************************/
2774static void
2775igb_free_transmit_buffers(struct tx_ring *txr)
2776{
2777	struct adapter *adapter = txr->adapter;
2778	struct igb_buffer *tx_buffer;
2779	int             i;
2780
2781	INIT_DEBUGOUT("free_transmit_ring: begin");
2782
2783	if (txr->tx_buffers == NULL)
2784		return;
2785
2786	tx_buffer = txr->tx_buffers;
2787	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2788		if (tx_buffer->m_head != NULL) {
2789			bus_dmamap_sync(txr->txtag, tx_buffer->map,
2790			    BUS_DMASYNC_POSTWRITE);
2791			bus_dmamap_unload(txr->txtag,
2792			    tx_buffer->map);
2793			m_freem(tx_buffer->m_head);
2794			tx_buffer->m_head = NULL;
2795			if (tx_buffer->map != NULL) {
2796				bus_dmamap_destroy(txr->txtag,
2797				    tx_buffer->map);
2798				tx_buffer->map = NULL;
2799			}
2800		} else if (tx_buffer->map != NULL) {
2801			bus_dmamap_unload(txr->txtag,
2802			    tx_buffer->map);
2803			bus_dmamap_destroy(txr->txtag,
2804			    tx_buffer->map);
2805			tx_buffer->map = NULL;
2806		}
2807	}
2808
2809	if (txr->tx_buffers != NULL) {
2810		free(txr->tx_buffers, M_DEVBUF);
2811		txr->tx_buffers = NULL;
2812	}
2813	if (txr->txtag != NULL) {
2814		bus_dma_tag_destroy(txr->txtag);
2815		txr->txtag = NULL;
2816	}
2817	return;
2818}
2819
2820/**********************************************************************
2821 *
2822 *  Setup work for hardware segmentation offload (TSO) on
2823 *  adapters using advanced tx descriptors (82575)
2824 *
2825 **********************************************************************/
2826static boolean_t
2827igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
2828{
2829	struct adapter *adapter = txr->adapter;
2830	struct e1000_adv_tx_context_desc *TXD;
2831	struct igb_buffer        *tx_buffer;
2832	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2833	u32 mss_l4len_idx = 0;
2834	u16 vtag = 0;
2835	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2836	struct ether_vlan_header *eh;
2837	struct ip *ip;
2838	struct tcphdr *th;
2839
2841	/*
2842	 * Determine where frame payload starts.
2843	 * Jump over vlan headers if already present
2844	 */
2845	eh = mtod(mp, struct ether_vlan_header *);
2846	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
2847		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2848	else
2849		ehdrlen = ETHER_HDR_LEN;
2850
2851	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2852	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2853		return FALSE;
2854
2855	/* Only supports IPV4 for now */
2856	ctxd = txr->next_avail_desc;
2857	tx_buffer = &txr->tx_buffers[ctxd];
2858	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2859
2860	ip = (struct ip *)(mp->m_data + ehdrlen);
	if (ip->ip_p != IPPROTO_TCP)
		return FALSE;
2863	ip->ip_len = 0;
2864	ip->ip_sum = 0;
2865	ip_hlen = ip->ip_hl << 2;
2866	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2867	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2868	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
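	/*
	 * For TSO the checksum field is seeded with a pseudo-header
	 * checksum that excludes the TCP length; the hardware inserts
	 * the final checksum into each segment it generates.
	 */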
2869	tcp_hlen = th->th_off << 2;
2870	/*
2871	 * Calculate header length, this is used
2872	 * in the transmit desc in igb_xmit
2873	 */
2874	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
2875
2876	/* VLAN MACLEN IPLEN */
2877	if (mp->m_flags & M_VLANTAG) {
2878		vtag = htole16(mp->m_pkthdr.ether_vtag);
2879		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2880	}
2881
2882	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
2883	vlan_macip_lens |= ip_hlen;
2884	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
2885
2886	/* ADV DTYPE TUCMD */
2887	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2888	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2889	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2890	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
2891
2892	/* MSS L4LEN IDX */
2893	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
2894	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
2895	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2896
2897	TXD->seqnum_seed = htole32(0);
2898	tx_buffer->m_head = NULL;
2899	tx_buffer->next_eop = -1;
2900
2901	if (++ctxd == adapter->num_tx_desc)
2902		ctxd = 0;
2903
2904	txr->tx_avail--;
2905	txr->next_avail_desc = ctxd;
2906	return TRUE;
2907}
2908
2909
2910/*********************************************************************
2911 *
2912 *  Context Descriptor setup for VLAN or CSUM
2913 *
2914 **********************************************************************/
2915
2916static int
2917igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
2918{
2919	struct adapter *adapter = txr->adapter;
2920	struct e1000_adv_tx_context_desc *TXD;
2921	struct igb_buffer        *tx_buffer;
2922	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2923	struct ether_vlan_header *eh;
2924	struct ip *ip = NULL;
2925	struct ip6_hdr *ip6;
2926	int  ehdrlen, ip_hlen = 0;
2927	u16	etype;
2928	u8	ipproto = 0;
2929	bool	offload = TRUE;
2930	u16 vtag = 0;
2931
2932	int ctxd = txr->next_avail_desc;
2933	tx_buffer = &txr->tx_buffers[ctxd];
2934	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2935
2936	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2937		offload = FALSE; /* Only here to handle VLANs */
2938	/*
2939	** In advanced descriptors the vlan tag must
2940	** be placed into the descriptor itself.
2941	*/
2942	if (mp->m_flags & M_VLANTAG) {
2943		vtag = htole16(mp->m_pkthdr.ether_vtag);
2944		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2945	} else if (offload == FALSE)
2946		return FALSE;
2947	/*
2948	 * Determine where frame payload starts.
2949	 * Jump over vlan headers if already present,
2950	 * helpful for QinQ too.
2951	 */
2952	eh = mtod(mp, struct ether_vlan_header *);
2953	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2954		etype = ntohs(eh->evl_proto);
2955		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2956	} else {
2957		etype = ntohs(eh->evl_encap_proto);
2958		ehdrlen = ETHER_HDR_LEN;
2959	}
2960
2961	/* Set the ether header length */
2962	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		if (mp->m_len < ehdrlen + ip_hlen) {
			offload = FALSE;
			break;
		}
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		if (mp->m_len < ehdrlen + ip_hlen)
			return FALSE; /* failure */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
		break;
	default:
		offload = FALSE;
		break;
	}
2987
2988	vlan_macip_lens |= ip_hlen;
2989	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2990
	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
		break;
	default:
		offload = FALSE;
		break;
	}
3006
3007	/* Now copy bits into descriptor */
3008	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3009	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3010	TXD->seqnum_seed = htole32(0);
3011	TXD->mss_l4len_idx = htole32(0);
3012
3013	tx_buffer->m_head = NULL;
3014	tx_buffer->next_eop = -1;
3015
3016	/* We've consumed the first desc, adjust counters */
3017	if (++ctxd == adapter->num_tx_desc)
3018		ctxd = 0;
3019	txr->next_avail_desc = ctxd;
3020	--txr->tx_avail;
3021
	return (offload);
3023}
3024
3025
3026/**********************************************************************
3027 *
3028 *  Examine each tx_buffer in the used queue. If the hardware is done
3029 *  processing the packet then free associated resources. The
3030 *  tx_buffer is put back on the free queue.
3031 *
 *  A TRUE return means there's work in the ring to clean; FALSE means empty.
3033 **********************************************************************/
3034static bool
3035igb_txeof(struct tx_ring *txr)
3036{
3037	struct adapter	*adapter = txr->adapter;
	int first, last, done, num_avail;
	struct igb_buffer *tx_buffer;
	struct e1000_tx_desc *tx_desc, *eop_desc;
3041	struct ifnet   *ifp = adapter->ifp;
3042
3043	IGB_TX_LOCK_ASSERT(txr);
3044
	if (txr->tx_avail == adapter->num_tx_desc)
		return FALSE;

	num_avail = txr->tx_avail;
	first = txr->next_to_clean;
	tx_desc = &txr->tx_base[first];
	tx_buffer = &txr->tx_buffers[first];
	last = tx_buffer->next_eop;
	eop_desc = &txr->tx_base[last];
3054
3055	/*
3056	 * What this does is get the index of the
3057	 * first descriptor AFTER the EOP of the
3058	 * first packet, that way we can do the
3059	 * simple comparison on the inner while loop.
3060	 */
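	/*
	 * Illustrative: in a 4 descriptor ring where the first EOP
	 * sits at index 3, 'done' wraps to 0 and the inner loop below
	 * advances 'first' until it reaches 0.
	 */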
3061	if (++last == adapter->num_tx_desc)
		last = 0;
3063	done = last;
3064
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
			tx_desc->upper.data = 0;
			tx_desc->lower.data = 0;
			tx_desc->buffer_addr = 0;
			num_avail++;

			if (tx_buffer->m_head) {
				ifp->if_opackets++;
				bus_dmamap_sync(txr->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    tx_buffer->map);

				m_freem(tx_buffer->m_head);
				tx_buffer->m_head = NULL;
			}
			tx_buffer->next_eop = -1;

			if (++first == adapter->num_tx_desc)
				first = 0;

			tx_buffer = &txr->tx_buffers[first];
			tx_desc = &txr->tx_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
			eop_desc = &txr->tx_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc)
				last = 0;
			done = last;
		} else
			break;
	}
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	txr->next_to_clean = first;

	/*
	 * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
	 * that it is OK to send packets.
	 * If there are no pending descriptors, clear the timeout. Otherwise,
	 * if some descriptors have been freed, restart the timeout.
	 */
	if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
		/* All clean, turn off the timer */
		if (num_avail == adapter->num_tx_desc) {
			txr->watchdog_timer = 0;
			txr->tx_avail = num_avail;
			return FALSE;
		}
		/* Some cleaned, reset the timer */
		else if (num_avail != txr->tx_avail)
			txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
	txr->tx_avail = num_avail;
	return TRUE;
3130}
3131
3132
3133/*********************************************************************
3134 *
3135 *  Get a buffer from system mbuf buffer pool.
3136 *
3137 **********************************************************************/
3138static int
3139igb_get_buf(struct rx_ring *rxr, int i)
3140{
3141	struct adapter		*adapter = rxr->adapter;
3142	struct mbuf		*m;
3143	bus_dma_segment_t	segs[1];
3144	bus_dmamap_t		map;
3145	struct igb_buffer	*rx_buffer;
3146	int			error, nsegs;
3147
3148	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3149	if (m == NULL) {
3150		adapter->mbuf_cluster_failed++;
3151		return (ENOBUFS);
3152	}
3153	m->m_len = m->m_pkthdr.len = MCLBYTES;
3154
3155	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3156		m_adj(m, ETHER_ALIGN);
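	/*
	 * The 2 byte ETHER_ALIGN offset deliberately misaligns the
	 * 14 byte Ethernet header so the IP header that follows lands
	 * on a 4 byte boundary; it is skipped when the adjusted frame
	 * would no longer fit in the cluster.
	 */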
3157
3158	/*
3159	 * Using memory from the mbuf cluster pool, invoke the
3160	 * bus_dma machinery to arrange the memory mapping.
3161	 */
3162	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3163	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
3164	if (error != 0) {
3165		m_free(m);
3166		return (error);
3167	}
3168
3169	/* If nsegs is wrong then the stack is corrupt. */
3170	KASSERT(nsegs == 1, ("Too many segments returned!"));
3171
3172	rx_buffer = &rxr->rx_buffers[i];
3173	if (rx_buffer->m_head != NULL)
3174		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3175
3176	map = rx_buffer->map;
3177	rx_buffer->map = rxr->rx_spare_map;
3178	rxr->rx_spare_map = map;
3179	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3180	rx_buffer->m_head = m;
3181
3182	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
3183	return (0);
3184}
3185
3186
3187/*********************************************************************
3188 *
3189 *  Allocate memory for rx_buffer structures. Since we use one
3190 *  rx_buffer per received packet, the maximum number of rx_buffer's
3191 *  that we'll need is equal to the number of receive descriptors
3192 *  that we've allocated.
3193 *
3194 **********************************************************************/
3195static int
3196igb_allocate_receive_buffers(struct rx_ring *rxr)
3197{
3198	struct	adapter 	*adapter = rxr->adapter;
3199	device_t 		dev = adapter->dev;
3200	struct igb_buffer 	*rxbuf;
3201	int             	i, bsize, error;
3202
3203	bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3204	if (!(rxr->rx_buffers =
3205	    (struct igb_buffer *) malloc(bsize,
3206	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3207		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3208		error = ENOMEM;
3209		goto fail;
3210	}
3211
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3213				   PAGE_SIZE, 0,	/* alignment, bounds */
3214				   BUS_SPACE_MAXADDR,	/* lowaddr */
3215				   BUS_SPACE_MAXADDR,	/* highaddr */
3216				   NULL, NULL,		/* filter, filterarg */
3217				   MCLBYTES,		/* maxsize */
3218				   1,			/* nsegments */
3219				   MCLBYTES,		/* maxsegsize */
3220				   0,			/* flags */
3221				   NULL,		/* lockfunc */
3222				   NULL,		/* lockfuncarg */
3223				   &rxr->rxtag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
3225		goto fail;
3226	}
3227
	/* Create the spare map (used by getbuf) */
	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
	    &rxr->rx_spare_map);
3231	if (error) {
3232		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3233		    __func__, error);
3234		goto fail;
3235	}
3236
	for (i = 0; i < adapter->num_rx_desc; i++) {
3238		rxbuf = &rxr->rx_buffers[i];
3239		error = bus_dmamap_create(rxr->rxtag,
3240		    BUS_DMA_NOWAIT, &rxbuf->map);
3241		if (error) {
			device_printf(dev,
			    "Unable to create RX DMA map\n");
3243			goto fail;
3244		}
3245	}
3246
3247	return (0);
3248
3249fail:
3250	/* Frees all, but can handle partial completion */
3251	igb_free_receive_structures(adapter);
3252	return (error);
3253}
3254
3255/*********************************************************************
3256 *
3257 *  Initialize a receive ring and its buffers.
3258 *
3259 **********************************************************************/
3260static int
3261igb_setup_receive_ring(struct rx_ring *rxr)
3262{
3263	struct	adapter	*adapter;
3264	struct igb_buffer *rxbuf;
3265	int j, rsize;
3266
3267	adapter = rxr->adapter;
3268	rsize = roundup2(adapter->num_rx_desc *
3269	    sizeof(union e1000_adv_rx_desc), 4096);
3270	/* Clear the ring contents */
3271	bzero((void *)rxr->rx_base, rsize);
3272
	/*
	** Free any mbufs still attached to the ring
	** from a previous run before re-arming it.
	*/
3278	for (int i = 0; i < adapter->num_rx_desc; i++) {
3279		rxbuf = &rxr->rx_buffers[i];
3280		if (rxbuf->m_head != NULL) {
3281			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3282			    BUS_DMASYNC_POSTREAD);
3283			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3284			m_freem(rxbuf->m_head);
3285			rxbuf->m_head = NULL;
3286		}
3287	}
3288
3289	for (j = 0; j < adapter->num_rx_desc; j++) {
		if (igb_get_buf(rxr, j) != 0) {
3291			rxr->rx_buffers[j].m_head = NULL;
3292			rxr->rx_base[j].read.pkt_addr = 0;
3293			goto fail;
3294		}
3295	}
3296
3297	/* Setup our descriptor indices */
3298	rxr->next_to_check = 0;
3299	rxr->last_cleaned = 0;
3300
3301	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3302	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3303
3304	return (0);
3305fail:
3306	/*
3307	 * We need to clean up any buffers allocated so far
3308	 * 'j' is the failing index, decrement it to get the
3309	 * last success.
3310	 */
	for (--j; j >= 0; j--) {
3312		rxbuf = &rxr->rx_buffers[j];
3313		if (rxbuf->m_head != NULL) {
3314			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3315			    BUS_DMASYNC_POSTREAD);
3316			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3317			m_freem(rxbuf->m_head);
3318			rxbuf->m_head = NULL;
3319		}
3320	}
3321	return (ENOBUFS);
3322}
3323
3324/*********************************************************************
3325 *
3326 *  Initialize all receive rings.
3327 *
3328 **********************************************************************/
3329static int
3330igb_setup_receive_structures(struct adapter *adapter)
3331{
3332	struct rx_ring *rxr = adapter->rx_rings;
3333	int i, j;
3334
3335	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3336		if (igb_setup_receive_ring(rxr))
3337			goto fail;
3338
3339	return (0);
3340fail:
3341	/*
3342	 * Free RX buffers allocated so far, we will only handle
3343	 * the rings that completed, the failing case will have
3344	 * cleaned up for itself. The value of 'i' will be the
3345	 * failed ring so we must pre-decrement it.
3346	 */
3347	rxr = adapter->rx_rings;
	for (--i; i >= 0; i--, rxr++) {
3349		for (j = 0; j < adapter->num_rx_desc; j++) {
3350			struct igb_buffer *rxbuf;
3351			rxbuf = &rxr->rx_buffers[j];
3352			if (rxbuf->m_head != NULL) {
3353				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3354			  	  BUS_DMASYNC_POSTREAD);
3355				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3356				m_freem(rxbuf->m_head);
3357				rxbuf->m_head = NULL;
3358			}
3359		}
3360	}
3361
3362	return (ENOBUFS);
3363}
3364
3365/*********************************************************************
3366 *
3367 *  Enable receive unit.
3368 *
3369 **********************************************************************/
3370static void
3371igb_initialize_receive_units(struct adapter *adapter)
3372{
3373	struct rx_ring	*rxr = adapter->rx_rings;
3374	struct ifnet	*ifp = adapter->ifp;
3375	u32		rctl, rxcsum, psize;
3376
	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3378
3379	/*
3380	 * Make sure receives are disabled while setting
3381	 * up the descriptor ring
3382	 */
3383	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3384	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3385
3386	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3387	    adapter->rx_abs_int_delay.value);
3388
3389	/* Setup the Base and Length of the Rx Descriptor Rings */
3390	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3391		u64 bus_addr = rxr->rxdma.dma_paddr;
3392		u32 rxdctl, srrctl;
3393
3394		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3395		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3396		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3397		    (uint32_t)(bus_addr >> 32));
3398		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3399		    (uint32_t)bus_addr);
3400		/* Use Advanced Descriptor type */
3401		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
3402		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3403		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3404		/* Enable this Queue */
3405		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3406		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3407		rxdctl &= 0xFFF00000;
3408		rxdctl |= IGB_RX_PTHRESH;
3409		rxdctl |= IGB_RX_HTHRESH << 8;
3410		rxdctl |= IGB_RX_WTHRESH << 16;
3411		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3412	}
3413
3414	/*
3415	** Setup for RX MultiQueue
3416	*/
	if (adapter->num_rx_queues > 1) {
3418		u32 random[10], mrqc, shift = 0;
3419		union igb_reta {
3420			u32 dword;
3421			u8  bytes[4];
3422		} reta;
3423
3424		arc4rand(&random, sizeof(random), 0);
3425		if (adapter->hw.mac.type == e1000_82575)
3426			shift = 6;
3427		/* Warning FM follows */
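		/*
		 * The 128 entry redirection table is written 4 bytes
		 * at a time: entry i steers traffic to queue
		 * (i % num_rx_queues), and on 82575 the shift of 6
		 * places the queue number in the upper bits of each
		 * byte. Illustrative: with 2 queues the bytes simply
		 * alternate 0,1,0,1,...
		 */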
3428		for (int i = 0; i < 128; i++) {
3429			reta.bytes[i & 3] =
3430			    (i % adapter->num_rx_queues) << shift;
3431			if ((i & 3) == 3)
3432				E1000_WRITE_REG(&adapter->hw,
3433				    E1000_RETA(i & ~3), reta.dword);
3434		}
3435		/* Now fill in hash table */
3436		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3437		for (int i = 0; i < 10; i++)
3438			E1000_WRITE_REG_ARRAY(&adapter->hw,
3439			    E1000_RSSRK(0), i, random[i]);
3440
3441		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3442		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3443		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3444		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3449
3450		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3451
3452		/*
3453		** NOTE: Receive Full-Packet Checksum Offload
3454		** is mutually exclusive with Multiqueue. However
3455		** this is not the same as TCP/IP checksums which
3456		** still work.
3457		*/
3458		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3459		rxcsum |= E1000_RXCSUM_PCSD;
3460		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3461	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
3462		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3463		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3464		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3465	}
3466
3467	/* Setup the Receive Control Register */
3468	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3469	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3470		   E1000_RCTL_RDMTS_HALF |
3471		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
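	/*
	 * Illustrative: mc_filter_type programs the RCTL multicast
	 * offset field, which selects the bits of a multicast address
	 * used to index the 4096 bit hash table.
	 */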
3472
3473	/* Make sure VLAN Filters are off */
3474	rctl &= ~E1000_RCTL_VFE;
3475
3476	rctl &= ~E1000_RCTL_SBP;
3477
3478	switch (adapter->rx_buffer_len) {
3479	default:
3480	case 2048:
3481		rctl |= E1000_RCTL_SZ_2048;
3482		break;
3483	case 4096:
3484		rctl |= E1000_RCTL_SZ_4096 |
3485		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3486		break;
3487	case 8192:
3488		rctl |= E1000_RCTL_SZ_8192 |
3489		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3490		break;
3491	case 16384:
3492		rctl |= E1000_RCTL_SZ_16384 |
3493		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3494		break;
3495	}
3496
3497	if (ifp->if_mtu > ETHERMTU) {
3498		/* Set maximum packet len */
3499		psize = adapter->max_frame_size;
3500		/* are we on a vlan? */
3501		if (adapter->ifp->if_vlantrunk != NULL)
3502			psize += VLAN_TAG_SIZE;
3503		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3504		rctl |= E1000_RCTL_LPE;
3505	} else
3506		rctl &= ~E1000_RCTL_LPE;
3507
3508	/* Enable Receives */
3509	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3510
3511	/*
3512	 * Setup the HW Rx Head and Tail Descriptor Pointers
3513	 *   - needs to be after enable
3514	 */
3515	for (int i = 0; i < adapter->num_rx_queues; i++) {
3516		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3517		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3518		     adapter->num_rx_desc - 1);
3519	}
3520	return;
3521}
3522
3523/*********************************************************************
3524 *
3525 *  Free receive rings.
3526 *
3527 **********************************************************************/
3528static void
3529igb_free_receive_structures(struct adapter *adapter)
3530{
3531	struct rx_ring *rxr = adapter->rx_rings;
3532
3533	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3534		igb_free_receive_buffers(rxr);
3535		igb_dma_free(adapter, &rxr->rxdma);
3536	}
3537
3538	free(adapter->rx_rings, M_DEVBUF);
3539}
3540
3541/*********************************************************************
3542 *
3543 *  Free receive ring data structures.
3544 *
3545 **********************************************************************/
3546static void
3547igb_free_receive_buffers(struct rx_ring *rxr)
3548{
3549	struct adapter	*adapter = rxr->adapter;
3550	struct igb_buffer *rx_buffer;
3551
3552	INIT_DEBUGOUT("free_receive_buffers: begin");
3553
3554	if (rxr->rx_spare_map) {
3555		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3556		rxr->rx_spare_map = NULL;
3557	}
3558
3559	/* Cleanup any existing buffers */
3560	if (rxr->rx_buffers != NULL) {
3561		rx_buffer = &rxr->rx_buffers[0];
3562		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3563			if (rx_buffer->m_head != NULL) {
3564				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3565				    BUS_DMASYNC_POSTREAD);
3566				bus_dmamap_unload(rxr->rxtag,
3567				    rx_buffer->map);
3568				m_freem(rx_buffer->m_head);
3569				rx_buffer->m_head = NULL;
3570			} else if (rx_buffer->map != NULL)
3571				bus_dmamap_unload(rxr->rxtag,
3572				    rx_buffer->map);
3573			if (rx_buffer->map != NULL) {
3574				bus_dmamap_destroy(rxr->rxtag,
3575				    rx_buffer->map);
3576				rx_buffer->map = NULL;
3577			}
3578		}
3579	}
3580
3581	if (rxr->rx_buffers != NULL) {
3582		free(rxr->rx_buffers, M_DEVBUF);
3583		rxr->rx_buffers = NULL;
3584	}
3585
3586	if (rxr->rxtag != NULL) {
3587		bus_dma_tag_destroy(rxr->rxtag);
3588		rxr->rxtag = NULL;
3589	}
3590}
3591/*********************************************************************
3592 *
3593 *  This routine executes in interrupt context. It replenishes
3594 *  the mbufs in the descriptor ring and passes data that has
3595 *  been DMA'ed into host memory up to the upper layer.
3596 *
3597 *  We loop at most count times if count is > 0, or until done if
3598 *  count < 0.
3599 *
3600 *  Return TRUE if more packets remain to be cleaned, FALSE when done
3601 *********************************************************************/
3602static bool
3603igb_rxeof(struct rx_ring *rxr, int count)
3604{
3605	struct adapter	*adapter = rxr->adapter;
3606	struct ifnet	*ifp;
3607	struct mbuf	*mp;
3608	uint8_t		accept_frame = 0;
3609	uint8_t		eop = 0;
3610	uint16_t 	len, desc_len, prev_len_adj;
3611	int		i;
3612	union e1000_adv_rx_desc   *cur;
3613	u32		staterr;
3614
3615	IGB_RX_LOCK(rxr);
3616	ifp = adapter->ifp;
3617	i = rxr->next_to_check;
3618	cur = &rxr->rx_base[i];
3619	staterr = cur->wb.upper.status_error;
3620
3621	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3622	    BUS_DMASYNC_POSTREAD);
3623
3624	if (!(staterr & E1000_RXD_STAT_DD)) {
3625		IGB_RX_UNLOCK(rxr);
3626		return FALSE;
3627	}
3628
3629	while ((staterr & E1000_RXD_STAT_DD) &&
3630	    (count != 0) &&
3631	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3632		struct mbuf *m = NULL;
3633
3634		mp = rxr->rx_buffers[i].m_head;
3635		/*
3636		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3637		 * needs to access the last received byte in the mbuf.
3638		 */
3639		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
3640		    BUS_DMASYNC_POSTREAD);
3641
3642		accept_frame = 1;
3643		prev_len_adj = 0;
3644		desc_len = le16toh(cur->wb.upper.length);
3645		if (staterr & E1000_RXD_STAT_EOP) {
3646			count--;
3647			eop = 1;
3648			if (desc_len < ETHER_CRC_LEN) {
3649				len = 0;
3650				prev_len_adj = ETHER_CRC_LEN - desc_len;
3651			} else
3652				len = desc_len - ETHER_CRC_LEN;
3653		} else {
3654			eop = 0;
3655			len = desc_len;
3656		}
3657
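		/* Any error bit in the descriptor means the whole frame is dropped */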
3658		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
3659			u32	pkt_len = desc_len;
3660
3661			if (rxr->fmp != NULL)
3662				pkt_len += rxr->fmp->m_pkthdr.len;
3663
3664			accept_frame = 0;
3665		}
3666
3667		if (accept_frame) {
3668			if (igb_get_buf(rxr, i) != 0) {
3669				ifp->if_iqdrops++;
3670				goto discard;
3671			}
3672
3673			/* Assign correct length to the current fragment */
3674			mp->m_len = len;
3675
3676			if (rxr->fmp == NULL) {
3677				mp->m_pkthdr.len = len;
3678				rxr->fmp = mp; /* Store the first mbuf */
3679				rxr->lmp = mp;
3680			} else {
3681				/* Chain mbuf's together */
3682				mp->m_flags &= ~M_PKTHDR;
3683				/*
3684				 * Adjust length of previous mbuf in chain if
3685				 * we received less than 4 bytes in the last
3686				 * descriptor.
3687				 */
3688				if (prev_len_adj > 0) {
3689					rxr->lmp->m_len -= prev_len_adj;
3690					rxr->fmp->m_pkthdr.len -=
3691					    prev_len_adj;
3692				}
3693				rxr->lmp->m_next = mp;
3694				rxr->lmp = rxr->lmp->m_next;
3695				rxr->fmp->m_pkthdr.len += len;
3696			}
3697
3698			if (eop) {
3699				rxr->fmp->m_pkthdr.rcvif = ifp;
3700				ifp->if_ipackets++;
3701				rxr->rx_packets++;
3702				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
3703
3704				igb_rx_checksum(staterr, rxr->fmp);
3705#ifndef __NO_STRICT_ALIGNMENT
3706				if (adapter->max_frame_size >
3707				    (MCLBYTES - ETHER_ALIGN) &&
3708				    igb_fixup_rx(rxr) != 0)
3709					goto skip;
3710#endif
3711				if (staterr & E1000_RXD_STAT_VP) {
3712					rxr->fmp->m_pkthdr.ether_vtag =
3713					    le16toh(cur->wb.upper.vlan);
3714					rxr->fmp->m_flags |= M_VLANTAG;
3715				}
3716#ifndef __NO_STRICT_ALIGNMENT
3717skip:
3718#endif
3719				m = rxr->fmp;
3720				rxr->fmp = NULL;
3721				rxr->lmp = NULL;
3722			}
3723		} else {
3724			ifp->if_ierrors++;
3725discard:
3726			/* Reuse loaded DMA map and just update mbuf chain */
3727			mp = rxr->rx_buffers[i].m_head;
3728			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3729			mp->m_data = mp->m_ext.ext_buf;
3730			mp->m_next = NULL;
3731			if (adapter->max_frame_size <=
3732			    (MCLBYTES - ETHER_ALIGN))
3733				m_adj(mp, ETHER_ALIGN);
3734			if (rxr->fmp != NULL) {
3735				m_freem(rxr->fmp);
3736				rxr->fmp = NULL;
3737				rxr->lmp = NULL;
3738			}
3739			m = NULL;
3740		}
3741
3742		/* Zero out the receive descriptor's status. */
3743		cur->wb.upper.status_error = 0;
3744		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3745		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3746
3747		rxr->last_cleaned = i; /* For updating tail */
3748
3749		/* Advance our pointers to the next descriptor. */
3750		if (++i == adapter->num_rx_desc)
3751			i = 0;
3752
3753		if (m != NULL) {
3754			rxr->next_to_check = i;
3755			/* Pass up to the stack */
3756			IGB_RX_UNLOCK(rxr);
3757			(*ifp->if_input)(ifp, m);
3758			IGB_RX_LOCK(rxr);
3759			i = rxr->next_to_check;
3760		}
3761		/* Get the next descriptor */
3762		cur = &rxr->rx_base[i];
3763		staterr = cur->wb.upper.status_error;
3764	}
3765	rxr->next_to_check = i;
3766
3767	/* Advance this ring's receive "Tail Pointer" (RDT). */
3768	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
3769	IGB_RX_UNLOCK(rxr);
3770
3771	if (!(staterr & E1000_RXD_STAT_DD))
3772		return FALSE;
3773
3774	return TRUE;
3775}
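
/*
 * Illustrative sketch, not part of the driver: a deferred-work caller
 * would typically invoke igb_rxeof() with the rx_process_limit tunable
 * and reschedule itself while packets remain.  The task and field names
 * below are hypothetical:
 */
#if 0	/* example only, never compiled */
static void
igb_rx_task_example(void *context, int pending)
{
	struct rx_ring *rxr = context;
	struct adapter *adapter = rxr->adapter;

	if (igb_rxeof(rxr, adapter->rx_process_limit) == TRUE)
		/* TRUE: descriptors remain, so run the task again */
		taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
}
#endif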
3776
3777#ifndef __NO_STRICT_ALIGNMENT
3778/*
3779 * When jumbo frames are enabled we should realign the entire payload on
3780 * architectures with strict alignment. This is a serious design mistake
3781 * of the 8254x, as it nullifies the benefit of DMA. The 8254x only allows
3782 * RX buffer sizes of 2048/4096/8192/16384; what we really want is
3783 * 2048 - ETHER_ALIGN so the payload ends up aligned. On architectures
3784 * without strict alignment the 8254x still performs unaligned memory
3785 * accesses, which reduces performance as well. To avoid copying an entire
3786 * frame just to align it, we allocate a new mbuf, copy the ethernet
3787 * header into it, and prepend the new mbuf to the existing mbuf chain.
3788 *
3789 * Be aware that the best performance of the 8254x is achieved only when
3790 * jumbo frames are not used at all on architectures with strict alignment.
3791 */
3792static int
3793igb_fixup_rx(struct rx_ring *rxr)
3794{
3795	struct adapter *adapter = rxr->adapter;
3796	struct mbuf *m, *n;
3797	int error;
3798
3799	error = 0;
3800	m = rxr->fmp;
3801	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3802		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3803		m->m_data += ETHER_HDR_LEN;
3804	} else {
3805		MGETHDR(n, M_DONTWAIT, MT_DATA);
3806		if (n != NULL) {
3807			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3808			m->m_data += ETHER_HDR_LEN;
3809			m->m_len -= ETHER_HDR_LEN;
3810			n->m_len = ETHER_HDR_LEN;
3811			M_MOVE_PKTHDR(n, m);
3812			n->m_next = m;
3813			rxr->fmp = n;
3814		} else {
3815			adapter->dropped_pkts++;
3816			m_freem(rxr->fmp);
3817			rxr->fmp = NULL;
3818			error = ENOMEM;
3819		}
3820	}
3821
3822	return (error);
3823}
3824#endif
3825
3826/*********************************************************************
3827 *
3828 *  Verify that the hardware indicated that the checksum is valid.
3829 *  Inform the stack of the checksum status so that it
3830 *  does not spend time verifying the checksum in software.
3831 *
3832 *********************************************************************/
3833static void
3834igb_rx_checksum(u32 staterr, struct mbuf *mp)
3835{
3836	u16 status = (u16)staterr;
3837	u8  errors = (u8) (staterr >> 24);
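	/* Writeback staterr: status in the low word, error bits in bits 31:24 */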
3838
3839	/* If the Ignore Checksum bit is set, report no offload status */
3840	if (status & E1000_RXD_STAT_IXSM) {
3841		mp->m_pkthdr.csum_flags = 0;
3842		return;
3843	}
3844
3845	if (status & E1000_RXD_STAT_IPCS) {
3846		/* Did it pass? */
3847		if (!(errors & E1000_RXD_ERR_IPE)) {
3848			/* IP Checksum Good */
3849			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3850			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3851
3852		} else
3853			mp->m_pkthdr.csum_flags = 0;
3854	}
3855
3856	if (status & E1000_RXD_STAT_TCPCS) {
3857		/* Did it pass? */
3858		if (!(errors & E1000_RXD_ERR_TCPE)) {
3859			mp->m_pkthdr.csum_flags |=
3860			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3861			mp->m_pkthdr.csum_data = htons(0xffff);
3862		}
3863	}
3864	return;
3865}
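
/*
 * Illustrative sketch, not part of the driver: this is roughly how the
 * TCP input path consumes the flags set above.  With CSUM_DATA_VALID and
 * CSUM_PSEUDO_HDR both set, the stack takes csum_data as the final
 * checksum and skips software verification:
 */
#if 0	/* example only, never compiled; m is the mbuf, th the TCP header */
	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID &&
	    m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) {
		th->th_sum = m->m_pkthdr.csum_data ^ 0xffff;
		/* th_sum == 0 now means the segment checksum was good */
	}
#endif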
3866
3867/*
3868 * This turns on hardware offload of VLAN
3869 * tag insertion and stripping.
3870 */
3871static void
3872igb_enable_hw_vlans(struct adapter *adapter)
3873{
3874	uint32_t ctrl;
3875
3876	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
3877	ctrl |= E1000_CTRL_VME;
3878	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
3879}
3880
3881static void
3882igb_enable_intr(struct adapter *adapter)
3883{
3884	/* With RSS/MSIX, select which interrupt causes auto-clear on delivery */
3885	if (adapter->msix_mem) {
3886		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
3887		    adapter->eims_mask);
3888		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
3889		    adapter->eims_mask);
3890		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3891		    E1000_IMS_LSC);
3892	} else {
3893		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3894		    IMS_ENABLE_MASK);
3895	}
3896	E1000_WRITE_FLUSH(&adapter->hw);
3897
3898	return;
3899}
3900
3901static void
3902igb_disable_intr(struct adapter *adapter)
3903{
3904	if (adapter->msix_mem) {
3905		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
3906		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
3907	}
3908	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
3909	E1000_WRITE_FLUSH(&adapter->hw);
3910	return;
3911}
3912
3913/*
3914 * Bit of a misnomer: what this really means is
3915 * to enable OS management of the system, i.e.
3916 * to disable the special hardware management features.
3917 */
3918static void
3919igb_init_manageability(struct adapter *adapter)
3920{
3921	/* A shared code workaround */
3922#define E1000_82542_MANC2H E1000_MANC2H
3923	if (adapter->has_manage) {
3924		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
3925		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3926
3927		/* disable hardware interception of ARP */
3928		manc &= ~(E1000_MANC_ARP_EN);
3929
3930		/* enable receiving management packets to the host */
3931		manc |= E1000_MANC_EN_MNG2HOST;
3932#define E1000_MNG2HOST_PORT_623 (1 << 5)
3933#define E1000_MNG2HOST_PORT_664 (1 << 6)
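		/* 623/664 are the RMCP and secure-RMCP (ASF/IPMI) management ports */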
3934		manc2h |= E1000_MNG2HOST_PORT_623;
3935		manc2h |= E1000_MNG2HOST_PORT_664;
3936		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
3937
3938		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3939	}
3940}
3941
3942/*
3943 * Give control back to hardware management
3944 * controller if there is one.
3945 */
3946static void
3947igb_release_manageability(struct adapter *adapter)
3948{
3949	if (adapter->has_manage) {
3950		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3951
3952		/* re-enable hardware interception of ARP */
3953		manc |= E1000_MANC_ARP_EN;
3954		manc &= ~E1000_MANC_EN_MNG2HOST;
3955
3956		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3957	}
3958}
3959
3960/*
3961 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
3962 * For ASF and Pass Through versions of f/w this means that
3963 * the driver is loaded.
3964 *
3965 */
3966static void
3967igb_get_hw_control(struct adapter *adapter)
3968{
3969	u32 ctrl_ext;
3970
3971	/* Let firmware know the driver has taken over */
3972	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3973	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3974	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
3975}
3976
3977/*
3978 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
3979 * For ASF and Pass Through versions of f/w this means that the
3980 * driver is no longer loaded.
3981 *
3982 */
3983static void
3984igb_release_hw_control(struct adapter *adapter)
3985{
3986	u32 ctrl_ext;
3987
3988	/* Let firmware take over control of the h/w */
3989	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3990	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3991	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
3992}
3993
3994static int
3995igb_is_valid_ether_addr(uint8_t *addr)
3996{
3997	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
3998
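	/* Reject group (multicast/broadcast) addresses and the all-zero address */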
3999	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4000		return (FALSE);
4001	}
4002
4003	return (TRUE);
4004}
4005
4006
4007/*
4008 * Enable PCI Wake On Lan capability
4009 */
4010void
4011igb_enable_wakeup(device_t dev)
4012{
4013	u16     cap, status;
4014	u8      id;
4015
4016	/* First find the capabilities pointer */
4017	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4018	/* Read the PM Capabilities */
4019	id = pci_read_config(dev, cap, 1);
4020	if (id != PCIY_PMG)     /* Something wrong */
4021		return;
4022	/* OK, we have the power capabilities, so
4023	   now get the status register */
4024	cap += PCIR_POWER_STATUS;
4025	status = pci_read_config(dev, cap, 2);
4026	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4027	pci_write_config(dev, cap, status, 2);
4028	return;
4029}
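
/*
 * Note: igb_enable_wakeup() assumes the power-management capability is
 * the first entry in the PCI capability list and silently returns when
 * it is not.  A sketch of a more defensive variant using the pci(9)
 * capability walker (assuming pci_find_extcap(9), the lookup available
 * in this era of FreeBSD):
 */
#if 0	/* example only, never compiled */
	int pmc;
	u16 status;

	if (pci_find_extcap(dev, PCIY_PMG, &pmc) != 0)
		return;		/* no PM capability anywhere in the list */
	status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
	pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);
#endif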
4030
4031
4032/**********************************************************************
4033 *
4034 *  Update the board statistics counters.
4035 *
4036 **********************************************************************/
4037static void
4038igb_update_stats_counters(struct adapter *adapter)
4039{
4040	struct ifnet   *ifp;
4041
4042	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4043	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4044		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4045		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4046	}
4047	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4048	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4049	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4050	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4051
4052	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4053	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4054	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4055	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4056	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4057	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4058	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4059	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4060	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4061	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4062	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4063	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4064	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4065	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4066	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4067	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4068	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4069	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4070	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4071	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4072
4073	/* For the 64-bit byte counters the low dword must be read first. */
4074	/* Both registers clear on the read of the high dword */
4075
4076	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4077	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4078
4079	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4080	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4081	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4082	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4083	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4084
4085	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4086	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4087
4088	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4089	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4090	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4091	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4092	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4093	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4094	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4095	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4096	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4097	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4098
4099	adapter->stats.algnerrc +=
4100		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4101	adapter->stats.rxerrc +=
4102		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4103	adapter->stats.tncrs +=
4104		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4105	adapter->stats.cexterr +=
4106		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4107	adapter->stats.tsctc +=
4108		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4109	adapter->stats.tsctfc +=
4110		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4111	ifp = adapter->ifp;
4112
4113	ifp->if_collisions = adapter->stats.colc;
4114
4115	/* Rx Errors */
4116	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4117	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4118	    adapter->stats.ruc + adapter->stats.roc +
4119	    adapter->stats.mpc + adapter->stats.cexterr;
4120
4121	/* Tx Errors */
4122	ifp->if_oerrors = adapter->stats.ecol +
4123	    adapter->stats.latecol + adapter->watchdog_events;
4124}
4125
4126
4127/**********************************************************************
4128 *
4129 *  This routine is called only when igb_display_debug_stats is enabled.
4130 *  This routine provides a way to take a look at important statistics
4131 *  maintained by the driver and hardware.
4132 *
4133 **********************************************************************/
4134static void
4135igb_print_debug_info(struct adapter *adapter)
4136{
4137	device_t dev = adapter->dev;
4138	struct rx_ring *rxr = adapter->rx_rings;
4139	struct tx_ring *txr = adapter->tx_rings;
4140	uint8_t *hw_addr = adapter->hw.hw_addr;
4141
4142	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4143	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4144	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4145	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4146	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4147	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4148	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4149	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4150	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4151	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4152	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4153	    adapter->hw.fc.high_water,
4154	    adapter->hw.fc.low_water);
4155	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4156	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4157	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4158	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4159	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4160	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4161
4162	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4163		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4164		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4165		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4166		device_printf(dev, "no descriptors avail event = %lu\n",
4167		    txr->no_desc_avail);
4168		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lu\n", txr->me,
4169		    txr->tx_irq);
4170		device_printf(dev, "TX(%d) Packets sent = %lu\n", txr->me,
4171		    txr->tx_packets);
4172	}
4173
4174	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4175		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4176		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4177		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4178		device_printf(dev, "RX(%d) Packets received = %lu\n", rxr->me,
4179		    rxr->rx_packets);
4180		device_printf(dev, "RX(%d) Byte count = %lu\n", rxr->me,
4181		    rxr->rx_bytes);
4182		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lu\n", rxr->me,
4183		    rxr->rx_irq);
4184	}
4185	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4186
4187	device_printf(dev, "Std mbuf failed = %ld\n",
4188	    adapter->mbuf_alloc_failed);
4189	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4190	    adapter->mbuf_cluster_failed);
4191	device_printf(dev, "Driver dropped packets = %ld\n",
4192	    adapter->dropped_pkts);
4193	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4194	    adapter->no_tx_dma_setup);
4195}
4196
4197static void
4198igb_print_hw_stats(struct adapter *adapter)
4199{
4200	device_t dev = adapter->dev;
4201
4202	device_printf(dev, "Excessive collisions = %lld\n",
4203	    (long long)adapter->stats.ecol);
4204#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4205	device_printf(dev, "Symbol errors = %lld\n",
4206	    (long long)adapter->stats.symerrs);
4207#endif
4208	device_printf(dev, "Sequence errors = %lld\n",
4209	    (long long)adapter->stats.sec);
4210	device_printf(dev, "Defer count = %lld\n",
4211	    (long long)adapter->stats.dc);
4212	device_printf(dev, "Missed Packets = %lld\n",
4213	    (long long)adapter->stats.mpc);
4214	device_printf(dev, "Receive No Buffers = %lld\n",
4215	    (long long)adapter->stats.rnbc);
4216	/* RLEC is inaccurate on some hardware, calculate our own. */
4217	device_printf(dev, "Receive Length Errors = %lld\n",
4218	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4219	device_printf(dev, "Receive errors = %lld\n",
4220	    (long long)adapter->stats.rxerrc);
4221	device_printf(dev, "Crc errors = %lld\n",
4222	    (long long)adapter->stats.crcerrs);
4223	device_printf(dev, "Alignment errors = %lld\n",
4224	    (long long)adapter->stats.algnerrc);
4225	/* On 82575 these are collision counts */
4226	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4227	    (long long)adapter->stats.cexterr);
4228	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4229	device_printf(dev, "watchdog timeouts = %ld\n",
4230	    adapter->watchdog_events);
4231	device_printf(dev, "XON Rcvd = %lld\n",
4232	    (long long)adapter->stats.xonrxc);
4233	device_printf(dev, "XON Xmtd = %lld\n",
4234	    (long long)adapter->stats.xontxc);
4235	device_printf(dev, "XOFF Rcvd = %lld\n",
4236	    (long long)adapter->stats.xoffrxc);
4237	device_printf(dev, "XOFF Xmtd = %lld\n",
4238	    (long long)adapter->stats.xofftxc);
4239	device_printf(dev, "Good Packets Rcvd = %lld\n",
4240	    (long long)adapter->stats.gprc);
4241	device_printf(dev, "Good Packets Xmtd = %lld\n",
4242	    (long long)adapter->stats.gptc);
4243	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4244	    (long long)adapter->stats.tsctc);
4245	device_printf(dev, "TSO Contexts Failed = %lld\n",
4246	    (long long)adapter->stats.tsctfc);
4247}
4248
4249/**********************************************************************
4250 *
4251 *  This routine provides a way to dump out the adapter eeprom,
4252 *  often a useful debug/service tool. This only dumps the first
4253 *  32 words; the stuff that matters is within that extent.
4254 *
4255 **********************************************************************/
4256static void
4257igb_print_nvm_info(struct adapter *adapter)
4258{
4259	u16	eeprom_data;
4260	int	i, j, row = 0;
4261
4262	/* It's a bit crude, but it gets the job done */
4263	printf("\nInterface EEPROM Dump:\n");
4264	printf("Offset\n0x0000  ");
4265	for (i = 0, j = 0; i < 32; i++, j++) {
4266		if (j == 8) { /* Make the offset block */
4267			j = 0; ++row;
4268			printf("\n0x00%x0  ", row);
4269		}
4270		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4271		printf("%04x ", eeprom_data);
4272	}
4273	printf("\n");
4274}
4275
4276static int
4277igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4278{
4279	struct adapter *adapter;
4280	int error;
4281	int result;
4282
4283	result = -1;
4284	error = sysctl_handle_int(oidp, &result, 0, req);
4285
4286	if (error || !req->newptr)
4287		return (error);
4288
4289	if (result == 1) {
4290		adapter = (struct adapter *)arg1;
4291		igb_print_debug_info(adapter);
4292	}
4293	/*
4294	 * This value will cause a hex dump of the
4295	 * first 32 16-bit words of the EEPROM to
4296	 * the screen.
4297	 */
4298	if (result == 2) {
4299		adapter = (struct adapter *)arg1;
4300		igb_print_nvm_info(adapter);
4301	}
4302
4303	return (error);
4304}
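
/*
 * Usage note: writing 1 to the sysctl bound to this handler prints the
 * debug info above; writing 2 dumps the first 32 EEPROM words.  With a
 * conventional OID name this would look something like
 * "sysctl dev.igb.0.debug=1" (the exact name depends on how the OID is
 * registered elsewhere in this file).
 */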
4305
4306
4307static int
4308igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4309{
4310	struct adapter *adapter;
4311	int error;
4312	int result;
4313
4314	result = -1;
4315	error = sysctl_handle_int(oidp, &result, 0, req);
4316
4317	if (error || !req->newptr)
4318		return (error);
4319
4320	if (result == 1) {
4321		adapter = (struct adapter *)arg1;
4322		igb_print_hw_stats(adapter);
4323	}
4324
4325	return (error);
4326}
4327
4328static int
4329igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4330{
4331	struct igb_int_delay_info *info;
4332	struct adapter *adapter;
4333	uint32_t regval;
4334	int error;
4335	int usecs;
4336	int ticks;
4337
4338	info = (struct igb_int_delay_info *)arg1;
4339	usecs = info->value;
4340	error = sysctl_handle_int(oidp, &usecs, 0, req);
4341	if (error != 0 || req->newptr == NULL)
4342		return (error);
4343	if (usecs < 0 || usecs > IGB_TICKS_TO_USECS(65535))
4344		return (EINVAL);
4345	info->value = usecs;
4346	ticks = IGB_USECS_TO_TICKS(usecs);
4347
4348	adapter = info->adapter;
4349
4350	IGB_CORE_LOCK(adapter);
4351	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4352	regval = (regval & ~0xffff) | (ticks & 0xffff);
4353	/* Handle a few special cases. */
4354	switch (info->offset) {
4355	case E1000_RDTR:
4356		break;
4357	case E1000_TIDV:
4358		if (ticks == 0) {
4359			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4360			/* Don't write 0 into the TIDV register. */
4361			regval++;
4362		} else
4363			if (adapter->hw.mac.type < e1000_82575)
4364				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4365		break;
4366	}
4367	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4368	IGB_CORE_UNLOCK(adapter);
4369	return (0);
4370}
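
/*
 * Worked example, assuming the usual e1000 1.024 us tick granularity
 * behind IGB_USECS_TO_TICKS: a request of 100 us becomes roughly
 * 100 * 1000 / 1024 = 97 ticks, which is merged into the low 16 bits of
 * the delay register.  The special cases above keep TIDV nonzero and
 * manage the IDE transmit-descriptor bit on pre-82575 parts.
 */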
4371
4372static void
4373igb_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4374	const char *description, struct igb_int_delay_info *info,
4375	int offset, int value)
4376{
4377	info->adapter = adapter;
4378	info->offset = offset;
4379	info->value = value;
4380	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4381	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4382	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4383	    info, 0, igb_sysctl_int_delay, "I", description);
4384}
4385
4386static void
4387igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4388	const char *description, int *limit, int value)
4389{
4390	*limit = value;
4391	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4392	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4393	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4394}
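
/*
 * Usage note: the value created here caps how many receive descriptors a
 * single igb_rxeof() pass may clean.  Assuming it is registered under the
 * name "rx_processing_limit", it can be tuned at runtime with e.g.
 * "sysctl dev.igb.0.rx_processing_limit=200".
 */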
4395
4396
4397