if_igb.c revision 178523
1164022Sdds/******************************************************************************
2164022Sdds
3164022Sdds  Copyright (c) 2001-2008, Intel Corporation
4164022Sdds  All rights reserved.
5164022Sdds
6164022Sdds  Redistribution and use in source and binary forms, with or without
7164022Sdds  modification, are permitted provided that the following conditions are met:
8164022Sdds
9164022Sdds   1. Redistributions of source code must retain the above copyright notice,
10164022Sdds      this list of conditions and the following disclaimer.
11164022Sdds
12164022Sdds   2. Redistributions in binary form must reproduce the above copyright
13164022Sdds      notice, this list of conditions and the following disclaimer in the
14164022Sdds      documentation and/or other materials provided with the distribution.
15164022Sdds
16164022Sdds   3. Neither the name of the Intel Corporation nor the names of its
17164022Sdds      contributors may be used to endorse or promote products derived from
18164022Sdds      this software without specific prior written permission.
19164022Sdds
20164022Sdds  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21164022Sdds  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22164022Sdds  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23164022Sdds  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24164022Sdds  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25164022Sdds  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26164022Sdds  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27164022Sdds  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28164022Sdds  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29164022Sdds  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30164022Sdds  POSSIBILITY OF SUCH DAMAGE.
31164022Sdds
32164022Sdds******************************************************************************/
33164022Sdds/*$FreeBSD: head/sys/dev/igb/if_igb.c 178523 2008-04-25 21:19:41Z jfv $*/
34164022Sdds
35164022Sdds#ifdef HAVE_KERNEL_OPTION_HEADERS
36164022Sdds#include "opt_device_polling.h"
37164022Sdds#endif
38164022Sdds
39164022Sdds#include <sys/param.h>
40164022Sdds#include <sys/systm.h>
41164022Sdds#include <sys/bus.h>
42164022Sdds#include <sys/endian.h>
43164022Sdds#include <sys/kernel.h>
44164022Sdds#include <sys/kthread.h>
45164022Sdds#include <sys/malloc.h>
46164022Sdds#include <sys/mbuf.h>
47164022Sdds#include <sys/module.h>
48164022Sdds#include <sys/rman.h>
49164022Sdds#include <sys/socket.h>
50164022Sdds#include <sys/sockio.h>
51164022Sdds#include <sys/sysctl.h>
52164022Sdds#include <sys/taskqueue.h>
53164022Sdds#include <sys/pcpu.h>
54164022Sdds#include <machine/bus.h>
55164022Sdds#include <machine/resource.h>
56164022Sdds
57164022Sdds#include <net/bpf.h>
58164022Sdds#include <net/ethernet.h>
59164022Sdds#include <net/if.h>
60164022Sdds#include <net/if_arp.h>
61164022Sdds#include <net/if_dl.h>
62164022Sdds#include <net/if_media.h>
63164022Sdds
64164022Sdds#include <net/if_types.h>
65164022Sdds#include <net/if_vlan_var.h>
66164022Sdds
67164022Sdds#include <netinet/in_systm.h>
68164022Sdds#include <netinet/in.h>
69164022Sdds#include <netinet/if_ether.h>
70164022Sdds#include <netinet/ip.h>
71164022Sdds#include <netinet/ip6.h>
72164022Sdds#include <netinet/tcp.h>
73164022Sdds#include <netinet/udp.h>
74164022Sdds
75164022Sdds#include <machine/in_cksum.h>
76164022Sdds#include <dev/pci/pcivar.h>
77164022Sdds#include <dev/pci/pcireg.h>
78164022Sdds
79164022Sdds#include "e1000_api.h"
80164022Sdds#include "e1000_82575.h"
81164022Sdds#include "if_igb.h"
82164022Sdds
83164022Sdds/*********************************************************************
84164022Sdds *  Set this to one to display debug statistics
85164022Sdds *********************************************************************/
86164022Sddsint	igb_display_debug_stats = 0;
87164022Sdds
88164022Sdds/*********************************************************************
89164022Sdds *  Driver version:
90164022Sdds *********************************************************************/
91164022Sddschar igb_driver_version[] = "version - 1.1.9";
92164022Sdds
93164022Sdds
94164022Sdds/*********************************************************************
95164022Sdds *  PCI Device ID Table
96164022Sdds *
97164022Sdds *  Used by probe to select devices to load on
98164022Sdds *  Last field stores an index into e1000_strings
99164022Sdds *  Last entry must be all 0s
100164022Sdds *
101 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
102 *********************************************************************/
103
104static igb_vendor_info_t igb_vendor_info_array[] =
105{
106	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
107	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
108						PCI_ANY_ID, PCI_ANY_ID, 0},
109	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
110						PCI_ANY_ID, PCI_ANY_ID, 0},
111	/* required last entry */
112	{ 0, 0, 0, 0, 0}
113};
114
115/*********************************************************************
116 *  Table of branding strings for all supported NICs.
117 *********************************************************************/
118
119static char *igb_strings[] = {
120	"Intel(R) PRO/1000 Network Connection"
121};
122
123/*********************************************************************
124 *  Function prototypes
125 *********************************************************************/
126static int	igb_probe(device_t);
127static int	igb_attach(device_t);
128static int	igb_detach(device_t);
129static int	igb_shutdown(device_t);
130static int	igb_suspend(device_t);
131static int	igb_resume(device_t);
132static void	igb_start(struct ifnet *);
133static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
134static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
135static void	igb_watchdog(struct adapter *);
136static void	igb_init(void *);
137static void	igb_init_locked(struct adapter *);
138static void	igb_stop(void *);
139static void	igb_media_status(struct ifnet *, struct ifmediareq *);
140static int	igb_media_change(struct ifnet *);
141static void	igb_identify_hardware(struct adapter *);
142static int	igb_allocate_pci_resources(struct adapter *);
143static int	igb_allocate_msix(struct adapter *);
144static int	igb_allocate_legacy(struct adapter *);
145static int	igb_setup_msix(struct adapter *);
146static void	igb_free_pci_resources(struct adapter *);
147static void	igb_local_timer(void *);
148static int	igb_hardware_init(struct adapter *);
149static void	igb_setup_interface(device_t, struct adapter *);
150static int	igb_allocate_queues(struct adapter *);
151static void	igb_configure_queues(struct adapter *);
152
153static int	igb_allocate_transmit_buffers(struct tx_ring *);
154static void	igb_setup_transmit_structures(struct adapter *);
155static void	igb_setup_transmit_ring(struct tx_ring *);
156static void	igb_initialize_transmit_units(struct adapter *);
157static void	igb_free_transmit_structures(struct adapter *);
158static void	igb_free_transmit_buffers(struct tx_ring *);
159
160static int	igb_allocate_receive_buffers(struct rx_ring *);
161static int	igb_setup_receive_structures(struct adapter *);
162static int	igb_setup_receive_ring(struct rx_ring *);
163static void	igb_initialize_receive_units(struct adapter *);
164static void	igb_free_receive_structures(struct adapter *);
165static void	igb_free_receive_buffers(struct rx_ring *);
166
167static void	igb_enable_intr(struct adapter *);
168static void	igb_disable_intr(struct adapter *);
169static void	igb_update_stats_counters(struct adapter *);
170static bool	igb_txeof(struct tx_ring *);
171static bool	igb_rxeof(struct rx_ring *, int);
172#ifndef __NO_STRICT_ALIGNMENT
173static int	igb_fixup_rx(struct rx_ring *);
174#endif
175static void	igb_rx_checksum(u32, struct mbuf *);
176static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
177static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
178static void	igb_set_promisc(struct adapter *);
179static void	igb_disable_promisc(struct adapter *);
180static void	igb_set_multi(struct adapter *);
181static void	igb_print_hw_stats(struct adapter *);
182static void	igb_update_link_status(struct adapter *);
183static int	igb_get_buf(struct rx_ring *, int);
184static void	igb_enable_hw_vlans(struct adapter *);
185static int	igb_xmit(struct tx_ring *, struct mbuf **);
186static int	igb_dma_malloc(struct adapter *, bus_size_t,
187		    struct igb_dma_alloc *, int);
188static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
189static void	igb_print_debug_info(struct adapter *);
190static void	igb_print_nvm_info(struct adapter *);
191static int 	igb_is_valid_ether_addr(u8 *);
192static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
193static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
194static int	igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
195static void	igb_add_int_delay_sysctl(struct adapter *, const char *,
196		    const char *, struct igb_int_delay_info *, int, int);
197/* Management and WOL Support */
198static void	igb_init_manageability(struct adapter *);
199static void	igb_release_manageability(struct adapter *);
200static void     igb_get_hw_control(struct adapter *);
201static void     igb_release_hw_control(struct adapter *);
202static void     igb_enable_wakeup(device_t);
203
204
205static int	igb_irq_fast(void *);
206static void	igb_add_rx_process_limit(struct adapter *, const char *,
207		    const char *, int *, int);
208static void	igb_handle_rxtx(void *context, int pending);
209static void	igb_handle_tx(void *context, int pending);
210static void	igb_handle_rx(void *context, int pending);
211static void	igb_handle_link(void *context, int pending);
212
213/* These are MSIX only irq handlers */
214static void	igb_msix_rx(void *);
215static void	igb_msix_tx(void *);
216static void	igb_msix_link(void *);
217
218#ifdef DEVICE_POLLING
219static poll_handler_t igb_poll;
220#endif
221
222/*********************************************************************
223 *  FreeBSD Device Interface Entry Points
224 *********************************************************************/
225
226static device_method_t igb_methods[] = {
227	/* Device interface */
228	DEVMETHOD(device_probe, igb_probe),
229	DEVMETHOD(device_attach, igb_attach),
230	DEVMETHOD(device_detach, igb_detach),
231	DEVMETHOD(device_shutdown, igb_shutdown),
232	DEVMETHOD(device_suspend, igb_suspend),
233	DEVMETHOD(device_resume, igb_resume),
234	{0, 0}
235};
236
237static driver_t igb_driver = {
238	"igb", igb_methods, sizeof(struct adapter),
239};
240
241static devclass_t igb_devclass;
242DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
243MODULE_DEPEND(igb, pci, 1, 1, 1);
244MODULE_DEPEND(igb, ether, 1, 1, 1);
245
246/*********************************************************************
247 *  Tunable default values.
248 *********************************************************************/
249
250#define IGB_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
251#define IGB_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
252#define M_TSO_LEN			66
253
254/* Allow common code without TSO */
255#ifndef CSUM_TSO
256#define CSUM_TSO	0
257#endif
258
259static int igb_tx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TIDV);
260static int igb_rx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RDTR);
261static int igb_tx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TADV);
262static int igb_rx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RADV);
263static int igb_rxd = IGB_DEFAULT_RXD;
264static int igb_txd = IGB_DEFAULT_TXD;
265static int igb_smart_pwr_down = FALSE;
266TUNABLE_INT("hw.igb.tx_int_delay", &igb_tx_int_delay_dflt);
267TUNABLE_INT("hw.igb.rx_int_delay", &igb_rx_int_delay_dflt);
268TUNABLE_INT("hw.igb.tx_abs_int_delay", &igb_tx_abs_int_delay_dflt);
269TUNABLE_INT("hw.igb.rx_abs_int_delay", &igb_rx_abs_int_delay_dflt);
270TUNABLE_INT("hw.igb.rxd", &igb_rxd);
271TUNABLE_INT("hw.igb.txd", &igb_txd);
272TUNABLE_INT("hw.igb.smart_pwr_down", &igb_smart_pwr_down);
273
274/*
275** IF YOU CHANGE THESE: be sure and change IGB_MSIX_VEC in
276** if_igb.h to match. These can be autoconfigured if set to
277** 0, it will then be based on number of cpus.
278*/
279static int igb_tx_queues = 1;
280static int igb_rx_queues = 1;
281TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
282TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);
283
284extern int mp_ncpus;
285
286/* How many packets rxeof tries to clean at a time */
287static int igb_rx_process_limit = 100;
288TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
289
290/*********************************************************************
291 *  Device identification routine
292 *
293 *  igb_probe determines if the driver should be loaded on
294 *  adapter based on PCI vendor/device id of the adapter.
295 *
296 *  return BUS_PROBE_DEFAULT on success, positive on failure
297 *********************************************************************/
298
299static int
300igb_probe(device_t dev)
301{
302	char		adapter_name[60];
303	uint16_t	pci_vendor_id = 0;
304	uint16_t	pci_device_id = 0;
305	uint16_t	pci_subvendor_id = 0;
306	uint16_t	pci_subdevice_id = 0;
307	igb_vendor_info_t *ent;
308
309	INIT_DEBUGOUT("igb_probe: begin");
310
311	pci_vendor_id = pci_get_vendor(dev);
312	if (pci_vendor_id != IGB_VENDOR_ID)
313		return (ENXIO);
314
315	pci_device_id = pci_get_device(dev);
316	pci_subvendor_id = pci_get_subvendor(dev);
317	pci_subdevice_id = pci_get_subdevice(dev);
318
319	ent = igb_vendor_info_array;
320	while (ent->vendor_id != 0) {
321		if ((pci_vendor_id == ent->vendor_id) &&
322		    (pci_device_id == ent->device_id) &&
323
324		    ((pci_subvendor_id == ent->subvendor_id) ||
325		    (ent->subvendor_id == PCI_ANY_ID)) &&
326
327		    ((pci_subdevice_id == ent->subdevice_id) ||
328		    (ent->subdevice_id == PCI_ANY_ID))) {
329			sprintf(adapter_name, "%s %s",
330				igb_strings[ent->index],
331				igb_driver_version);
332			device_set_desc_copy(dev, adapter_name);
333			return (BUS_PROBE_DEFAULT);
334		}
335		ent++;
336	}
337
338	return (ENXIO);
339}
340
341/*********************************************************************
342 *  Device initialization routine
343 *
344 *  The attach entry point is called when the driver is being loaded.
345 *  This routine identifies the type of hardware, allocates all resources
346 *  and initializes the hardware.
347 *
348 *  return 0 on success, positive on failure
349 *********************************************************************/
350
351static int
352igb_attach(device_t dev)
353{
354	struct adapter	*adapter;
355	int		error = 0;
356	u16		eeprom_data;
357
358	INIT_DEBUGOUT("igb_attach: begin");
359
360	adapter = device_get_softc(dev);
361	adapter->dev = adapter->osdep.dev = dev;
362	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
363
364	/* SYSCTL stuff */
365	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
366	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
367	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
368	    igb_sysctl_debug_info, "I", "Debug Information");
369
370	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
371	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
372	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
373	    igb_sysctl_stats, "I", "Statistics");
374
375	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
376
377	/* Determine hardware and mac info */
378	igb_identify_hardware(adapter);
379
380	/* Setup PCI resources */
381	if (igb_allocate_pci_resources(adapter)) {
382		device_printf(dev, "Allocation of PCI resources failed\n");
383		error = ENXIO;
384		goto err_pci;
385	}
386
387	/* Do Shared Code initialization */
388	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
389		device_printf(dev, "Setup of Shared code failed\n");
390		error = ENXIO;
391		goto err_pci;
392	}
393
394	e1000_get_bus_info(&adapter->hw);
395
396	/* Set up some sysctls for the tunable interrupt delays */
397	igb_add_int_delay_sysctl(adapter, "rx_int_delay",
398	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
399	    E1000_REGISTER(&adapter->hw, E1000_RDTR), igb_rx_int_delay_dflt);
400	igb_add_int_delay_sysctl(adapter, "tx_int_delay",
401	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
402	    E1000_REGISTER(&adapter->hw, E1000_TIDV), igb_tx_int_delay_dflt);
403	igb_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
404	    "receive interrupt delay limit in usecs",
405	    &adapter->rx_abs_int_delay,
406	    E1000_REGISTER(&adapter->hw, E1000_RADV),
407	    igb_rx_abs_int_delay_dflt);
408	igb_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
409	    "transmit interrupt delay limit in usecs",
410	    &adapter->tx_abs_int_delay,
411	    E1000_REGISTER(&adapter->hw, E1000_TADV),
412	    igb_tx_abs_int_delay_dflt);
413
414	/* Sysctls for limiting the amount of work done in the taskqueue */
415	igb_add_rx_process_limit(adapter, "rx_processing_limit",
416	    "max number of rx packets to process", &adapter->rx_process_limit,
417	    igb_rx_process_limit);
418
419	/*
420	 * Validate number of transmit and receive descriptors. It
421	 * must not exceed hardware maximum, and must be multiple
422	 * of E1000_DBA_ALIGN.
423	 */
424	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
425	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
426		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
427		    IGB_DEFAULT_TXD, igb_txd);
428		adapter->num_tx_desc = IGB_DEFAULT_TXD;
429	} else
430		adapter->num_tx_desc = igb_txd;
431	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
432	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
433		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
434		    IGB_DEFAULT_RXD, igb_rxd);
435		adapter->num_rx_desc = IGB_DEFAULT_RXD;
436	} else
437		adapter->num_rx_desc = igb_rxd;
438
439	adapter->hw.mac.autoneg = DO_AUTO_NEG;
440	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
441	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
442	adapter->rx_buffer_len = 2048;
443
444	/* Copper options */
445	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
446		adapter->hw.phy.mdix = AUTO_ALL_MODES;
447		adapter->hw.phy.disable_polarity_correction = FALSE;
448		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
449	}
450
451	/*
452	 * Set the frame limits assuming
453	 * standard ethernet sized frames.
454	 */
455	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
456	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
457
458	/*
459	 * This controls when hardware reports transmit completion
460	 * status.
461	 */
462	adapter->hw.mac.report_tx_early = 1;
463
464	/*
465	** Allocate and Setup Queues
466	*/
467	if (igb_allocate_queues(adapter)) {
468		error = ENOMEM;
469		goto err_hw_init;
470	}
471
472	/* Make sure we have a good EEPROM before we read from it */
473	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
474		/*
475		** Some PCI-E parts fail the first check due to
476		** the link being in sleep state, call it again,
477		** if it fails a second time its a real issue.
478		*/
479		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
480			device_printf(dev,
481			    "The EEPROM Checksum Is Not Valid\n");
482			error = EIO;
483			goto err_late;
484		}
485	}
486
487	/* Initialize the hardware */
488	if (igb_hardware_init(adapter)) {
489		device_printf(dev, "Unable to initialize the hardware\n");
490		error = EIO;
491		goto err_late;
492	}
493
494	/* Copy the permanent MAC address out of the EEPROM */
495	if (e1000_read_mac_addr(&adapter->hw) < 0) {
496		device_printf(dev, "EEPROM read error while reading MAC"
497		    " address\n");
498		error = EIO;
499		goto err_late;
500	}
501
502	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
503		device_printf(dev, "Invalid MAC address\n");
504		error = EIO;
505		goto err_late;
506	}
507
508	/*
509	** Configure Interrupts
510	*/
511	if (adapter->msix > 1) /* MSIX */
512		error = igb_allocate_msix(adapter);
513	else /* MSI or Legacy */
514		error = igb_allocate_legacy(adapter);
515	if (error)
516		goto err_late;
517
518	/* Setup OS specific network interface */
519	igb_setup_interface(dev, adapter);
520
521	/* Initialize statistics */
522	igb_update_stats_counters(adapter);
523
524	adapter->hw.mac.get_link_status = 1;
525	igb_update_link_status(adapter);
526
527	/* Indicate SOL/IDER usage */
528	if (e1000_check_reset_block(&adapter->hw))
529		device_printf(dev,
530		    "PHY reset is blocked due to SOL/IDER session.\n");
531
532	/* Determine if we have to control management hardware */
533	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
534
535	/*
536	 * Setup Wake-on-Lan
537	 */
538	/* APME bit in EEPROM is mapped to WUC.APME */
539	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
540	if (eeprom_data)
541		adapter->wol = E1000_WUFC_MAG;
542
543	/* Tell the stack that the interface is not active */
544	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
545
546	INIT_DEBUGOUT("igb_attach: end");
547
548	return (0);
549
550err_late:
551	igb_free_transmit_structures(adapter);
552	igb_free_receive_structures(adapter);
553	igb_release_hw_control(adapter);
554err_hw_init:
555	e1000_remove_device(&adapter->hw);
556err_pci:
557	igb_free_pci_resources(adapter);
558	IGB_CORE_LOCK_DESTROY(adapter);
559
560	return (error);
561}
562
563/*********************************************************************
564 *  Device removal routine
565 *
566 *  The detach entry point is called when the driver is being removed.
567 *  This routine stops the adapter and deallocates all the resources
568 *  that were allocated for driver operation.
569 *
570 *  return 0 on success, positive on failure
571 *********************************************************************/
572
573static int
574igb_detach(device_t dev)
575{
576	struct adapter	*adapter = device_get_softc(dev);
577	struct ifnet	*ifp = adapter->ifp;
578
579	INIT_DEBUGOUT("igb_detach: begin");
580
581	/* Make sure VLANS are not using driver */
582	if (adapter->ifp->if_vlantrunk != NULL) {
583		device_printf(dev,"Vlan in use, detach first\n");
584		return (EBUSY);
585	}
586
587#ifdef DEVICE_POLLING
588	if (ifp->if_capenable & IFCAP_POLLING)
589		ether_poll_deregister(ifp);
590#endif
591
592	IGB_CORE_LOCK(adapter);
593	adapter->in_detach = 1;
594	igb_stop(adapter);
595	IGB_CORE_UNLOCK(adapter);
596
597	e1000_phy_hw_reset(&adapter->hw);
598
599	/* Give control back to firmware */
600	igb_release_manageability(adapter);
601	igb_release_hw_control(adapter);
602
603	if (adapter->wol) {
604		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
605		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
606		igb_enable_wakeup(dev);
607	}
608
609	ether_ifdetach(adapter->ifp);
610
611	callout_drain(&adapter->timer);
612
613	e1000_remove_device(&adapter->hw);
614	igb_free_pci_resources(adapter);
615	bus_generic_detach(dev);
616	if_free(ifp);
617
618	igb_free_transmit_structures(adapter);
619	igb_free_receive_structures(adapter);
620
621	IGB_CORE_LOCK_DESTROY(adapter);
622
623	return (0);
624}
625
626/*********************************************************************
627 *
628 *  Shutdown entry point
629 *
630 **********************************************************************/
631
632static int
633igb_shutdown(device_t dev)
634{
635	return igb_suspend(dev);
636}
637
638/*
639 * Suspend/resume device methods.
640 */
641static int
642igb_suspend(device_t dev)
643{
644	struct adapter *adapter = device_get_softc(dev);
645
646	IGB_CORE_LOCK(adapter);
647
648	igb_stop(adapter);
649
650        igb_release_manageability(adapter);
651	igb_release_hw_control(adapter);
652
653        if (adapter->wol) {
654                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
655                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
656                igb_enable_wakeup(dev);
657        }
658
659	IGB_CORE_UNLOCK(adapter);
660
661	return bus_generic_suspend(dev);
662}
663
664static int
665igb_resume(device_t dev)
666{
667	struct adapter *adapter = device_get_softc(dev);
668	struct ifnet *ifp = adapter->ifp;
669
670	IGB_CORE_LOCK(adapter);
671	igb_init_locked(adapter);
672	igb_init_manageability(adapter);
673
674	if ((ifp->if_flags & IFF_UP) &&
675	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
676		igb_start(ifp);
677
678	IGB_CORE_UNLOCK(adapter);
679
680	return bus_generic_resume(dev);
681}
682
683
684/*********************************************************************
685 *  Transmit entry point
686 *
687 *  igb_start is called by the stack to initiate a transmit.
688 *  The driver will remain in this routine as long as there are
689 *  packets to transmit and transmit resources are available.
690 *  In case resources are not available stack is notified and
691 *  the packet is requeued.
692 **********************************************************************/
693
694static void
695igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
696{
697	struct adapter	*adapter = ifp->if_softc;
698	struct mbuf	*m_head;
699
700	IGB_TX_LOCK_ASSERT(txr);
701
702	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
703	    IFF_DRV_RUNNING)
704		return;
705	if (!adapter->link_active)
706		return;
707
708	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
709
710		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
711		if (m_head == NULL)
712			break;
713		/*
714		 *  Encapsulation can modify our pointer, and or make it
715		 *  NULL on failure.  In that event, we can't requeue.
716		 */
717		if (igb_xmit(txr, &m_head)) {
718			if (m_head == NULL)
719				break;
720			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
721			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
722			break;
723		}
724
725		/* Send a copy of the frame to the BPF listener */
726		ETHER_BPF_MTAP(ifp, m_head);
727
728		/* Set timeout in case hardware has problems transmitting. */
729		txr->watchdog_timer = IGB_TX_TIMEOUT;
730	}
731}
732
733static void
734igb_start(struct ifnet *ifp)
735{
736	struct adapter	*adapter = ifp->if_softc;
737	struct tx_ring	*txr;
738	u32		queue = 0;
739
740	/*
741	** This is really just here for testing
742	** TX multiqueue, ultimately what is
743	** needed is the flow support in the stack
744	** and appropriate logic here to deal with
745	** it. -jfv
746	*/
747	if (adapter->num_tx_queues > 1)
748		queue = (curcpu % adapter->num_tx_queues);
749
750	txr = &adapter->tx_rings[queue];
751	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
752		IGB_TX_LOCK(txr);
753		igb_start_locked(txr, ifp);
754		IGB_TX_UNLOCK(txr);
755	}
756}
757
758/*********************************************************************
759 *  Ioctl entry point
760 *
761 *  igb_ioctl is called when the user wants to configure the
762 *  interface.
763 *
764 *  return 0 on success, positive on failure
765 **********************************************************************/
766
767static int
768igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
769{
770	struct adapter	*adapter = ifp->if_softc;
771	struct ifreq *ifr = (struct ifreq *)data;
772	struct ifaddr *ifa = (struct ifaddr *)data;
773	int error = 0;
774
775	if (adapter->in_detach)
776		return (error);
777
778	switch (command) {
779	case SIOCSIFADDR:
780		if (ifa->ifa_addr->sa_family == AF_INET) {
781			/*
782			 * XXX
783			 * Since resetting hardware takes a very long time
784			 * and results in link renegotiation we only
785			 * initialize the hardware only when it is absolutely
786			 * required.
787			 */
788			ifp->if_flags |= IFF_UP;
789			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
790				IGB_CORE_LOCK(adapter);
791				igb_init_locked(adapter);
792				IGB_CORE_UNLOCK(adapter);
793			}
794			arp_ifinit(ifp, ifa);
795		} else
796			error = ether_ioctl(ifp, command, data);
797		break;
798	case SIOCSIFMTU:
799	    {
800		int max_frame_size;
801
802		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
803
804		IGB_CORE_LOCK(adapter);
805		max_frame_size = 9234;
806		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
807		    ETHER_CRC_LEN) {
808			IGB_CORE_UNLOCK(adapter);
809			error = EINVAL;
810			break;
811		}
812
813		ifp->if_mtu = ifr->ifr_mtu;
814		adapter->max_frame_size =
815		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
816		igb_init_locked(adapter);
817		IGB_CORE_UNLOCK(adapter);
818		break;
819	    }
820	case SIOCSIFFLAGS:
821		IOCTL_DEBUGOUT("ioctl rcv'd:\
822		    SIOCSIFFLAGS (Set Interface Flags)");
823		IGB_CORE_LOCK(adapter);
824		if (ifp->if_flags & IFF_UP) {
825			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
826				if ((ifp->if_flags ^ adapter->if_flags) &
827				    (IFF_PROMISC | IFF_ALLMULTI)) {
828					igb_disable_promisc(adapter);
829					igb_set_promisc(adapter);
830				}
831			} else
832				igb_init_locked(adapter);
833		} else
834			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
835				igb_stop(adapter);
836		adapter->if_flags = ifp->if_flags;
837		IGB_CORE_UNLOCK(adapter);
838		break;
839	case SIOCADDMULTI:
840	case SIOCDELMULTI:
841		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
842		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
843			IGB_CORE_LOCK(adapter);
844			igb_disable_intr(adapter);
845			igb_set_multi(adapter);
846#ifdef DEVICE_POLLING
847			if (!(ifp->if_capenable & IFCAP_POLLING))
848#endif
849				igb_enable_intr(adapter);
850			IGB_CORE_UNLOCK(adapter);
851		}
852		break;
853	case SIOCSIFMEDIA:
854		/* Check SOL/IDER usage */
855		IGB_CORE_LOCK(adapter);
856		if (e1000_check_reset_block(&adapter->hw)) {
857			IGB_CORE_UNLOCK(adapter);
858			device_printf(adapter->dev, "Media change is"
859			    " blocked due to SOL/IDER session.\n");
860			break;
861		}
862		IGB_CORE_UNLOCK(adapter);
863	case SIOCGIFMEDIA:
864		IOCTL_DEBUGOUT("ioctl rcv'd: \
865		    SIOCxIFMEDIA (Get/Set Interface Media)");
866		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
867		break;
868	case SIOCSIFCAP:
869	    {
870		int mask, reinit;
871
872		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
873		reinit = 0;
874		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
875#ifdef DEVICE_POLLING
876		if (mask & IFCAP_POLLING) {
877			if (ifr->ifr_reqcap & IFCAP_POLLING) {
878				error = ether_poll_register(igb_poll, ifp);
879				if (error)
880					return (error);
881				IGB_CORE_LOCK(adapter);
882				igb_disable_intr(adapter);
883				ifp->if_capenable |= IFCAP_POLLING;
884				IGB_CORE_UNLOCK(adapter);
885			} else {
886				error = ether_poll_deregister(ifp);
887				/* Enable interrupt even in error case */
888				IGB_CORE_LOCK(adapter);
889				igb_enable_intr(adapter);
890				ifp->if_capenable &= ~IFCAP_POLLING;
891				IGB_CORE_UNLOCK(adapter);
892			}
893		}
894#endif
895		if (mask & IFCAP_HWCSUM) {
896			ifp->if_capenable ^= IFCAP_HWCSUM;
897			reinit = 1;
898		}
899		if (mask & IFCAP_TSO4) {
900			ifp->if_capenable ^= IFCAP_TSO4;
901			reinit = 1;
902		}
903		if (mask & IFCAP_VLAN_HWTAGGING) {
904			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
905			reinit = 1;
906		}
907		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
908			igb_init(adapter);
909		VLAN_CAPABILITIES(ifp);
910		break;
911	    }
912
913
914	default:
915		error = ether_ioctl(ifp, command, data);
916		break;
917	}
918
919	return (error);
920}
921
922/*********************************************************************
923 *  Watchdog timer:
924 *
925 *  This routine is called from the local timer every second.
926 *  As long as transmit descriptors are being cleaned the value
927 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
928 *  and we then reset the device.
929 *
930 **********************************************************************/
931
932static void
933igb_watchdog(struct adapter *adapter)
934{
935	struct tx_ring	*txr = adapter->tx_rings;
936	bool		tx_hang = FALSE;
937
938	IGB_CORE_LOCK_ASSERT(adapter);
939
940	/*
941	** The timer is set to 5 every time start() queues a packet.
942	** Then txeof keeps resetting it as long as it cleans at
943	** least one descriptor.
944	** Finally, anytime all descriptors are clean the timer is
945	** set to 0.
946	**
947	** With TX Multiqueue we need to check every queue's timer,
948	** if any time out we do the reset.
949	*/
950	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
951		IGB_TX_LOCK(txr);
952		if (txr->watchdog_timer == 0 ||
953		    (--txr->watchdog_timer)) {
954			IGB_TX_UNLOCK(txr);
955			continue;
956		} else {
957			tx_hang = TRUE;
958			IGB_TX_UNLOCK(txr);
959			break;
960		}
961	}
962	if (tx_hang == FALSE)
963		return;
964
965	/* If we are in this routine because of pause frames, then
966	 * don't reset the hardware.
967	 */
968	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
969	    E1000_STATUS_TXOFF) {
970		txr = adapter->tx_rings; /* reset pointer */
971		for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
972			IGB_TX_LOCK(txr);
973			txr->watchdog_timer = IGB_TX_TIMEOUT;
974			IGB_TX_UNLOCK(txr);
975		}
976		return;
977	}
978
979	if (e1000_check_for_link(&adapter->hw) == 0)
980		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
981
982	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
983		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
984		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
985		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
986		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
987		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
988		    txr->next_to_clean);
989	}
990
991	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
992	adapter->watchdog_events++;
993
994	igb_init_locked(adapter);
995}
996
997/*********************************************************************
998 *  Init entry point
999 *
1000 *  This routine is used in two ways. It is used by the stack as
1001 *  init entry point in network interface structure. It is also used
1002 *  by the driver as a hw/sw initialization routine to get to a
1003 *  consistent state.
1004 *
1005 *  return 0 on success, positive on failure
1006 **********************************************************************/
1007
1008static void
1009igb_init_locked(struct adapter *adapter)
1010{
1011	struct ifnet	*ifp = adapter->ifp;
1012	device_t	dev = adapter->dev;
1013	u32		pba = 0;
1014
1015	INIT_DEBUGOUT("igb_init: begin");
1016
1017	IGB_CORE_LOCK_ASSERT(adapter);
1018
1019	igb_stop(adapter);
1020
1021	/*
1022	 * Packet Buffer Allocation (PBA)
1023	 * Writing PBA sets the receive portion of the buffer
1024	 * the remainder is used for the transmit buffer.
1025	 */
1026	if (adapter->hw.mac.type == e1000_82575) {
1027		INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
1028		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1029		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1030	}
1031
1032	/* Get the latest mac address, User can use a LAA */
1033        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1034              ETHER_ADDR_LEN);
1035
1036	/* Put the address into the Receive Address Array */
1037	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1038
1039	/* Initialize the hardware */
1040	if (igb_hardware_init(adapter)) {
1041		device_printf(dev, "Unable to initialize the hardware\n");
1042		return;
1043	}
1044	igb_update_link_status(adapter);
1045
1046	/* Setup VLAN support, basic and offload if available */
1047	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1048	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1049		igb_enable_hw_vlans(adapter);
1050
1051	/* Set hardware offload abilities */
1052	ifp->if_hwassist = 0;
1053	if (ifp->if_capenable & IFCAP_TXCSUM)
1054		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1055	if (ifp->if_capenable & IFCAP_TSO4)
1056		ifp->if_hwassist |= CSUM_TSO;
1057
1058	/* Configure for OS presence */
1059	igb_init_manageability(adapter);
1060
1061	/* Prepare transmit descriptors and buffers */
1062	igb_setup_transmit_structures(adapter);
1063	igb_initialize_transmit_units(adapter);
1064
1065	/* Setup Multicast table */
1066	igb_set_multi(adapter);
1067
1068	/* Prepare receive descriptors and buffers */
1069	if (igb_setup_receive_structures(adapter)) {
1070		device_printf(dev, "Could not setup receive structures\n");
1071		igb_stop(adapter);
1072		return;
1073	}
1074	igb_initialize_receive_units(adapter);
1075
1076	/* Don't lose promiscuous settings */
1077	igb_set_promisc(adapter);
1078
1079	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1080	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1081
1082	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1083	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1084
1085	if (adapter->msix > 1) /* Set up queue routing */
1086		igb_configure_queues(adapter);
1087
1088#ifdef DEVICE_POLLING
1089	/*
1090	 * Only enable interrupts if we are not polling, make sure
1091	 * they are off otherwise.
1092	 */
1093	if (ifp->if_capenable & IFCAP_POLLING)
1094		igb_disable_intr(adapter);
1095	else
1096#endif /* DEVICE_POLLING */
1097	{
1098		/* this clears any pending interrupts */
1099		E1000_READ_REG(&adapter->hw, E1000_ICR);
1100		igb_enable_intr(adapter);
1101		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1102	}
1103
1104
1105	/* Don't reset the phy next time init gets called */
1106	adapter->hw.phy.reset_disable = TRUE;
1107}
1108
1109static void
1110igb_init(void *arg)
1111{
1112	struct adapter *adapter = arg;
1113
1114	IGB_CORE_LOCK(adapter);
1115	igb_init_locked(adapter);
1116	IGB_CORE_UNLOCK(adapter);
1117}
1118
1119
1120#ifdef DEVICE_POLLING
1121/*********************************************************************
1122 *
1123 *  Legacy polling routine
1124 *
1125 *********************************************************************/
1126static void
1127igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1128{
1129	struct adapter *adapter = ifp->if_softc;
1130	struct rx_ring *rxr = adapter->rx_rings;
1131	struct tx_ring *txr = adapter->tx_rings;
1132	uint32_t reg_icr;
1133
1134	IGB_CORE_LOCK(adapter);
1135	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1136		IGB_CORE_UNLOCK(adapter);
1137		return;
1138	}
1139
1140	if (cmd == POLL_AND_CHECK_STATUS) {
1141		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1142		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1143			callout_stop(&adapter->timer);
1144			adapter->hw.mac.get_link_status = 1;
1145			igb_update_link_status(adapter);
1146			callout_reset(&adapter->timer, hz,
1147			    igb_local_timer, adapter);
1148		}
1149	}
1150	igb_rxeof(rxr, count);
1151	IGB_CORE_UNLOCK(adapter);
1152
1153	/* With polling we cannot do multiqueue */
1154	IGB_TX_LOCK(txr);
1155	igb_txeof(txr);
1156
1157	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1158		igb_start_locked(txr, ifp);
1159	IGB_TX_UNLOCK(txr);
1160}
1161#endif /* DEVICE_POLLING */
1162
1163
1164static void
1165igb_handle_link(void *context, int pending)
1166{
1167	struct adapter	*adapter = context;
1168	struct ifnet *ifp;
1169
1170	ifp = adapter->ifp;
1171
1172	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1173		return;
1174
1175	IGB_CORE_LOCK(adapter);
1176	callout_stop(&adapter->timer);
1177	igb_update_link_status(adapter);
1178	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1179	IGB_CORE_UNLOCK(adapter);
1180}
1181
1182static void
1183igb_handle_rxtx(void *context, int pending)
1184{
1185	struct adapter	*adapter = context;
1186	struct tx_ring	*txr = adapter->tx_rings;
1187	struct rx_ring	*rxr = adapter->rx_rings;
1188	struct ifnet	*ifp;
1189
1190	ifp = adapter->ifp;
1191
1192	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1193		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1194			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1195		IGB_TX_LOCK(txr);
1196		igb_txeof(txr);
1197
1198		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1199			igb_start_locked(txr, ifp);
1200		IGB_TX_UNLOCK(txr);
1201	}
1202
1203	igb_enable_intr(adapter);
1204}
1205
1206static void
1207igb_handle_rx(void *context, int pending)
1208{
1209	struct rx_ring	*rxr = context;
1210	struct adapter	*adapter = rxr->adapter;
1211	struct ifnet	*ifp = adapter->ifp;
1212
1213	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1214		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1215			/* More to clean, schedule another task */
1216			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1217
1218}
1219
1220static void
1221igb_handle_tx(void *context, int pending)
1222{
1223	struct tx_ring	*txr = context;
1224	struct adapter	*adapter = txr->adapter;
1225	struct ifnet	*ifp = adapter->ifp;
1226
1227	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1228		IGB_TX_LOCK(txr);
1229		igb_txeof(txr);
1230		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1231			igb_start_locked(txr, ifp);
1232		IGB_TX_UNLOCK(txr);
1233	}
1234}
1235
1236
1237/*********************************************************************
1238 *
1239 *  MSI/Legacy Deferred
1240 *  Interrupt Service routine
1241 *
1242 *********************************************************************/
1243static int
1244igb_irq_fast(void *arg)
1245{
1246	struct adapter	*adapter = arg;
1247	struct ifnet	*ifp = adapter->ifp;
1248	uint32_t	reg_icr;
1249
1250	/* Should not happen, but... */
1251	if (ifp->if_capenable & IFCAP_POLLING)
1252                return FILTER_STRAY;
1253
1254	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1255
1256	/* Hot eject?  */
1257	if (reg_icr == 0xffffffff)
1258		return FILTER_STRAY;
1259
1260	/* Definitely not our interrupt.  */
1261	if (reg_icr == 0x0)
1262		return FILTER_STRAY;
1263
1264	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1265		return FILTER_STRAY;
1266
1267	/*
1268	 * Mask interrupts until the taskqueue is finished running.  This is
1269	 * cheap, just assume that it is needed.  This also works around the
1270	 * MSI message reordering errata on certain systems.
1271	 */
1272	igb_disable_intr(adapter);
1273	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1274
1275	/* Link status change */
1276	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1277		adapter->hw.mac.get_link_status = 1;
1278		taskqueue_enqueue(adapter->tq, &adapter->link_task);
1279	}
1280
1281	if (reg_icr & E1000_ICR_RXO)
1282		adapter->rx_overruns++;
1283	return FILTER_HANDLED;
1284}
1285
1286
1287/*********************************************************************
1288 *
1289 *  MSIX TX Interrupt Service routine
1290 *
1291 **********************************************************************/
1292
1293static void
1294igb_msix_tx(void *arg)
1295{
1296	struct tx_ring *txr = arg;
1297	struct adapter *adapter = txr->adapter;
1298	struct ifnet	*ifp = adapter->ifp;
1299
1300	++txr->tx_irq;
1301	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1302		IGB_TX_LOCK(txr);
1303		igb_txeof(txr);
1304		IGB_TX_UNLOCK(txr);
1305		taskqueue_enqueue(adapter->tq, &txr->tx_task);
1306	}
1307	/* Reenable this interrupt */
1308	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1309	return;
1310}
1311
1312/*********************************************************************
1313 *
1314 *  MSIX RX Interrupt Service routine
1315 *
1316 **********************************************************************/
1317
1318static void
1319igb_msix_rx(void *arg)
1320{
1321	struct rx_ring *rxr = arg;
1322	struct adapter *adapter = rxr->adapter;
1323	struct ifnet	*ifp = adapter->ifp;
1324
1325	++rxr->rx_irq;
1326	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1327		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1328			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1329	/* Reenable this interrupt */
1330	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1331	return;
1332}
1333
1334/*********************************************************************
1335 *
1336 *  MSIX Link Interrupt Service routine
1337 *
1338 **********************************************************************/
1339
1340static void
1341igb_msix_link(void *arg)
1342{
1343	struct adapter	*adapter = arg;
1344	u32       	icr;
1345
1346	++adapter->link_irq;
1347	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1348	if (!(icr & E1000_ICR_LSC))
1349		goto spurious;
1350	adapter->hw.mac.get_link_status = 1;
1351	taskqueue_enqueue(adapter->tq, &adapter->link_task);
1352
1353spurious:
1354	/* Rearm */
1355	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1356	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1357	return;
1358}
1359
1360
1361/*********************************************************************
1362 *
1363 *  Media Ioctl callback
1364 *
1365 *  This routine is called whenever the user queries the status of
1366 *  the interface using ifconfig.
1367 *
1368 **********************************************************************/
1369static void
1370igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1371{
1372	struct adapter *adapter = ifp->if_softc;
1373	u_char fiber_type = IFM_1000_SX;
1374
1375	INIT_DEBUGOUT("igb_media_status: begin");
1376
1377	IGB_CORE_LOCK(adapter);
1378	igb_update_link_status(adapter);
1379
1380	ifmr->ifm_status = IFM_AVALID;
1381	ifmr->ifm_active = IFM_ETHER;
1382
1383	if (!adapter->link_active) {
1384		IGB_CORE_UNLOCK(adapter);
1385		return;
1386	}
1387
1388	ifmr->ifm_status |= IFM_ACTIVE;
1389
1390	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1391	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1392		ifmr->ifm_active |= fiber_type | IFM_FDX;
1393	else {
1394		switch (adapter->link_speed) {
1395		case 10:
1396			ifmr->ifm_active |= IFM_10_T;
1397			break;
1398		case 100:
1399			ifmr->ifm_active |= IFM_100_TX;
1400			break;
1401		case 1000:
1402			ifmr->ifm_active |= IFM_1000_T;
1403			break;
1404		}
1405		if (adapter->link_duplex == FULL_DUPLEX)
1406			ifmr->ifm_active |= IFM_FDX;
1407		else
1408			ifmr->ifm_active |= IFM_HDX;
1409	}
1410	IGB_CORE_UNLOCK(adapter);
1411}
1412
1413/*********************************************************************
1414 *
1415 *  Media Ioctl callback
1416 *
1417 *  This routine is called when the user changes speed/duplex using
1418 *  media/mediopt option with ifconfig.
1419 *
1420 **********************************************************************/
1421static int
1422igb_media_change(struct ifnet *ifp)
1423{
1424	struct adapter *adapter = ifp->if_softc;
1425	struct ifmedia  *ifm = &adapter->media;
1426
1427	INIT_DEBUGOUT("igb_media_change: begin");
1428
1429	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1430		return (EINVAL);
1431
1432	IGB_CORE_LOCK(adapter);
1433	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1434	case IFM_AUTO:
1435		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1436		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1437		break;
1438	case IFM_1000_LX:
1439	case IFM_1000_SX:
1440	case IFM_1000_T:
1441		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1442		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1443		break;
1444	case IFM_100_TX:
1445		adapter->hw.mac.autoneg = FALSE;
1446		adapter->hw.phy.autoneg_advertised = 0;
1447		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1448			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1449		else
1450			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1451		break;
1452	case IFM_10_T:
1453		adapter->hw.mac.autoneg = FALSE;
1454		adapter->hw.phy.autoneg_advertised = 0;
1455		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1456			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1457		else
1458			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1459		break;
1460	default:
1461		device_printf(adapter->dev, "Unsupported media type\n");
1462	}
1463
1464	/* As the speed/duplex settings my have changed we need to
1465	 * reset the PHY.
1466	 */
1467	adapter->hw.phy.reset_disable = FALSE;
1468
1469	igb_init_locked(adapter);
1470	IGB_CORE_UNLOCK(adapter);
1471
1472	return (0);
1473}
1474
1475
1476/*********************************************************************
1477 *
1478 *  This routine maps the mbufs to Advanced TX descriptors.
1479 *  used by the 82575 adapter.
1480 *
1481 **********************************************************************/
1482
1483static int
1484igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1485{
1486	struct adapter		*adapter = txr->adapter;
1487	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1488	bus_dmamap_t		map;
1489	struct igb_buffer	*tx_buffer, *tx_buffer_mapped;
1490	union e1000_adv_tx_desc	*txd = NULL;
1491	struct mbuf		*m_head;
1492	u32			olinfo_status = 0, cmd_type_len = 0;
1493	int			nsegs, i, j, error, first, last = 0;
1494	u32			hdrlen = 0, offload = 0;
1495
1496	m_head = *m_headp;
1497
1498
1499	/* Set basic descriptor constants */
1500	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1501	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1502	if (m_head->m_flags & M_VLANTAG)
1503		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1504
1505        /*
1506         * Force a cleanup if number of TX descriptors
1507         * available hits the threshold
1508         */
1509	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1510		igb_txeof(txr);
1511		/* Now do we at least have a minimal? */
1512		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1513			txr->no_desc_avail++;
1514			return (ENOBUFS);
1515		}
1516	}
1517
1518	/*
1519         * Map the packet for DMA.
1520	 *
1521	 * Capture the first descriptor index,
1522	 * this descriptor will have the index
1523	 * of the EOP which is the only one that
1524	 * now gets a DONE bit writeback.
1525	 */
1526	first = txr->next_avail_desc;
1527	tx_buffer = &txr->tx_buffers[first];
1528	tx_buffer_mapped = tx_buffer;
1529	map = tx_buffer->map;
1530
1531	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1532	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1533
1534	if (error == EFBIG) {
1535		struct mbuf *m;
1536
1537		m = m_defrag(*m_headp, M_DONTWAIT);
1538		if (m == NULL) {
1539			adapter->mbuf_alloc_failed++;
1540			m_freem(*m_headp);
1541			*m_headp = NULL;
1542			return (ENOBUFS);
1543		}
1544		*m_headp = m;
1545
1546		/* Try it again */
1547		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1548		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1549
1550		if (error == ENOMEM) {
1551			adapter->no_tx_dma_setup++;
1552			return (error);
1553		} else if (error != 0) {
1554			adapter->no_tx_dma_setup++;
1555			m_freem(*m_headp);
1556			*m_headp = NULL;
1557			return (error);
1558		}
1559	} else if (error == ENOMEM) {
1560		adapter->no_tx_dma_setup++;
1561		return (error);
1562	} else if (error != 0) {
1563		adapter->no_tx_dma_setup++;
1564		m_freem(*m_headp);
1565		*m_headp = NULL;
1566		return (error);
1567	}
1568
1569	/* Check again to be sure we have enough descriptors */
1570        if (nsegs > (txr->tx_avail - 2)) {
1571                txr->no_desc_avail++;
1572		bus_dmamap_unload(txr->txtag, map);
1573		return (ENOBUFS);
1574        }
1575	m_head = *m_headp;
1576
1577        /*
1578         * Set up the context descriptor:
1579         * used when any hardware offload is done.
1580	 * This includes CSUM, VLAN, and TSO. It
1581	 * will use the first descriptor.
1582         */
1583        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1584		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1585			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1586			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1587			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1588		} else
1589			return (ENXIO);
1590	} else
1591		/* Do all other context descriptor setup */
1592	offload = igb_tx_ctx_setup(txr, m_head);
1593	if (offload == TRUE)
1594		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1595	/* Calculate payload length */
1596	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1597	    << E1000_ADVTXD_PAYLEN_SHIFT);
1598
1599	/* Set up our transmit descriptors */
1600	i = txr->next_avail_desc;
1601	for (j = 0; j < nsegs; j++) {
1602		bus_size_t seg_len;
1603		bus_addr_t seg_addr;
1604
1605		tx_buffer = &txr->tx_buffers[i];
1606		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1607		seg_addr = segs[j].ds_addr;
1608		seg_len  = segs[j].ds_len;
1609
1610		txd->read.buffer_addr = htole64(seg_addr);
1611		txd->read.cmd_type_len = htole32(
1612		    adapter->txd_cmd | cmd_type_len | seg_len);
1613		txd->read.olinfo_status = htole32(olinfo_status);
1614		last = i;
1615		if (++i == adapter->num_tx_desc)
1616			i = 0;
1617		tx_buffer->m_head = NULL;
1618		tx_buffer->next_eop = -1;
1619	}
1620
1621	txr->next_avail_desc = i;
1622	txr->tx_avail -= nsegs;
1623
1624        tx_buffer->m_head = m_head;
1625	tx_buffer_mapped->map = tx_buffer->map;
1626	tx_buffer->map = map;
1627        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1628
1629        /*
1630         * Last Descriptor of Packet
1631	 * needs End Of Packet (EOP)
1632	 * and Report Status (RS)
1633         */
1634        txd->read.cmd_type_len |=
1635	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1636	/*
1637	 * Keep track in the first buffer which
1638	 * descriptor will be written back
1639	 */
1640	tx_buffer = &txr->tx_buffers[first];
1641	tx_buffer->next_eop = last;
1642
1643	/*
1644	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1645	 * that this frame is available to transmit.
1646	 */
1647	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1648	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1649	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1650	++txr->tx_packets;
1651
1652	return (0);
1653
1654}
1655
1656static void
1657igb_set_promisc(struct adapter *adapter)
1658{
1659	struct ifnet	*ifp = adapter->ifp;
1660	uint32_t	reg_rctl;
1661
1662	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1663
1664	if (ifp->if_flags & IFF_PROMISC) {
1665		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1666		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1667	} else if (ifp->if_flags & IFF_ALLMULTI) {
1668		reg_rctl |= E1000_RCTL_MPE;
1669		reg_rctl &= ~E1000_RCTL_UPE;
1670		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1671	}
1672}
1673
1674static void
1675igb_disable_promisc(struct adapter *adapter)
1676{
1677	uint32_t	reg_rctl;
1678
1679	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1680
1681	reg_rctl &=  (~E1000_RCTL_UPE);
1682	reg_rctl &=  (~E1000_RCTL_MPE);
1683	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1684}
1685
1686
1687/*********************************************************************
1688 *  Multicast Update
1689 *
1690 *  This routine is called whenever multicast address list is updated.
1691 *
1692 **********************************************************************/
1693
1694static void
1695igb_set_multi(struct adapter *adapter)
1696{
1697	struct ifnet	*ifp = adapter->ifp;
1698	struct ifmultiaddr *ifma;
1699	uint32_t reg_rctl = 0;
1700	uint8_t  mta[512]; /* Largest MTS is 4096 bits */
1701	int mcnt = 0;
1702
1703	IOCTL_DEBUGOUT("igb_set_multi: begin");
1704
1705	IF_ADDR_LOCK(ifp);
1706	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1707		if (ifma->ifma_addr->sa_family != AF_LINK)
1708			continue;
1709
1710		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1711			break;
1712
1713		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1714		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1715		mcnt++;
1716	}
1717	IF_ADDR_UNLOCK(ifp);
1718
1719	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1720		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1721		reg_rctl |= E1000_RCTL_MPE;
1722		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1723	} else
1724		e1000_update_mc_addr_list(&adapter->hw, mta,
1725		    mcnt, 1, adapter->hw.mac.rar_entry_count);
1726}
1727
1728
1729/*********************************************************************
1730 *  Timer routine
1731 *
1732 *  This routine checks for link status and updates statistics.
1733 *
1734 **********************************************************************/
1735
1736static void
1737igb_local_timer(void *arg)
1738{
1739	struct adapter	*adapter = arg;
1740	struct ifnet	*ifp = adapter->ifp;
1741
1742	IGB_CORE_LOCK_ASSERT(adapter);
1743
1744	igb_update_link_status(adapter);
1745	igb_update_stats_counters(adapter);
1746
1747	if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1748		igb_print_hw_stats(adapter);
1749
1750	/*
1751	 * Each second we check the watchdog to
1752	 * protect against hardware hangs.
1753	 */
1754	igb_watchdog(adapter);
1755
1756	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1757
1758}
1759
1760static void
1761igb_update_link_status(struct adapter *adapter)
1762{
1763	struct e1000_hw *hw = &adapter->hw;
1764	struct ifnet *ifp = adapter->ifp;
1765	device_t dev = adapter->dev;
1766	struct tx_ring *txr = adapter->tx_rings;
1767	u32 link_check = 0;
1768
1769	/* Get the cached link value or read for real */
1770	switch (hw->phy.media_type) {
1771	case e1000_media_type_copper:
1772		if (hw->mac.get_link_status) {
1773			/* Do the work to read phy */
1774			e1000_check_for_link(hw);
1775			link_check = !hw->mac.get_link_status;
1776		} else
1777			link_check = TRUE;
1778		break;
1779	case e1000_media_type_fiber:
1780		e1000_check_for_link(hw);
1781		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1782		    E1000_STATUS_LU);
1783		break;
1784	case e1000_media_type_internal_serdes:
1785		e1000_check_for_link(hw);
1786		link_check = adapter->hw.mac.serdes_has_link;
1787		break;
1788	default:
1789	case e1000_media_type_unknown:
1790		break;
1791	}
1792
1793	/* Now we check if a transition has happened */
1794	if (link_check && (adapter->link_active == 0)) {
1795		e1000_get_speed_and_duplex(&adapter->hw,
1796		    &adapter->link_speed, &adapter->link_duplex);
1797		if (bootverbose)
1798			device_printf(dev, "Link is up %d Mbps %s\n",
1799			    adapter->link_speed,
1800			    ((adapter->link_duplex == FULL_DUPLEX) ?
1801			    "Full Duplex" : "Half Duplex"));
1802		adapter->link_active = 1;
1803		ifp->if_baudrate = adapter->link_speed * 1000000;
1804		if_link_state_change(ifp, LINK_STATE_UP);
1805	} else if (!link_check && (adapter->link_active == 1)) {
1806		ifp->if_baudrate = adapter->link_speed = 0;
1807		adapter->link_duplex = 0;
1808		if (bootverbose)
1809			device_printf(dev, "Link is Down\n");
1810		adapter->link_active = 0;
1811		if_link_state_change(ifp, LINK_STATE_DOWN);
1812		/* Turn off watchdogs */
1813		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
1814			txr->watchdog_timer = FALSE;
1815	}
1816}
1817
1818/*********************************************************************
1819 *
1820 *  This routine disables all traffic on the adapter by issuing a
1821 *  global reset on the MAC and deallocates TX/RX buffers.
1822 *
1823 **********************************************************************/
1824
1825static void
1826igb_stop(void *arg)
1827{
1828	struct adapter	*adapter = arg;
1829	struct ifnet	*ifp = adapter->ifp;
1830
1831	IGB_CORE_LOCK_ASSERT(adapter);
1832
1833	INIT_DEBUGOUT("igb_stop: begin");
1834
1835	igb_disable_intr(adapter);
1836
1837	callout_stop(&adapter->timer);
1838
1839	/* Tell the stack that the interface is no longer active */
1840	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1841
1842
1843	e1000_reset_hw(&adapter->hw);
1844	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1845}
1846
1847
1848/*********************************************************************
1849 *
1850 *  Determine hardware revision.
1851 *
1852 **********************************************************************/
1853static void
1854igb_identify_hardware(struct adapter *adapter)
1855{
1856	device_t dev = adapter->dev;
1857
1858	/* Make sure our PCI config space has bus mastering and memory access enabled */
1859	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1860	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1861	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1862		device_printf(dev, "Memory Access and/or Bus Master bits "
1863		    "were not set!\n");
1864		adapter->hw.bus.pci_cmd_word |=
1865		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1866		pci_write_config(dev, PCIR_COMMAND,
1867		    adapter->hw.bus.pci_cmd_word, 2);
1868	}
1869
1870	/* Save off the information about this board */
1871	adapter->hw.vendor_id = pci_get_vendor(dev);
1872	adapter->hw.device_id = pci_get_device(dev);
1873	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1874	adapter->hw.subsystem_vendor_id =
1875	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
1876	adapter->hw.subsystem_device_id =
1877	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
1878
1879	/* Do Shared Code Init and Setup */
1880	if (e1000_set_mac_type(&adapter->hw)) {
1881		device_printf(dev, "Setup init failure\n");
1882		return;
1883	}
1884}
1885
1886static int
1887igb_allocate_pci_resources(struct adapter *adapter)
1888{
1889	device_t	dev = adapter->dev;
1890	int		rid, error = 0;
1891
1892	rid = PCIR_BAR(0);
1893	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
1894	    &rid, RF_ACTIVE);
1895	if (adapter->pci_mem == NULL) {
1896		device_printf(dev, "Unable to allocate bus resource: memory\n");
1897		return (ENXIO);
1898	}
1899	adapter->osdep.mem_bus_space_tag =
1900	    rman_get_bustag(adapter->pci_mem);
1901	adapter->osdep.mem_bus_space_handle =
1902	    rman_get_bushandle(adapter->pci_mem);
1903	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
1904
1905	/*
1906	** Init the resource arrays
1907	*/
1908	for (int i = 0; i < IGB_MSIX_VEC; i++) {
1909		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
1910		adapter->tag[i] = NULL;
1911		adapter->res[i] = NULL;
1912	}
1913
1914	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
1915	adapter->num_rx_queues = 1;
1916
1917	/* This will setup either MSI/X or MSI */
1918	adapter->msix = igb_setup_msix(adapter);
1919
1920	adapter->hw.back = &adapter->osdep;
1921
1922	return (error);
1923}
1924
1925/*********************************************************************
1926 *
1927 *  Setup the Legacy or MSI Interrupt handler
1928 *
1929 **********************************************************************/
1930static int
1931igb_allocate_legacy(struct adapter *adapter)
1932{
1933	device_t dev = adapter->dev;
1934	int error;
1935
1936	/* Turn off all interrupts */
1937	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
1938
1939	/* Legacy RID at 0 */
1940	if (adapter->msix == 0)
1941		adapter->rid[0] = 0;
1942
1943	/* We allocate a single interrupt resource */
1944	adapter->res[0] = bus_alloc_resource_any(dev,
1945	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
1946	if (adapter->res[0] == NULL) {
1947		device_printf(dev, "Unable to allocate bus resource: "
1948		    "interrupt\n");
1949		return (ENXIO);
1950	}
1951
1952	/*
1953	 * Try allocating a fast interrupt and the associated deferred
1954	 * processing contexts.
1955	 */
1956	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
1957	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
1958	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
1959	    taskqueue_thread_enqueue, &adapter->tq);
1960	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
1961	    device_get_nameunit(adapter->dev));
1962	if ((error = bus_setup_intr(dev, adapter->res[0],
1963	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
1964	    &adapter->tag[0])) != 0) {
1965		device_printf(dev, "Failed to register fast interrupt "
1966			    "handler: %d\n", error);
1967		taskqueue_free(adapter->tq);
1968		adapter->tq = NULL;
1969		return (error);
1970	}
1971
1972	return (0);
1973}
1974
1975
1976/*********************************************************************
1977 *
1978 *  Setup the MSIX Interrupt handlers:
1979 *
1980 **********************************************************************/
1981static int
1982igb_allocate_msix(struct adapter *adapter)
1983{
1984	device_t dev = adapter->dev;
1985	struct tx_ring *txr = adapter->tx_rings;
1986	struct rx_ring *rxr = adapter->rx_rings;
1987	int error, vector = 0;
1988
1989	/*
1990	 * Setup the interrupt handlers
1991	 */
1992
1993	/* TX Setup */
1994	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
1995		adapter->res[vector] = bus_alloc_resource_any(dev,
1996		    SYS_RES_IRQ, &adapter->rid[vector],
1997		    RF_SHAREABLE | RF_ACTIVE);
1998		if (adapter->res[vector] == NULL) {
1999			device_printf(dev,
2000			    "Unable to allocate bus resource: "
2001			    "MSIX TX Interrupt\n");
2002			return (ENXIO);
2003		}
2004		error = bus_setup_intr(dev, adapter->res[vector],
2005	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
2006		    txr, &adapter->tag[vector]);
2007		if (error) {
2008			adapter->res[vector] = NULL;
2009			device_printf(dev, "Failed to register TX handler");
2010			return (error);
2011		}
2012		/* Make tasklet for deferred handling - one per queue */
2013		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
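		/*
		** Record which EICR/EIMS bit and MSI-X vector this queue
		** uses: the 82575 routes interrupt causes through fixed
		** MSIXBM bitmap entries (one EICR_TX_QUEUE bit per queue),
		** while the later parts simply use one bit per vector.
		*/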
2014		if (adapter->hw.mac.type == e1000_82575) {
2015			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2016			/* MSIXBM registers start at 0 */
2017			txr->msix = adapter->rid[vector] - 1;
2018		} else {
2019			txr->eims = 1 << vector;
2020			txr->msix = vector;
2021		}
2022	}
2023
2024	/* RX Setup */
2025	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
2026		adapter->res[vector] = bus_alloc_resource_any(dev,
2027		    SYS_RES_IRQ, &adapter->rid[vector],
2028		    RF_SHAREABLE | RF_ACTIVE);
2029		if (adapter->res[vector] == NULL) {
2030			device_printf(dev,
2031			    "Unable to allocate bus resource: "
2032			    "MSIX RX Interrupt\n");
2033			return (ENXIO);
2034		}
2035		error = bus_setup_intr(dev, adapter->res[vector],
2036	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
2037		    rxr, &adapter->tag[vector]);
2038		if (error) {
2039			adapter->res[vector] = NULL;
2040			device_printf(dev, "Failed to register RX handler");
2041			return (error);
2042		}
2043		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2044		if (adapter->hw.mac.type == e1000_82575) {
2045			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2046			rxr->msix = adapter->rid[vector] - 1;
2047		} else {
2048			rxr->eims = 1 << vector;
2049			rxr->msix = vector;
2050		}
2051	}
2052
2053	/* And Link */
2054	adapter->res[vector] = bus_alloc_resource_any(dev,
2055	    SYS_RES_IRQ, &adapter->rid[vector],
2056	    RF_SHAREABLE | RF_ACTIVE);
2057	if (adapter->res[vector] == NULL) {
2058		device_printf(dev,
2059		    "Unable to allocate bus resource: "
2060		    "MSIX Link Interrupt\n");
2061		return (ENXIO);
2062	}
2063	if ((error = bus_setup_intr(dev, adapter->res[vector],
2064	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2065	    adapter, &adapter->tag[vector])) != 0) {
2066		device_printf(dev, "Failed to register Link handler");
2067		return (error);
2068	}
2069	if (adapter->hw.mac.type == e1000_82575)
2070		adapter->linkvec = adapter->rid[vector] - 1;
2071	else
2072		adapter->linkvec = vector;
2073
2074	/* Make tasklet for deferred link interrupt handling */
2075	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2076
2077	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2078	    taskqueue_thread_enqueue, &adapter->tq);
2079	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2080	    device_get_nameunit(adapter->dev));
2081
2082	return (0);
2083}
2084
2085static void
2086igb_configure_queues(struct adapter *adapter)
2087{
2088	struct	e1000_hw *hw = &adapter->hw;
2089	struct	tx_ring	*txr;
2090	struct	rx_ring	*rxr;
2091
2092	/* Turn on MSIX */
2093	{ /* 82575 */
2094		int tmp;
2095
2096		/* Enable MSI-X PBA support */
2097		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2098		tmp |= E1000_CTRL_EXT_PBA_CLR;
2099		/* Auto-Mask interrupts upon ICR read. */
2100		tmp |= E1000_CTRL_EXT_EIAME;
2101		tmp |= E1000_CTRL_EXT_IRCA;
2102		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2103
2104		/* Set the interrupt throttling rate. */
2105		for (int i = 0; i < IGB_MSIX_VEC; i++)
2106			E1000_WRITE_REG(&adapter->hw,
2107			    E1000_EITR(i), DEFAULT_ITR);
2108
2109		/* TX */
2110		for (int i = 0; i < adapter->num_tx_queues; i++) {
2111			txr = &adapter->tx_rings[i];
2112			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2113			    txr->eims);
2114			adapter->eims_mask |= txr->eims;
2115		}
2116
2117		/* RX */
2118		for (int i = 0; i < adapter->num_rx_queues; i++) {
2119			rxr = &adapter->rx_rings[i];
2120			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2121			    rxr->eims);
2122			adapter->eims_mask |= rxr->eims;
2123		}
2124
2125		/* Link */
2126		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2127		    E1000_EIMS_OTHER);
2128		adapter->link_mask |= E1000_EIMS_OTHER;
2129		adapter->eims_mask |= adapter->link_mask;
2130	}
2131	return;
2132}
2133
2134
2135static void
2136igb_free_pci_resources(struct adapter *adapter)
2137{
2138	device_t dev = adapter->dev;
2139
2140	/* Make sure the for loop below runs at least once */
2141	if (adapter->msix == 0)
2142		adapter->msix = 1;
2143
2144	/*
2145	 * First release all the interrupt resources:
2146	 *      notice that since these are just kept
2147	 *      in an array we can do the same logic
2148	 *      whether it's MSIX or just legacy.
2149	 */
2150	for (int i = 0; i < adapter->msix; i++) {
2151		if (adapter->tag[i] != NULL) {
2152			bus_teardown_intr(dev, adapter->res[i],
2153			    adapter->tag[i]);
2154			adapter->tag[i] = NULL;
2155		}
2156		if (adapter->res[i] != NULL) {
2157			bus_release_resource(dev, SYS_RES_IRQ,
2158			    adapter->rid[i], adapter->res[i]);
2159		}
2160	}
2161
2162	if (adapter->msix)
2163		pci_release_msi(dev);
2164
2165	if (adapter->msix_mem != NULL)
2166		bus_release_resource(dev, SYS_RES_MEMORY,
2167		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2168
2169	if (adapter->pci_mem != NULL)
2170		bus_release_resource(dev, SYS_RES_MEMORY,
2171		    PCIR_BAR(0), adapter->pci_mem);
2172
2173}
2174
2175/*
2176 * Set up either MSI/X or MSI
2177 */
2178static int
2179igb_setup_msix(struct adapter *adapter)
2180{
2181	device_t dev = adapter->dev;
2182	int rid, want, queues, msgs;
2183
2184	/* First try MSI/X */
2185	rid = PCIR_BAR(IGB_MSIX_BAR);
2186	adapter->msix_mem = bus_alloc_resource_any(dev,
2187	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2188	if (!adapter->msix_mem) {
2189		/* May not be enabled */
2190		device_printf(adapter->dev,
2191		    "Unable to map MSIX table\n");
2192		goto msi;
2193	}
2194
2195	msgs = pci_msix_count(dev);
2196	if (msgs == 0) { /* system has msix disabled */
2197		bus_release_resource(dev, SYS_RES_MEMORY,
2198		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2199		adapter->msix_mem = NULL;
2200		goto msi;
2201	}
2202
2203	/* Limit by the number set in the header */
2204	if (msgs > IGB_MSIX_VEC)
2205		msgs = IGB_MSIX_VEC;
2206
2207	/* Figure out a reasonable auto config value */
2208	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
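	/*
	 * For example (hypothetical numbers): with 10 MSI-X messages
	 * and 8 CPUs, (msgs - 1) / 2 = 4, so queues = 4; with the
	 * tunables left at 0 this gives 4 TX + 4 RX + 1 link = 9
	 * vectors wanted, which fits within the 10 available.
	 */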
2209
2210	if (igb_tx_queues == 0)
2211		igb_tx_queues = queues;
2212	if (igb_rx_queues == 0)
2213		igb_rx_queues = queues;
2214	want = igb_tx_queues + igb_rx_queues + 1;
2215	if (msgs >= want)
2216		msgs = want;
2217	else {
2218               	device_printf(adapter->dev,
2219		    "MSIX Configuration Problem, "
2220		    "%d vectors configured, but %d queues wanted!\n",
2221		    msgs, want);
2222		return (ENXIO);
2223	}
2224	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2225               	device_printf(adapter->dev,
2226		    "Using MSIX interrupts with %d vectors\n", msgs);
2227		adapter->num_tx_queues = igb_tx_queues;
2228		adapter->num_rx_queues = igb_rx_queues;
2229		return (msgs);
2230	}
2231msi:
2232       	msgs = pci_msi_count(dev);
2233       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2234               	device_printf(adapter->dev,"Using MSI interrupt\n");
2235	return (msgs);
2236}
2237
2238/*********************************************************************
2239 *
2240 *  Initialize the hardware to a configuration
2241 *  as specified by the adapter structure.
2242 *
2243 **********************************************************************/
2244static int
2245igb_hardware_init(struct adapter *adapter)
2246{
2247	device_t	dev = adapter->dev;
2248	u32		rx_buffer_size;
2249
2250	INIT_DEBUGOUT("igb_hardware_init: begin");
2251
2252	/* Issue a global reset */
2253	e1000_reset_hw(&adapter->hw);
2254
2255	/* Let the firmware know the OS is in control */
2256	igb_get_hw_control(adapter);
2257
2258	/*
2259	 * These parameters control the automatic generation (Tx) and
2260	 * response (Rx) to Ethernet PAUSE frames.
2261	 * - High water mark should allow for at least two frames to be
2262	 *   received after sending an XOFF.
2263	 * - Low water mark works best when it is very near the high water mark.
2264	 *   This allows the receiver to restart by sending XON when it has
2265	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2266	 *   restart after one full frame is pulled from the buffer. There
2267	 *   could be several smaller frames in the buffer and if so they will
2268	 *   not trigger the XON until their total number reduces the buffer
2269	 *   by 1500.
2270	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2271	 */
2272	rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2273	    E1000_PBA) & 0xffff) << 10);
2274
2275	adapter->hw.fc.high_water = rx_buffer_size -
2276	    roundup2(adapter->max_frame_size, 1024);
2277	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
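	/*
	 * Worked example (hypothetical PBA split): if the RX portion of
	 * the packet buffer is 34 KB, rx_buffer_size is 34816 bytes;
	 * with a 1522-byte max frame rounded up to 2048, the high water
	 * mark is 32768 and the low water mark is 31268.
	 */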
2278
2279	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2280	adapter->hw.fc.send_xon = TRUE;
2281	adapter->hw.fc.type = e1000_fc_full;
2282
2283	if (e1000_init_hw(&adapter->hw) < 0) {
2284		device_printf(dev, "Hardware Initialization Failed\n");
2285		return (EIO);
2286	}
2287
2288	e1000_check_for_link(&adapter->hw);
2289
2290	return (0);
2291}
2292
2293/*********************************************************************
2294 *
2295 *  Setup networking device structure and register an interface.
2296 *
2297 **********************************************************************/
2298static void
2299igb_setup_interface(device_t dev, struct adapter *adapter)
2300{
2301	struct ifnet   *ifp;
2302
2303	INIT_DEBUGOUT("igb_setup_interface: begin");
2304
2305	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2306	if (ifp == NULL)
2307		panic("%s: cannot if_alloc()", device_get_nameunit(dev));
2308	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2309	ifp->if_mtu = ETHERMTU;
2310	ifp->if_init =  igb_init;
2311	ifp->if_softc = adapter;
2312	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2313	ifp->if_ioctl = igb_ioctl;
2314	ifp->if_start = igb_start;
2315	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2316	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2317	IFQ_SET_READY(&ifp->if_snd);
2318
2319	ether_ifattach(ifp, adapter->hw.mac.addr);
2320
2321	ifp->if_capabilities = ifp->if_capenable = 0;
2322
2323	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2324	ifp->if_capabilities |= IFCAP_TSO4;
2325	ifp->if_capenable = ifp->if_capabilities;
2326
2327	/*
2328	 * Tell the upper layer(s) we support long frames.
2329	 */
2330	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2331	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2332	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2333
2334#ifdef DEVICE_POLLING
2335	if (adapter->msix > 1)
2336		device_printf(adapter->dev, "POLLING not supported with MSIX\n");
2337	else
2338		ifp->if_capabilities |= IFCAP_POLLING;
2339#endif
2340
2341	/*
2342	 * Specify the media types supported by this adapter and register
2343	 * callbacks to update media and link information
2344	 */
2345	ifmedia_init(&adapter->media, IFM_IMASK,
2346	    igb_media_change, igb_media_status);
2347	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2348	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2349		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2350			    0, NULL);
2351		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2352	} else {
2353		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2354		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2355			    0, NULL);
2356		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2357			    0, NULL);
2358		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2359			    0, NULL);
2360		if (adapter->hw.phy.type != e1000_phy_ife) {
2361			ifmedia_add(&adapter->media,
2362				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2363			ifmedia_add(&adapter->media,
2364				IFM_ETHER | IFM_1000_T, 0, NULL);
2365		}
2366	}
2367	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2368	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2369}
2370
2371
2372/*
2373 * Manage DMA'able memory.
2374 */
2375static void
2376igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2377{
2378	if (error)
2379		return;
2380	*(bus_addr_t *) arg = segs[0].ds_addr;
2381}
2382
2383static int
2384igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2385        struct igb_dma_alloc *dma, int mapflags)
2386{
2387	int error;
2388
2389	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2390				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2391				BUS_SPACE_MAXADDR,	/* lowaddr */
2392				BUS_SPACE_MAXADDR,	/* highaddr */
2393				NULL, NULL,		/* filter, filterarg */
2394				size,			/* maxsize */
2395				1,			/* nsegments */
2396				size,			/* maxsegsize */
2397				0,			/* flags */
2398				NULL,			/* lockfunc */
2399				NULL,			/* lockarg */
2400				&dma->dma_tag);
2401	if (error) {
2402		device_printf(adapter->dev,
2403		    "%s: bus_dma_tag_create failed: %d\n",
2404		    __func__, error);
2405		goto fail_0;
2406	}
2407
2408	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2409	    BUS_DMA_NOWAIT, &dma->dma_map);
2410	if (error) {
2411		device_printf(adapter->dev,
2412		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2413		    __func__, (uintmax_t)size, error);
2414		goto fail_2;
2415	}
2416
2417	dma->dma_paddr = 0;
2418	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2419	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2420	if (error || dma->dma_paddr == 0) {
2421		device_printf(adapter->dev,
2422		    "%s: bus_dmamap_load failed: %d\n",
2423		    __func__, error);
2424		goto fail_3;
2425	}
2426
2427	return (0);
2428
2429fail_3:
2430	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2431fail_2:
2432	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2433	bus_dma_tag_destroy(dma->dma_tag);
2434fail_0:
2435	dma->dma_map = NULL;
2436	dma->dma_tag = NULL;
2437
2438	return (error);
2439}
2440
2441static void
2442igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2443{
2444	if (dma->dma_tag == NULL)
2445		return;
2446	if (dma->dma_map != NULL) {
2447		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2448		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2449		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2450		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2451		dma->dma_map = NULL;
2452	}
2453	bus_dma_tag_destroy(dma->dma_tag);
2454	dma->dma_tag = NULL;
2455}
2456
2457
2458/*********************************************************************
2459 *
2460 *  Allocate memory for the transmit and receive rings, and then
2461 *  the descriptors associated with each, called only once at attach.
2462 *
2463 **********************************************************************/
2464static int
2465igb_allocate_queues(struct adapter *adapter)
2466{
2467	device_t dev = adapter->dev;
2468	struct tx_ring *txr;
2469	struct rx_ring *rxr;
2470	int rsize, tsize, error = E1000_SUCCESS;
2471	int txconf = 0, rxconf = 0;
2472	char	name_string[16];
2473
2474	/* First allocate the TX ring struct memory */
2475	if (!(adapter->tx_rings =
2476	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2477	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2478		device_printf(dev, "Unable to allocate TX ring memory\n");
2479		error = ENOMEM;
2480		goto fail;
2481	}
2482	txr = adapter->tx_rings;
2483
2484	/* Next allocate the RX */
2485	if (!(adapter->rx_rings =
2486	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2487	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2488		device_printf(dev, "Unable to allocate RX ring memory\n");
2489		error = ENOMEM;
2490		goto rx_fail;
2491	}
2492	rxr = adapter->rx_rings;
2493
2494	tsize = roundup2(adapter->num_tx_desc *
2495	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2496	/*
2497	 * Now set up the TX queues; txconf is needed to handle the
2498	 * possibility that things fail midcourse and we need to
2499	 * undo the memory allocations gracefully.
2500	 */
2501	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2502		/* Set up some basics */
2503		txr = &adapter->tx_rings[i];
2504		txr->adapter = adapter;
2505		txr->me = i;
2506
2507		/* Initialize the TX lock */
2508		snprintf(name_string, sizeof(name_string), "%s:tx(%d)",
2509		    device_get_nameunit(dev), txr->me);
2510		mtx_init(&txr->tx_mtx, name_string, NULL, MTX_DEF);
2511
2512		if (igb_dma_malloc(adapter, tsize,
2513			&txr->txdma, BUS_DMA_NOWAIT)) {
2514			device_printf(dev,
2515			    "Unable to allocate TX Descriptor memory\n");
2516			error = ENOMEM;
2517			goto err_tx_desc;
2518		}
2519		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2520		bzero((void *)txr->tx_base, tsize);
2521
2522        	/* Now allocate transmit buffers for the ring */
2523        	if (igb_allocate_transmit_buffers(txr)) {
2524			device_printf(dev,
2525			    "Critical Failure setting up transmit buffers\n");
2526			error = ENOMEM;
2527			goto err_tx_desc;
2528        	}
2529
2530	}
2531
2532	/*
2533	 * Next the RX queues...
2534	 */
2535	rsize = roundup2(adapter->num_rx_desc *
2536	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2537	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2538		rxr = &adapter->rx_rings[i];
2539		rxr->adapter = adapter;
2540		rxr->me = i;
2541
2542		/* Initialize the RX lock */
2543		snprintf(name_string, sizeof(name_string), "%s:rx(%d)",
2544		    device_get_nameunit(dev), rxr->me);
2545		mtx_init(&rxr->rx_mtx, name_string, NULL, MTX_DEF);
2546
2547		if (igb_dma_malloc(adapter, rsize,
2548			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2549			device_printf(dev,
2550		    "Unable to allocate RX Descriptor memory\n");
2551			error = ENOMEM;
2552			goto err_rx_desc;
2553		}
2554		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2555		bzero((void *)rxr->rx_base, rsize);
2556
2557        	/* Allocate receive buffers for the ring*/
2558		if (igb_allocate_receive_buffers(rxr)) {
2559			device_printf(dev,
2560			    "Critical Failure setting up receive buffers\n");
2561			error = ENOMEM;
2562			goto err_rx_desc;
2563		}
2564	}
2565
2566	return (0);
2567
2568err_rx_desc:
2569	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2570		igb_dma_free(adapter, &rxr->rxdma);
2571err_tx_desc:
2572	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2573		igb_dma_free(adapter, &txr->txdma);
2574	free(adapter->rx_rings, M_DEVBUF);
2575rx_fail:
2576	free(adapter->tx_rings, M_DEVBUF);
2577fail:
2578	return (error);
2579}
2580
2581/*********************************************************************
2582 *
2583 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2584 *  the information needed to transmit a packet on the wire. This is
2585 *  called only once at attach, setup is done every reset.
2586 *  called only once at attach; setup is done on every reset.
2587 **********************************************************************/
2588static int
2589igb_allocate_transmit_buffers(struct tx_ring *txr)
2590{
2591	struct adapter *adapter = txr->adapter;
2592	device_t dev = adapter->dev;
2593	struct igb_buffer *txbuf;
2594	int error, i;
2595
2596	/*
2597	 * Setup DMA descriptor areas.
2598	 */
2599	if ((error = bus_dma_tag_create(NULL,		/* parent */
2600			       PAGE_SIZE, 0,		/* alignment, bounds */
2601			       BUS_SPACE_MAXADDR,	/* lowaddr */
2602			       BUS_SPACE_MAXADDR,	/* highaddr */
2603			       NULL, NULL,		/* filter, filterarg */
2604			       IGB_TSO_SIZE,		/* maxsize */
2605			       IGB_MAX_SCATTER,		/* nsegments */
2606			       PAGE_SIZE,		/* maxsegsize */
2607			       0,			/* flags */
2608			       NULL,			/* lockfunc */
2609			       NULL,			/* lockfuncarg */
2610			       &txr->txtag))) {
2611		device_printf(dev,"Unable to allocate TX DMA tag\n");
2612		goto fail;
2613	}
2614
2615	if (!(txr->tx_buffers =
2616	    (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2617	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2618		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2619		error = ENOMEM;
2620		goto fail;
2621	}
2622
2623        /* Create the descriptor buffer dma maps */
2624	txbuf = txr->tx_buffers;
2625	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2626		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2627		if (error != 0) {
2628			device_printf(dev, "Unable to create TX DMA map\n");
2629			goto fail;
2630		}
2631	}
2632
2633	return 0;
2634fail:
2635	/* Free everything; this handles the case where we failed partway through */
2636	igb_free_transmit_structures(adapter);
2637	return (error);
2638}
2639
2640/*********************************************************************
2641 *
2642 *  Initialize a transmit ring.
2643 *
2644 **********************************************************************/
2645static void
2646igb_setup_transmit_ring(struct tx_ring *txr)
2647{
2648	struct adapter *adapter = txr->adapter;
2649	struct igb_buffer *txbuf;
2650	int i;
2651
2652	/* Clear the old ring contents */
2653	bzero((void *)txr->tx_base,
2654	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2655	/* Reset indices */
2656	txr->next_avail_desc = 0;
2657	txr->next_to_clean = 0;
2658
2659	/* Free any existing tx buffers. */
2660        txbuf = txr->tx_buffers;
2661	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2662		if (txbuf->m_head != NULL) {
2663			bus_dmamap_sync(txr->txtag, txbuf->map,
2664			    BUS_DMASYNC_POSTWRITE);
2665			bus_dmamap_unload(txr->txtag, txbuf->map);
2666			m_freem(txbuf->m_head);
2667			txbuf->m_head = NULL;
2668		}
2669		/* clear the watch index */
2670		txbuf->next_eop = -1;
2671        }
2672
2673	/* Set number of descriptors available */
2674	txr->tx_avail = adapter->num_tx_desc;
2675
2676	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2677	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2678
2679}
2680
2681/*********************************************************************
2682 *
2683 *  Initialize all transmit rings.
2684 *
2685 **********************************************************************/
2686static void
2687igb_setup_transmit_structures(struct adapter *adapter)
2688{
2689	struct tx_ring *txr = adapter->tx_rings;
2690
2691	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2692		igb_setup_transmit_ring(txr);
2693
2694	return;
2695}
2696
2697/*********************************************************************
2698 *
2699 *  Enable transmit unit.
2700 *
2701 **********************************************************************/
2702static void
2703igb_initialize_transmit_units(struct adapter *adapter)
2704{
2705	struct tx_ring	*txr = adapter->tx_rings;
2706	u32		tctl, txdctl, tipg = 0;
2707
2708	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2709
2710	/* Setup the Base and Length of the Tx Descriptor Rings */
2711	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2712		u64 bus_addr = txr->txdma.dma_paddr;
2713
2714		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2715		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2716		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2717		    (uint32_t)(bus_addr >> 32));
2718		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2719		    (uint32_t)bus_addr);
2720
2721		/* Setup the HW Tx Head and Tail descriptor pointers */
2722		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2723		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2724
2725		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2726		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2727		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2728
2729		/* Setup Transmit Descriptor Base Settings */
2730		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2731
2732		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2733		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2734		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2735	}
2736
2737	/* Set the default values for the Tx Inter Packet Gap timer */
2738	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2739	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
2740		tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2741	else
2742		tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2743
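	/*
	 * TIPG packs three fields: the back-to-back transmit gap (IPGT)
	 * in the low bits, and the two receive-related gap values
	 * (IPGR1, IPGR2) at their respective shifts above it.
	 */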
2744	tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2745	tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2746
2747	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
2748	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
2749	E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value);
2750
2751	/* Program the Transmit Control Register */
2752	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2753	tctl &= ~E1000_TCTL_CT;
2754	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2755		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2756
2757	/* This write will effectively turn on the transmit unit. */
2758	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2759
2760}
2761
2762/*********************************************************************
2763 *
2764 *  Free all transmit rings.
2765 *
2766 **********************************************************************/
2767static void
2768igb_free_transmit_structures(struct adapter *adapter)
2769{
2770	struct tx_ring *txr = adapter->tx_rings;
2771
2772	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2773		IGB_TX_LOCK(txr);
2774		igb_free_transmit_buffers(txr);
2775		igb_dma_free(adapter, &txr->txdma);
2776		IGB_TX_UNLOCK(txr);
2777		IGB_TX_LOCK_DESTROY(txr);
2778	}
2779	free(adapter->tx_rings, M_DEVBUF);
2780}
2781
2782/*********************************************************************
2783 *
2784 *  Free transmit ring related data structures.
2785 *
2786 **********************************************************************/
2787static void
2788igb_free_transmit_buffers(struct tx_ring *txr)
2789{
2790	struct adapter *adapter = txr->adapter;
2791	struct igb_buffer *tx_buffer;
2792	int             i;
2793
2794	INIT_DEBUGOUT("free_transmit_ring: begin");
2795
2796	if (txr->tx_buffers == NULL)
2797		return;
2798
2799	tx_buffer = txr->tx_buffers;
2800	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2801		if (tx_buffer->m_head != NULL) {
2802			bus_dmamap_sync(txr->txtag, tx_buffer->map,
2803			    BUS_DMASYNC_POSTWRITE);
2804			bus_dmamap_unload(txr->txtag,
2805			    tx_buffer->map);
2806			m_freem(tx_buffer->m_head);
2807			tx_buffer->m_head = NULL;
2808			if (tx_buffer->map != NULL) {
2809				bus_dmamap_destroy(txr->txtag,
2810				    tx_buffer->map);
2811				tx_buffer->map = NULL;
2812			}
2813		} else if (tx_buffer->map != NULL) {
2814			bus_dmamap_unload(txr->txtag,
2815			    tx_buffer->map);
2816			bus_dmamap_destroy(txr->txtag,
2817			    tx_buffer->map);
2818			tx_buffer->map = NULL;
2819		}
2820	}
2821
2822	if (txr->tx_buffers != NULL) {
2823		free(txr->tx_buffers, M_DEVBUF);
2824		txr->tx_buffers = NULL;
2825	}
2826	if (txr->txtag != NULL) {
2827		bus_dma_tag_destroy(txr->txtag);
2828		txr->txtag = NULL;
2829	}
2830	return;
2831}
2832
2833/**********************************************************************
2834 *
2835 *  Setup work for hardware segmentation offload (TSO) on
2836 *  adapters using advanced tx descriptors (82575)
2837 *
2838 **********************************************************************/
2839static boolean_t
2840igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
2841{
2842	struct adapter *adapter = txr->adapter;
2843	struct e1000_adv_tx_context_desc *TXD;
2844	struct igb_buffer        *tx_buffer;
2845	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2846	u32 mss_l4len_idx = 0;
2847	u16 vtag = 0;
2848	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2849	struct ether_vlan_header *eh;
2850	struct ip *ip;
2851	struct tcphdr *th;
2852
2853
2854	/*
2855	 * Determine where frame payload starts.
2856	 * Jump over vlan headers if already present
2857	 */
2858	eh = mtod(mp, struct ether_vlan_header *);
2859	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
2860		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2861	else
2862		ehdrlen = ETHER_HDR_LEN;
2863
2864	/* Ensure we have at least the IP+TCP header in the first mbuf. */
2865	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
2866		return FALSE;
2867
2868	/* Only supports IPV4 for now */
2869	ctxd = txr->next_avail_desc;
2870	tx_buffer = &txr->tx_buffers[ctxd];
2871	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2872
2873	ip = (struct ip *)(mp->m_data + ehdrlen);
2874	if (ip->ip_p != IPPROTO_TCP)
2875                return FALSE;   /* 0 */
2876	ip->ip_sum = 0;
2877	ip_hlen = ip->ip_hl << 2;
2878	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
2879	th->th_sum = in_pseudo(ip->ip_src.s_addr,
2880	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
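	/*
	 * For TSO the TCP checksum is seeded with the pseudo-header sum
	 * over the addresses and protocol only (no length); the hardware
	 * folds in the proper length as it checksums each generated
	 * segment.
	 */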
2881	tcp_hlen = th->th_off << 2;
2882	/*
2883	 * Calculate header length, this is used
2884	 * in the transmit desc in igb_xmit
2885	 */
2886	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
2887
2888	/* VLAN MACLEN IPLEN */
2889	if (mp->m_flags & M_VLANTAG) {
2890		vtag = htole16(mp->m_pkthdr.ether_vtag);
2891		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2892	}
2893
2894	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
2895	vlan_macip_lens |= ip_hlen;
2896	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
2897
2898	/* ADV DTYPE TUCMD */
2899	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
2900	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
2901	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2902	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
2903
2904	/* MSS L4LEN IDX */
2905	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
2906	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
2907	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
2908
2909	TXD->seqnum_seed = htole32(0);
2910	tx_buffer->m_head = NULL;
2911	tx_buffer->next_eop = -1;
2912
2913	if (++ctxd == adapter->num_tx_desc)
2914		ctxd = 0;
2915
2916	txr->tx_avail--;
2917	txr->next_avail_desc = ctxd;
2918	return TRUE;
2919}
2920
2921
2922/*********************************************************************
2923 *
2924 *  Context Descriptor setup for VLAN or CSUM
2925 *
2926 **********************************************************************/
2927
2928static int
2929igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
2930{
2931	struct adapter *adapter = txr->adapter;
2932	struct e1000_adv_tx_context_desc *TXD;
2933	struct igb_buffer        *tx_buffer;
2934	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2935	struct ether_vlan_header *eh;
2936	struct ip *ip = NULL;
2937	struct ip6_hdr *ip6;
2938	int  ehdrlen, ip_hlen = 0;
2939	u16	etype;
2940	u8	ipproto = 0;
2941	bool	offload = TRUE;
2942	u16 vtag = 0;
2943
2944	int ctxd = txr->next_avail_desc;
2945	tx_buffer = &txr->tx_buffers[ctxd];
2946	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
2947
2948	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
2949		offload = FALSE; /* Only here to handle VLANs */
2950	/*
2951	** In advanced descriptors the vlan tag must
2952	** be placed into the descriptor itself.
2953	*/
2954	if (mp->m_flags & M_VLANTAG) {
2955		vtag = htole16(mp->m_pkthdr.ether_vtag);
2956		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
2957	} else if (offload == FALSE)
2958		return FALSE;
2959	/*
2960	 * Determine where frame payload starts.
2961	 * Jump over vlan headers if already present,
2962	 * helpful for QinQ too.
2963	 */
2964	eh = mtod(mp, struct ether_vlan_header *);
2965	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2966		etype = ntohs(eh->evl_proto);
2967		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
2968	} else {
2969		etype = ntohs(eh->evl_encap_proto);
2970		ehdrlen = ETHER_HDR_LEN;
2971	}
2972
2973	/* Set the ether header length */
2974	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
2975
2976	switch (etype) {
2977		case ETHERTYPE_IP:
2978			ip = (struct ip *)(mp->m_data + ehdrlen);
2979			ip_hlen = ip->ip_hl << 2;
2980			if (mp->m_len < ehdrlen + ip_hlen) {
2981				offload = FALSE;
2982				break;
2983			}
2984			ipproto = ip->ip_p;
2985			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
2986			break;
2987		case ETHERTYPE_IPV6:
2988			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
2989			ip_hlen = sizeof(struct ip6_hdr);
2990			if (mp->m_len < ehdrlen + ip_hlen)
2991				return FALSE; /* failure */
2992			ipproto = ip6->ip6_nxt;
2993			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
2994			break;
2995		default:
2996			offload = FALSE;
2997			break;
2998	}
2999
3000	vlan_macip_lens |= ip_hlen;
3001	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3002
3003	switch (ipproto) {
3004		case IPPROTO_TCP:
3005			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3006				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3007			break;
3008		case IPPROTO_UDP:
3009		{
3010			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3011				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3012			break;
3013		}
3014		default:
3015			offload = FALSE;
3016			break;
3017	}
3018
3019	/* Now copy bits into descriptor */
3020	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3021	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3022	TXD->seqnum_seed = htole32(0);
3023	TXD->mss_l4len_idx = htole32(0);
3024
3025	tx_buffer->m_head = NULL;
3026	tx_buffer->next_eop = -1;
3027
3028	/* We've consumed the first desc, adjust counters */
3029	if (++ctxd == adapter->num_tx_desc)
3030		ctxd = 0;
3031	txr->next_avail_desc = ctxd;
3032	--txr->tx_avail;
3033
3034        return (offload);
3035}
3036
3037
3038/**********************************************************************
3039 *
3040 *  Examine each tx_buffer in the used queue. If the hardware is done
3041 *  processing the packet, then free the associated resources. The
3042 *  tx_buffer is put back on the free queue.
3043 *
3044 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3045 **********************************************************************/
3046static bool
3047igb_txeof(struct tx_ring *txr)
3048{
3049	struct adapter	*adapter = txr->adapter;
3050        int first, last, done, num_avail;
3051        struct igb_buffer *tx_buffer;
3052        struct e1000_tx_desc   *tx_desc, *eop_desc;
3053	struct ifnet   *ifp = adapter->ifp;
3054
3055	IGB_TX_LOCK_ASSERT(txr);
3056
3057        if (txr->tx_avail == adapter->num_tx_desc)
3058                return FALSE;
3059
3060        num_avail = txr->tx_avail;
3061        first = txr->next_to_clean;
3062        tx_desc = &txr->tx_base[first];
3063        tx_buffer = &txr->tx_buffers[first];
3064	last = tx_buffer->next_eop;
3065        eop_desc = &txr->tx_base[last];
3066
3067	/*
3068	 * Get the index of the first descriptor
3069	 * AFTER the EOP of the first packet, so that
3070	 * the inner while loop can use a simple
3071	 * comparison to detect the end of that packet.
3072	 */
3073	if (++last == adapter->num_tx_desc)
3074 		last = 0;
3075	done = last;
3076
3077        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3078            BUS_DMASYNC_POSTREAD);
3079
3080        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3081		/* We clean the range of the packet */
3082		while (first != done) {
3083                	tx_desc->upper.data = 0;
3084                	tx_desc->lower.data = 0;
3085                	tx_desc->buffer_addr = 0;
3086                	num_avail++;
3087
3088			if (tx_buffer->m_head) {
3089				ifp->if_opackets++;
3090				bus_dmamap_sync(txr->txtag,
3091				    tx_buffer->map,
3092				    BUS_DMASYNC_POSTWRITE);
3093				bus_dmamap_unload(txr->txtag,
3094				    tx_buffer->map);
3095
3096                        	m_freem(tx_buffer->m_head);
3097                        	tx_buffer->m_head = NULL;
3098                	}
3099			tx_buffer->next_eop = -1;
3100
3101	                if (++first == adapter->num_tx_desc)
3102				first = 0;
3103
3104	                tx_buffer = &txr->tx_buffers[first];
3105			tx_desc = &txr->tx_base[first];
3106		}
3107		/* See if we can continue to the next packet */
3108		last = tx_buffer->next_eop;
3109		if (last != -1) {
3110        		eop_desc = &txr->tx_base[last];
3111			/* Get new done point */
3112			if (++last == adapter->num_tx_desc) last = 0;
3113			done = last;
3114		} else
3115			break;
3116        }
3117        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3118            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3119
3120        txr->next_to_clean = first;
3121
3122        /*
3123         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3124         * that it is OK to send packets.
3125         * If there are no pending descriptors, clear the timeout. Otherwise,
3126         * if some descriptors have been freed, restart the timeout.
3127         */
3128        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3129                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3130		/* All clean, turn off the timer */
3131                if (num_avail == adapter->num_tx_desc) {
3132			txr->watchdog_timer = 0;
3133        		txr->tx_avail = num_avail;
3134			return FALSE;
3135		}
3136		/* Some cleaned, reset the timer */
3137                else if (num_avail != txr->tx_avail)
3138			txr->watchdog_timer = IGB_TX_TIMEOUT;
3139        }
3140        txr->tx_avail = num_avail;
3141        return TRUE;
3142}
3143
3144
3145/*********************************************************************
3146 *
3147 *  Get a buffer from the system mbuf cluster pool.
3148 *
3149 **********************************************************************/
3150static int
3151igb_get_buf(struct rx_ring *rxr, int i)
3152{
3153	struct adapter		*adapter = rxr->adapter;
3154	struct mbuf		*m;
3155	bus_dma_segment_t	segs[1];
3156	bus_dmamap_t		map;
3157	struct igb_buffer	*rx_buffer;
3158	int			error, nsegs;
3159
3160	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3161	if (m == NULL) {
3162		adapter->mbuf_cluster_failed++;
3163		return (ENOBUFS);
3164	}
3165	m->m_len = m->m_pkthdr.len = MCLBYTES;
3166
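	/*
	 * When the frame fits in a single cluster, shift the data
	 * pointer by ETHER_ALIGN (2 bytes) so the IP header that
	 * follows the 14-byte Ethernet header lands on a 4-byte
	 * boundary.
	 */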
3167	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3168		m_adj(m, ETHER_ALIGN);
3169
3170	/*
3171	 * Using memory from the mbuf cluster pool, invoke the
3172	 * bus_dma machinery to arrange the memory mapping.
3173	 */
3174	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3175	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
3176	if (error != 0) {
3177		m_free(m);
3178		return (error);
3179	}
3180
3181	/* If nsegs is wrong then the stack is corrupt. */
3182	KASSERT(nsegs == 1, ("Too many segments returned!"));
3183
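	/*
	 * The new mbuf was loaded into the spare map above, so a failure
	 * would have left the slot's existing mapping untouched. Now
	 * swap the spare map with the slot's map and attach the new
	 * mbuf to the descriptor.
	 */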
3184	rx_buffer = &rxr->rx_buffers[i];
3185	if (rx_buffer->m_head != NULL)
3186		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3187
3188	map = rx_buffer->map;
3189	rx_buffer->map = rxr->rx_spare_map;
3190	rxr->rx_spare_map = map;
3191	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3192	rx_buffer->m_head = m;
3193
3194	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
3195	return (0);
3196}
3197
3198
3199/*********************************************************************
3200 *
3201 *  Allocate memory for rx_buffer structures. Since we use one
3202 *  rx_buffer per received packet, the maximum number of rx_buffers
3203 *  that we'll need is equal to the number of receive descriptors
3204 *  that we've allocated.
3205 *
3206 **********************************************************************/
3207static int
3208igb_allocate_receive_buffers(struct rx_ring *rxr)
3209{
3210	struct	adapter 	*adapter = rxr->adapter;
3211	device_t 		dev = adapter->dev;
3212	struct igb_buffer 	*rxbuf;
3213	int             	i, bsize, error;
3214
3215	bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3216	if (!(rxr->rx_buffers =
3217	    (struct igb_buffer *) malloc(bsize,
3218	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3219		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3220		error = ENOMEM;
3221		goto fail;
3222	}
3223
3224	if ((error = bus_dma_tag_create(NULL,		/* parent */
3225				   PAGE_SIZE, 0,	/* alignment, bounds */
3226				   BUS_SPACE_MAXADDR,	/* lowaddr */
3227				   BUS_SPACE_MAXADDR,	/* highaddr */
3228				   NULL, NULL,		/* filter, filterarg */
3229				   MCLBYTES,		/* maxsize */
3230				   1,			/* nsegments */
3231				   MCLBYTES,		/* maxsegsize */
3232				   0,			/* flags */
3233				   NULL,		/* lockfunc */
3234				   NULL,		/* lockfuncarg */
3235				   &rxr->rxtag))) {
3236		device_printf(dev, "Unable to create RX Small DMA tag\n");
3237		goto fail;
3238	}
3239
3240	/* Create the spare map (used by getbuf) */
3241        error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3242	     &rxr->rx_spare_map);
3243	if (error) {
3244		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3245		    __func__, error);
3246		goto fail;
3247	}
3248
3249	for (i = 0; i < adapter->num_rx_desc; i++) {
3250		rxbuf = &rxr->rx_buffers[i];
3251		error = bus_dmamap_create(rxr->rxtag,
3252		    BUS_DMA_NOWAIT, &rxbuf->map);
3253		if (error) {
3254			device_printf(dev, "Unable to create Small RX DMA map\n");
3255			goto fail;
3256		}
3257	}
3258
3259	return (0);
3260
3261fail:
3262	/* Frees all, but can handle partial completion */
3263	igb_free_receive_structures(adapter);
3264	return (error);
3265}
3266
3267/*********************************************************************
3268 *
3269 *  Initialize a receive ring and its buffers.
3270 *
3271 **********************************************************************/
3272static int
3273igb_setup_receive_ring(struct rx_ring *rxr)
3274{
3275	struct	adapter	*adapter;
3276	struct igb_buffer *rxbuf;
3277	int j, rsize;
3278
3279	adapter = rxr->adapter;
3280	rsize = roundup2(adapter->num_rx_desc *
3281	    sizeof(union e1000_adv_rx_desc), 4096);
3282	/* Clear the ring contents */
3283	bzero((void *)rxr->rx_base, rsize);
3284
3285	/*
3286	** Free any RX buffers that are currently
3287	** loaded before the ring is refilled with
3288	** fresh mbufs by igb_get_buf() below.
3289	*/
3290	for (int i = 0; i < adapter->num_rx_desc; i++) {
3291		rxbuf = &rxr->rx_buffers[i];
3292		if (rxbuf->m_head != NULL) {
3293			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3294			    BUS_DMASYNC_POSTREAD);
3295			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3296			m_freem(rxbuf->m_head);
3297			rxbuf->m_head = NULL;
3298		}
3299	}
3300
3301	for (j = 0; j < adapter->num_rx_desc; j++) {
3302		if (igb_get_buf(rxr, j) == ENOBUFS) {
3303			rxr->rx_buffers[j].m_head = NULL;
3304			rxr->rx_base[j].read.pkt_addr = 0;
3305			goto fail;
3306		}
3307	}
3308
3309	/* Setup our descriptor indices */
3310	rxr->next_to_check = 0;
3311	rxr->last_cleaned = 0;
3312
3313	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3314	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3315
3316	return (0);
3317fail:
3318	/*
3319	 * We need to clean up any buffers allocated so far;
3320	 * 'j' is the failing index, so decrement it to get
3321	 * the last successfully allocated buffer.
3322	 */
3323	for (--j; j >= 0; j--) {
3324		rxbuf = &rxr->rx_buffers[j];
3325		if (rxbuf->m_head != NULL) {
3326			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3327			    BUS_DMASYNC_POSTREAD);
3328			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3329			m_freem(rxbuf->m_head);
3330			rxbuf->m_head = NULL;
3331		}
3332	}
3333	return (ENOBUFS);
3334}
3335
3336/*********************************************************************
3337 *
3338 *  Initialize all receive rings.
3339 *
3340 **********************************************************************/
3341static int
3342igb_setup_receive_structures(struct adapter *adapter)
3343{
3344	struct rx_ring *rxr = adapter->rx_rings;
3345	int i, j;
3346
3347	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3348		if (igb_setup_receive_ring(rxr))
3349			goto fail;
3350
3351	return (0);
3352fail:
3353	/*
3354	 * Free RX buffers allocated so far; we will only handle
3355	 * the rings that completed, since the failing case will
3356	 * have cleaned up after itself. The value of 'i' is the
3357	 * failed ring, so we must pre-decrement it.
3358	 */
3359	rxr = adapter->rx_rings;
3360	for (--i; i >= 0; i--, rxr++) {
3361		for (j = 0; j < adapter->num_rx_desc; j++) {
3362			struct igb_buffer *rxbuf;
3363			rxbuf = &rxr->rx_buffers[j];
3364			if (rxbuf->m_head != NULL) {
3365				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3366			  	  BUS_DMASYNC_POSTREAD);
3367				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3368				m_freem(rxbuf->m_head);
3369				rxbuf->m_head = NULL;
3370			}
3371		}
3372	}
3373
3374	return (ENOBUFS);
3375}
3376
3377/*********************************************************************
3378 *
3379 *  Enable receive unit.
3380 *
3381 **********************************************************************/
3382static void
3383igb_initialize_receive_units(struct adapter *adapter)
3384{
3385	struct rx_ring	*rxr = adapter->rx_rings;
3386	struct ifnet	*ifp = adapter->ifp;
3387	u32		rctl, rxcsum, psize;
3388
3389	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3390	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3391	/*
3392	 * Make sure receives are disabled while setting
3393	 * up the descriptor ring
3394	 */
3395	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3396	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3397
3398	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3399	    adapter->rx_abs_int_delay.value);
3400
3401	/* Setup the Base and Length of the Rx Descriptor Rings */
3402	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3403		u64 bus_addr = rxr->rxdma.dma_paddr;
3404		u32 rxdctl, srrctl;
3405
3406		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3407		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3408		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3409		    (uint32_t)(bus_addr >> 32));
3410		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3411		    (uint32_t)bus_addr);
3412		/* Use Advanced Descriptor type */
3413		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
3414		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3415		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3416		/* Enable this Queue */
3417		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3418		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
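		/*
		 * Clear the descriptor threshold fields and program the
		 * prefetch (PTHRESH), host (HTHRESH) and write-back
		 * (WTHRESH) thresholds used by this queue.
		 */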
3419		rxdctl &= 0xFFF00000;
3420		rxdctl |= IGB_RX_PTHRESH;
3421		rxdctl |= IGB_RX_HTHRESH << 8;
3422		rxdctl |= IGB_RX_WTHRESH << 16;
3423		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3424	}
3425
3426	/*
3427	** Setup for RX MultiQueue
3428	*/
3429	if (adapter->num_rx_queues > 1) {
3430		u32 random[10], mrqc, shift = 0;
3431		union igb_reta {
3432			u32 dword;
3433			u8  bytes[4];
3434		} reta;
3435
3436		arc4rand(&random, sizeof(random), 0);
3437		if (adapter->hw.mac.type == e1000_82575)
3438			shift = 6;
3439		/* Warning FM follows */
3440		for (int i = 0; i < 128; i++) {
3441			reta.bytes[i & 3] =
3442			    (i % adapter->num_rx_queues) << shift;
3443			if ((i & 3) == 3)
3444				E1000_WRITE_REG(&adapter->hw,
3445				    E1000_RETA(i & ~3), reta.dword);
3446		}
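		/*
		 * Example (hypothetical): with two RX queues the 128
		 * redirection table entries alternate between queue 0 and
		 * queue 1, written four entries per RETA register; on the
		 * 82575 the queue index sits in the upper bits of each
		 * byte (shift of 6).
		 */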
3447		/* Now fill in hash table */
3448		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3449		for (int i = 0; i < 10; i++)
3450			E1000_WRITE_REG_ARRAY(&adapter->hw,
3451			    E1000_RSSRK(0), i, random[i]);
3452
3453		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3454		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3455		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3456		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3457		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3458		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3459		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3460		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3461
3462		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3463
3464		/*
3465		** NOTE: Receive Full-Packet Checksum Offload
3466		** is mutually exclusive with Multiqueue. However,
3467		** this is not the same as the TCP/IP checksum
3468		** offloads, which still work.
3469		*/
3470		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3471		rxcsum |= E1000_RXCSUM_PCSD;
3472		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3473	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
3474		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3475		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3476		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3477	}
3478
3479	/* Setup the Receive Control Register */
3480	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3481	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3482		   E1000_RCTL_RDMTS_HALF |
3483		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
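	/*
	 * The MO (multicast offset) field selects which bits of an
	 * incoming multicast address index the multicast hash table,
	 * matching the filter type chosen by the shared code.
	 */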
3484
3485	/* Make sure VLAN Filters are off */
3486	rctl &= ~E1000_RCTL_VFE;
3487
3488	rctl &= ~E1000_RCTL_SBP;
3489
3490	switch (adapter->rx_buffer_len) {
3491	default:
3492	case 2048:
3493		rctl |= E1000_RCTL_SZ_2048;
3494		break;
3495	case 4096:
3496		rctl |= E1000_RCTL_SZ_4096 |
3497		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3498		break;
3499	case 8192:
3500		rctl |= E1000_RCTL_SZ_8192 |
3501		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3502		break;
3503	case 16384:
3504		rctl |= E1000_RCTL_SZ_16384 |
3505		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3506		break;
3507	}
3508
3509	if (ifp->if_mtu > ETHERMTU) {
3510		/* Set maximum packet len */
3511		psize = adapter->max_frame_size;
3512		/* are we on a vlan? */
3513		if (adapter->ifp->if_vlantrunk != NULL)
3514			psize += VLAN_TAG_SIZE;
3515		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3516		rctl |= E1000_RCTL_LPE;
3517	} else
3518		rctl &= ~E1000_RCTL_LPE;
3519
3520	/* Enable Receives */
3521	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3522
3523	/*
3524	 * Setup the HW Rx Head and Tail Descriptor Pointers
3525	 *   - needs to be after enable
3526	 */
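	/*
	 * Head starts at zero and tail at the last descriptor, handing
	 * the whole ring to the hardware; igb_rxeof() advances the tail
	 * as descriptors are cleaned and refilled.
	 */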
3527	for (int i = 0; i < adapter->num_rx_queues; i++) {
3528		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3529		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3530		     adapter->num_rx_desc - 1);
3531	}
3532	return;
3533}
3534
3535/*********************************************************************
3536 *
3537 *  Free receive rings.
3538 *
3539 **********************************************************************/
3540static void
3541igb_free_receive_structures(struct adapter *adapter)
3542{
3543	struct rx_ring *rxr = adapter->rx_rings;
3544
3545	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3546		igb_free_receive_buffers(rxr);
3547		igb_dma_free(adapter, &rxr->rxdma);
3548	}
3549
3550	free(adapter->rx_rings, M_DEVBUF);
3551}
3552
3553/*********************************************************************
3554 *
3555 *  Free receive ring data structures.
3556 *
3557 **********************************************************************/
3558static void
3559igb_free_receive_buffers(struct rx_ring *rxr)
3560{
3561	struct adapter	*adapter = rxr->adapter;
3562	struct igb_buffer *rx_buffer;
3563
3564	INIT_DEBUGOUT("free_receive_structures: begin");
3565
3566	if (rxr->rx_spare_map) {
3567		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3568		rxr->rx_spare_map = NULL;
3569	}
3570
3571	/* Cleanup any existing buffers */
3572	if (rxr->rx_buffers != NULL) {
3573		rx_buffer = &rxr->rx_buffers[0];
3574		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3575			if (rx_buffer->m_head != NULL) {
3576				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3577				    BUS_DMASYNC_POSTREAD);
3578				bus_dmamap_unload(rxr->rxtag,
3579				    rx_buffer->map);
3580				m_freem(rx_buffer->m_head);
3581				rx_buffer->m_head = NULL;
3582			} else if (rx_buffer->map != NULL)
3583				bus_dmamap_unload(rxr->rxtag,
3584				    rx_buffer->map);
3585			if (rx_buffer->map != NULL) {
3586				bus_dmamap_destroy(rxr->rxtag,
3587				    rx_buffer->map);
3588				rx_buffer->map = NULL;
3589			}
3590		}
3591	}
3592
3593	if (rxr->rx_buffers != NULL) {
3594		free(rxr->rx_buffers, M_DEVBUF);
3595		rxr->rx_buffers = NULL;
3596	}
3597
3598	if (rxr->rxtag != NULL) {
3599		bus_dma_tag_destroy(rxr->rxtag);
3600		rxr->rxtag = NULL;
3601	}
3602}
3603/*********************************************************************
3604 *
3605 *  This routine executes in interrupt context. It replenishes
3606 *  the mbufs in the descriptor and sends data which has been
3607 *  dma'ed into host memory to upper layer.
3608 *
3609 *  We loop at most count times if count is > 0, or until done if
3610 *  count < 0.
3611 *
3612 *  Return TRUE if more descriptors remain to be cleaned, FALSE if clean
3613 *********************************************************************/
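/*
 * Note: the RX lock is dropped around if_input() so the stack can
 * process the packet without holding the ring lock; next_to_check is
 * saved before unlocking and re-read after relocking.
 */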
3614static bool
3615igb_rxeof(struct rx_ring *rxr, int count)
3616{
3617	struct adapter	*adapter = rxr->adapter;
3618	struct ifnet	*ifp;
3619	struct mbuf	*mp;
3620	uint8_t		accept_frame = 0;
3621	uint8_t		eop = 0;
3622	uint16_t 	len, desc_len, prev_len_adj;
3623	int		i;
3624	union e1000_adv_rx_desc   *cur;
3625	u32		staterr;
3626
3627	IGB_RX_LOCK(rxr);
3628	ifp = adapter->ifp;
3629	i = rxr->next_to_check;
3630	cur = &rxr->rx_base[i];
3631	staterr = cur->wb.upper.status_error;
3632
3633	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3634	    BUS_DMASYNC_POSTREAD);
3635
3636	if (!(staterr & E1000_RXD_STAT_DD)) {
3637		IGB_RX_UNLOCK(rxr);
3638		return FALSE;
3639	}
3640
3641	while ((staterr & E1000_RXD_STAT_DD) &&
3642	    (count != 0) &&
3643	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3644		struct mbuf *m = NULL;
3645
3646		mp = rxr->rx_buffers[i].m_head;
3647		/*
3648		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
3649		 * needs to access the last received byte in the mbuf.
3650		 */
3651		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
3652		    BUS_DMASYNC_POSTREAD);
3653
3654		accept_frame = 1;
3655		prev_len_adj = 0;
3656		desc_len = le16toh(cur->wb.upper.length);
3657		if (staterr & E1000_RXD_STAT_EOP) {
3658			count--;
3659			eop = 1;
3660			if (desc_len < ETHER_CRC_LEN) {
3661				len = 0;
3662				prev_len_adj = ETHER_CRC_LEN - desc_len;
3663			} else
3664				len = desc_len - ETHER_CRC_LEN;
3665		} else {
3666			eop = 0;
3667			len = desc_len;
3668		}
3669
3670		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
3671			u32	pkt_len = desc_len;
3672
3673			if (rxr->fmp != NULL)
3674				pkt_len += rxr->fmp->m_pkthdr.len;
3675
3676			accept_frame = 0;
3677		}
3678
3679		if (accept_frame) {
3680			if (igb_get_buf(rxr, i) != 0) {
3681				ifp->if_iqdrops++;
3682				goto discard;
3683			}
3684
3685			/* Assign correct length to the current fragment */
3686			mp->m_len = len;
3687
3688			if (rxr->fmp == NULL) {
3689				mp->m_pkthdr.len = len;
3690				rxr->fmp = mp; /* Store the first mbuf */
3691				rxr->lmp = mp;
3692			} else {
3693				/* Chain mbuf's together */
3694				mp->m_flags &= ~M_PKTHDR;
3695				/*
3696				 * Adjust length of previous mbuf in chain if
3697				 * we received less than 4 bytes in the last
3698				 * descriptor.
3699				 */
3700				if (prev_len_adj > 0) {
3701					rxr->lmp->m_len -= prev_len_adj;
3702					rxr->fmp->m_pkthdr.len -=
3703					    prev_len_adj;
3704				}
3705				rxr->lmp->m_next = mp;
3706				rxr->lmp = rxr->lmp->m_next;
3707				rxr->fmp->m_pkthdr.len += len;
3708			}
3709
3710			if (eop) {
3711				rxr->fmp->m_pkthdr.rcvif = ifp;
3712				ifp->if_ipackets++;
3713				rxr->rx_packets++;
3714				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
3715
3716				igb_rx_checksum(staterr, rxr->fmp);
3717#ifndef __NO_STRICT_ALIGNMENT
3718				if (adapter->max_frame_size >
3719				    (MCLBYTES - ETHER_ALIGN) &&
3720				    igb_fixup_rx(rxr) != 0)
3721					goto skip;
3722#endif
3723				if (staterr & E1000_RXD_STAT_VP) {
3724					rxr->fmp->m_pkthdr.ether_vtag =
3725					    le16toh(cur->wb.upper.vlan);
3726					rxr->fmp->m_flags |= M_VLANTAG;
3727				}
3728#ifndef __NO_STRICT_ALIGNMENT
3729skip:
3730#endif
3731				m = rxr->fmp;
3732				rxr->fmp = NULL;
3733				rxr->lmp = NULL;
3734			}
3735		} else {
3736			ifp->if_ierrors++;
3737discard:
3738			/* Reuse loaded DMA map and just update mbuf chain */
3739			mp = rxr->rx_buffers[i].m_head;
3740			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3741			mp->m_data = mp->m_ext.ext_buf;
3742			mp->m_next = NULL;
3743			if (adapter->max_frame_size <=
3744			    (MCLBYTES - ETHER_ALIGN))
3745				m_adj(mp, ETHER_ALIGN);
3746			if (rxr->fmp != NULL) {
3747				m_freem(rxr->fmp);
3748				rxr->fmp = NULL;
3749				rxr->lmp = NULL;
3750			}
3751			m = NULL;
3752		}
3753
3754		/* Zero out the receive descriptor's status. */
3755		cur->wb.upper.status_error = 0;
3756		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3757		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3758
3759		rxr->last_cleaned = i; /* For updating tail */
3760
3761		/* Advance our pointers to the next descriptor. */
3762		if (++i == adapter->num_rx_desc)
3763			i = 0;
3764
3765		if (m != NULL) {
3766			rxr->next_to_check = i;
3767			/* Pass up to the stack */
3768			IGB_RX_UNLOCK(rxr);
3769			(*ifp->if_input)(ifp, m);
3770			IGB_RX_LOCK(rxr);
3771			i = rxr->next_to_check;
3772		}
3773		/* Get the next descriptor */
3774		cur = &rxr->rx_base[i];
3775		staterr = cur->wb.upper.status_error;
3776	}
3777	rxr->next_to_check = i;
3778
3779	/* Advance this ring's Receive Queue "Tail Pointer". */
3780	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
3781	IGB_RX_UNLOCK(rxr);
3782
3783	if (!((staterr) & E1000_RXD_STAT_DD))
3784		return FALSE;
3785
3786	return TRUE;
3787}
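/*
 * Illustrative use (a sketch only, not the driver's actual handler
 * code): an interrupt or taskqueue handler would typically call
 * igb_rxeof() with a bounded budget and keep itself scheduled while
 * the routine reports that descriptors are still pending:
 *
 *	more = igb_rxeof(rxr, budget);
 *	if (more)
 *		reschedule();	// placeholder: re-queue the task or re-arm
 */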
3788
3789#ifndef __NO_STRICT_ALIGNMENT
3790/*
3791 * When jumbo frames are enabled we should realign the entire payload on
3792 * architectures with strict alignment. This is a serious design mistake of
3793 * the 8254x as it nullifies DMA operations. The 8254x only allows the RX
3794 * buffer size to be 2048/4096/8192/16384; what we really want is
3795 * 2048 - ETHER_ALIGN so that the payload is aligned. On architectures
3796 * without strict alignment restrictions the 8254x still performs unaligned
3797 * memory accesses, which also hurts performance. To avoid copying an
3798 * entire frame just to align it, we allocate a new mbuf, copy the ethernet
3799 * header into it, and prepend the new mbuf to the existing mbuf chain.
3800 *
3801 * Be aware that best performance of the 8254x is achieved only when jumbo
3802 * frames are not used at all on architectures with strict alignment.
3803 */
3804static int
3805igb_fixup_rx(struct rx_ring *rxr)
3806{
3807	struct adapter *adapter = rxr->adapter;
3808	struct mbuf *m, *n;
3809	int error;
3810
3811	error = 0;
3812	m = rxr->fmp;
3813	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3814		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3815		m->m_data += ETHER_HDR_LEN;
3816	} else {
3817		MGETHDR(n, M_DONTWAIT, MT_DATA);
3818		if (n != NULL) {
3819			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3820			m->m_data += ETHER_HDR_LEN;
3821			m->m_len -= ETHER_HDR_LEN;
3822			n->m_len = ETHER_HDR_LEN;
3823			M_MOVE_PKTHDR(n, m);
3824			n->m_next = m;
3825			rxr->fmp = n;
3826		} else {
3827			adapter->dropped_pkts++;
3828			m_freem(rxr->fmp);
3829			rxr->fmp = NULL;
3830			error = ENOMEM;
3831		}
3832	}
3833
3834	return (error);
3835}
3836#endif
3837
3838/*********************************************************************
3839 *
3840 *  Verify that the hardware indicated that the checksum is valid.
3841 *  Inform the stack about the status of checksum so that stack
3842 *  doesn't spend time verifying the checksum.
3843 *
3844 *********************************************************************/
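/*
 * The flags set here follow the usual mbuf(9) conventions:
 * CSUM_IP_CHECKED | CSUM_IP_VALID report a verified IP header
 * checksum, and CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with
 * csum_data = 0xffff tell the stack that the TCP/UDP checksum has
 * already been verified, so it is not recomputed in software.
 */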
3845static void
3846igb_rx_checksum(u32 staterr, struct mbuf *mp)
3847{
3848	u16 status = (u16)staterr;
3849	u8  errors = (u8) (staterr >> 24);
3850
3851	/* Ignore Checksum bit is set */
3852	/* The Ignore Checksum Indication (IXSM) bit is set */
3853		mp->m_pkthdr.csum_flags = 0;
3854		return;
3855	}
3856
3857	if (status & E1000_RXD_STAT_IPCS) {
3858		/* Did it pass? */
3859		if (!(errors & E1000_RXD_ERR_IPE)) {
3860			/* IP Checksum Good */
3861			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3862			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3863
3864		} else
3865			mp->m_pkthdr.csum_flags = 0;
3866	}
3867
3868	if (status & E1000_RXD_STAT_TCPCS) {
3869		/* Did it pass? */
3870		if (!(errors & E1000_RXD_ERR_TCPE)) {
3871			mp->m_pkthdr.csum_flags |=
3872			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3873			mp->m_pkthdr.csum_data = htons(0xffff);
3874		}
3875	}
3876	return;
3877}
3878
3879/*
3880 * This turns on the hardware offload of VLAN
3881 * tag insertion and stripping
3882 */
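/*
 * Only CTRL.VME (tag stripping on receive and insertion on transmit)
 * is turned on here; VLAN filtering stays disabled, since RCTL.VFE is
 * explicitly cleared in the receive setup code above.
 */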
3883static void
3884igb_enable_hw_vlans(struct adapter *adapter)
3885{
3886	uint32_t ctrl;
3887
3888	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
3889	ctrl |= E1000_CTRL_VME;
3890	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
3891}
3892
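/*
 * Enable interrupts. In MSIX mode the vectors in eims_mask are
 * programmed into the auto-clear (EIAC) and auto-mask (EIAM)
 * registers and then enabled via EIMS, with the link status change
 * cause enabled separately through IMS; otherwise the legacy IMS
 * enable mask is used.
 */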
3893static void
3894igb_enable_intr(struct adapter *adapter)
3895{
3896	/* With RSS set up what to auto clear */
3897	if (adapter->msix_mem) {
3898		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
3899		    adapter->eims_mask);
3900		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
3901		    adapter->eims_mask);
3902		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
3903		    adapter->eims_mask);
3904		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3905		    E1000_IMS_LSC);
3906	} else {
3907		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
3908		    IMS_ENABLE_MASK);
3909	}
3910	E1000_WRITE_FLUSH(&adapter->hw);
3911
3912	return;
3913}
3914
3915static void
3916igb_disable_intr(struct adapter *adapter)
3917{
3918	if (adapter->msix_mem) {
3919		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
3920		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
3921	}
3922	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
3923	E1000_WRITE_FLUSH(&adapter->hw);
3924	return;
3925}
3926
3927/*
3928 * Bit of a misnomer: what this really means is
3929 * to enable OS management of the system, that is,
3930 * to disable the special hardware management features
3931 */
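/*
 * The MANC2H bits programmed below forward management traffic arriving
 * on UDP ports 623 and 664 (the ASF/RMCP and secure RMCP ports) up to
 * the host once MNG2HOST is enabled, while ARP interception by the
 * management firmware is turned off.
 */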
3932static void
3933igb_init_manageability(struct adapter *adapter)
3934{
3935	/* A shared code workaround */
3936#define E1000_82542_MANC2H E1000_MANC2H
3937	if (adapter->has_manage) {
3938		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
3939		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3940
3941		/* disable hardware interception of ARP */
3942		manc &= ~(E1000_MANC_ARP_EN);
3943
3944		/* enable receiving management packets to the host */
3945		manc |= E1000_MANC_EN_MNG2HOST;
3946#define E1000_MNG2HOST_PORT_623 (1 << 5)
3947#define E1000_MNG2HOST_PORT_664 (1 << 6)
3948		manc2h |= E1000_MNG2HOST_PORT_623;
3949		manc2h |= E1000_MNG2HOST_PORT_664;
3950		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
3951
3952		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3953	}
3954}
3955
3956/*
3957 * Give control back to hardware management
3958 * controller if there is one.
3959 */
3960static void
3961igb_release_manageability(struct adapter *adapter)
3962{
3963	if (adapter->has_manage) {
3964		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
3965
3966		/* re-enable hardware interception of ARP */
3967		manc |= E1000_MANC_ARP_EN;
3968		manc &= ~E1000_MANC_EN_MNG2HOST;
3969
3970		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
3971	}
3972}
3973
3974/*
3975 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
3976 * For ASF and Pass Through versions of f/w this means that
3977 * the driver is loaded.
3978 *
3979 */
3980static void
3981igb_get_hw_control(struct adapter *adapter)
3982{
3983	u32 ctrl_ext;
3984
3985	/* Let firmware know the driver has taken over */
3986	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
3987	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
3988	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
3989}
3990
3991/*
3992 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
3993 * For ASF and Pass Through versions of f/w this means that the
3994 * driver is no longer loaded.
3995 *
3996 */
3997static void
3998igb_release_hw_control(struct adapter *adapter)
3999{
4000	u32 ctrl_ext;
4001
4002	/* Let firmware take over control of h/w */
4003	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4004	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4005	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4006}
4007
4008static int
4009igb_is_valid_ether_addr(uint8_t *addr)
4010{
4011	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4012
4013	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4014		return (FALSE);
4015	}
4016
4017	return (TRUE);
4018}
4019
4020
4021/*
4022 * Enable PCI Wake On Lan capability
4023 */
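/*
 * Note: this assumes the power-management capability is the first
 * entry in the PCI capability list; once found, the PME status and
 * PME enable bits are set in its power status register so the device
 * can assert wake events.
 */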
4024void
4025igb_enable_wakeup(device_t dev)
4026{
4027	u16     cap, status;
4028	u8      id;
4029
4030	/* First find the capabilities pointer */
4031	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4032	/* Read the PM Capabilities */
4033	id = pci_read_config(dev, cap, 1);
4034	if (id != PCIY_PMG)     /* Something wrong */
4035		return;
4036	/* OK, we have the power capabilities, so
4037	   now get the status register */
4038	cap += PCIR_POWER_STATUS;
4039	status = pci_read_config(dev, cap, 2);
4040	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4041	pci_write_config(dev, cap, status, 2);
4042	return;
4043}
4044
4045
4046/**********************************************************************
4047 *
4048 *  Update the board statistics counters.
4049 *
4050 **********************************************************************/
4051static void
4052igb_update_stats_counters(struct adapter *adapter)
4053{
4054	struct ifnet   *ifp;
4055
4056	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4057	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4058		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4059		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4060	}
4061	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4062	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4063	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4064	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4065
4066	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4067	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4068	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4069	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4070	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4071	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4072	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4073	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4074	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4075	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4076	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4077	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4078	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4079	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4080	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4081	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4082	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4083	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4084	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4085	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4086
4087	/* For the 64-bit byte counters the low dword must be read first. */
4088	/* Both registers clear on the read of the high dword */
4089
4090	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4091	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4092
4093	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4094	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4095	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4096	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4097	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4098
4099	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4100	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4101
4102	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4103	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4104	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4105	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4106	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4107	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4108	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4109	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4110	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4111	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4112
4113	adapter->stats.algnerrc +=
4114		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4115	adapter->stats.rxerrc +=
4116		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4117	adapter->stats.tncrs +=
4118		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4119	adapter->stats.cexterr +=
4120		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4121	adapter->stats.tsctc +=
4122		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4123	adapter->stats.tsctfc +=
4124		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4125	ifp = adapter->ifp;
4126
4127	ifp->if_collisions = adapter->stats.colc;
4128
4129	/* Rx Errors */
4130	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4131	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4132	    adapter->stats.ruc + adapter->stats.roc +
4133	    adapter->stats.mpc + adapter->stats.cexterr;
4134
4135	/* Tx Errors */
4136	ifp->if_oerrors = adapter->stats.ecol +
4137	    adapter->stats.latecol + adapter->watchdog_events;
4138}
4139
4140
4141/**********************************************************************
4142 *
4143 *  This routine is called from the debug sysctl handler (igb_sysctl_debug_info).
4144 *  This routine provides a way to take a look at important statistics
4145 *  maintained by the driver and hardware.
4146 *
4147 **********************************************************************/
4148static void
4149igb_print_debug_info(struct adapter *adapter)
4150{
4151	device_t dev = adapter->dev;
4152	struct rx_ring *rxr = adapter->rx_rings;
4153	struct tx_ring *txr = adapter->tx_rings;
4154	uint8_t *hw_addr = adapter->hw.hw_addr;
4155
4156	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4157	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4158	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4159	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4160
4161#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4162	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4163	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4164	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4165	/* Kawela only */
4166	device_printf(dev, "IVAR0 = 0x%x IVAR1 = 0x%x IVAR_MISC = 0x%x\n",
4167	    E1000_READ_REG_ARRAY(&adapter->hw, E1000_IVAR0, 0),
4168	    E1000_READ_REG_ARRAY(&adapter->hw, E1000_IVAR0, 1),
4169	    E1000_READ_REG(&adapter->hw, E1000_IVAR_MISC));
4170#endif
4171
4172	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4173	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4174	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4175	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4176	    adapter->hw.fc.high_water,
4177	    adapter->hw.fc.low_water);
4178	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4179	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4180	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4181	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4182	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4183	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4184
4185	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4186		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4187		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4188		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4189		device_printf(dev, "no descriptors avail event = %lu\n",
4190		    txr->no_desc_avail);
4191		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lu\n", txr->me,
4192		    txr->tx_irq);
4193		device_printf(dev, "TX(%d) Packets sent = %lu\n", txr->me,
4194		    txr->tx_packets);
4195	}
4196
4197	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4198		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4199		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4200		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4201		device_printf(dev, "RX(%d) Packets received = %lu\n", rxr->me,
4202		    rxr->rx_packets);
4203		device_printf(dev, "RX(%d) Byte count = %lu\n", rxr->me,
4204		    rxr->rx_bytes);
4205		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lu\n", rxr->me,
4206		    rxr->rx_irq);
4207	}
4208	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4209
4210	device_printf(dev, "Std mbuf failed = %ld\n",
4211	    adapter->mbuf_alloc_failed);
4212	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4213	    adapter->mbuf_cluster_failed);
4214	device_printf(dev, "Driver dropped packets = %ld\n",
4215	    adapter->dropped_pkts);
4216	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4217		adapter->no_tx_dma_setup);
4218}
4219
4220static void
4221igb_print_hw_stats(struct adapter *adapter)
4222{
4223	device_t dev = adapter->dev;
4224
4225	device_printf(dev, "Excessive collisions = %lld\n",
4226	    (long long)adapter->stats.ecol);
4227#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4228	device_printf(dev, "Symbol errors = %lld\n",
4229	    (long long)adapter->stats.symerrs);
4230#endif
4231	device_printf(dev, "Sequence errors = %lld\n",
4232	    (long long)adapter->stats.sec);
4233	device_printf(dev, "Defer count = %lld\n",
4234	    (long long)adapter->stats.dc);
4235	device_printf(dev, "Missed Packets = %lld\n",
4236	    (long long)adapter->stats.mpc);
4237	device_printf(dev, "Receive No Buffers = %lld\n",
4238	    (long long)adapter->stats.rnbc);
4239	/* RLEC is inaccurate on some hardware, calculate our own. */
4240	device_printf(dev, "Receive Length Errors = %lld\n",
4241	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4242	device_printf(dev, "Receive errors = %lld\n",
4243	    (long long)adapter->stats.rxerrc);
4244	device_printf(dev, "Crc errors = %lld\n",
4245	    (long long)adapter->stats.crcerrs);
4246	device_printf(dev, "Alignment errors = %lld\n",
4247	    (long long)adapter->stats.algnerrc);
4248	/* On 82575 these are collision counts */
4249	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4250	    (long long)adapter->stats.cexterr);
4251	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4252	device_printf(dev, "watchdog timeouts = %ld\n",
4253	    adapter->watchdog_events);
4254	device_printf(dev, "XON Rcvd = %lld\n",
4255	    (long long)adapter->stats.xonrxc);
4256	device_printf(dev, "XON Xmtd = %lld\n",
4257	    (long long)adapter->stats.xontxc);
4258	device_printf(dev, "XOFF Rcvd = %lld\n",
4259	    (long long)adapter->stats.xoffrxc);
4260	device_printf(dev, "XOFF Xmtd = %lld\n",
4261	    (long long)adapter->stats.xofftxc);
4262	device_printf(dev, "Good Packets Rcvd = %lld\n",
4263	    (long long)adapter->stats.gprc);
4264	device_printf(dev, "Good Packets Xmtd = %lld\n",
4265	    (long long)adapter->stats.gptc);
4266	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4267	    (long long)adapter->stats.tsctc);
4268	device_printf(dev, "TSO Contexts Failed = %lld\n",
4269	    (long long)adapter->stats.tsctfc);
4270}
4271
4272/**********************************************************************
4273 *
4274 *  This routine provides a way to dump out the adapter eeprom,
4275 *  often a useful debug/service tool. Only the first 32 words are
4276 *  dumped; the data that matters lives within that range.
4277 *
4278 **********************************************************************/
4279static void
4280igb_print_nvm_info(struct adapter *adapter)
4281{
4282	u16	eeprom_data;
4283	int	i, j, row = 0;
4284
4285	/* It's a bit crude, but it gets the job done */
4286	printf("\nInterface EEPROM Dump:\n");
4287	printf("Offset\n0x0000  ");
4288	for (i = 0, j = 0; i < 32; i++, j++) {
4289		if (j == 8) { /* Make the offset block */
4290			j = 0; ++row;
4291			printf("\n0x00%x0  ",row);
4292		}
4293		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4294		printf("%04x ", eeprom_data);
4295	}
4296	printf("\n");
4297}
4298
4299static int
4300igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4301{
4302	struct adapter *adapter;
4303	int error;
4304	int result;
4305
4306	result = -1;
4307	error = sysctl_handle_int(oidp, &result, 0, req);
4308
4309	if (error || !req->newptr)
4310		return (error);
4311
4312	if (result == 1) {
4313		adapter = (struct adapter *)arg1;
4314		igb_print_debug_info(adapter);
4315	}
4316	/*
4317	 * This value will cause a hex dump of the
4318	 * first 32 16-bit words of the EEPROM to
4319	 * the screen.
4320	 */
4321	if (result == 2) {
4322		adapter = (struct adapter *)arg1;
4323		igb_print_nvm_info(adapter);
4324	}
4325
4326	return (error);
4327}
4328
4329
4330static int
4331igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4332{
4333	struct adapter *adapter;
4334	int error;
4335	int result;
4336
4337	result = -1;
4338	error = sysctl_handle_int(oidp, &result, 0, req);
4339
4340	if (error || !req->newptr)
4341		return (error);
4342
4343	if (result == 1) {
4344		adapter = (struct adapter *)arg1;
4345		igb_print_hw_stats(adapter);
4346	}
4347
4348	return (error);
4349}
4350
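/*
 * Sysctl handler for the interrupt delay tunables: the user supplied
 * microsecond value is converted to device ticks and written into the
 * low 16 bits of the associated delay register. TIDV gets special
 * treatment: zero is never written, and the per-descriptor IDE bit is
 * only set on pre-82575 MACs.
 */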
4351static int
4352igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4353{
4354	struct igb_int_delay_info *info;
4355	struct adapter *adapter;
4356	uint32_t regval;
4357	int error;
4358	int usecs;
4359	int ticks;
4360
4361	info = (struct igb_int_delay_info *)arg1;
4362	usecs = info->value;
4363	error = sysctl_handle_int(oidp, &usecs, 0, req);
4364	if (error != 0 || req->newptr == NULL)
4365		return (error);
4366	if (usecs < 0 || usecs > IGB_TICKS_TO_USECS(65535))
4367		return (EINVAL);
4368	info->value = usecs;
4369	ticks = IGB_USECS_TO_TICKS(usecs);
4370
4371	adapter = info->adapter;
4372
4373	IGB_CORE_LOCK(adapter);
4374	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4375	regval = (regval & ~0xffff) | (ticks & 0xffff);
4376	/* Handle a few special cases. */
4377	switch (info->offset) {
4378	case E1000_RDTR:
4379		break;
4380	case E1000_TIDV:
4381		if (ticks == 0) {
4382			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4383			/* Don't write 0 into the TIDV register. */
4384			regval++;
4385		} else
4386			if (adapter->hw.mac.type < e1000_82575)
4387				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4388		break;
4389	}
4390	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4391	IGB_CORE_UNLOCK(adapter);
4392	return (0);
4393}
4394
4395static void
4396igb_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4397	const char *description, struct igb_int_delay_info *info,
4398	int offset, int value)
4399{
4400	info->adapter = adapter;
4401	info->offset = offset;
4402	info->value = value;
4403	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4404	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4405	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4406	    info, 0, igb_sysctl_int_delay, "I", description);
4407}
4408
4409static void
4410igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4411	const char *description, int *limit, int value)
4412{
4413	*limit = value;
4414	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4415	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4416	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4417}
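/*
 * Illustrative use from attach (a sketch only; the softc field name
 * and the default value shown are assumptions, not taken from this
 * code):
 *
 *	igb_add_rx_process_limit(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */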
4418
4419
4420