/******************************************************************************

  Copyright (c) 2001-2008, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 181041 2008-07-31 02:22:53Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#ifdef IGB_TIMESYNC
#include <sys/ioccom.h>
#include <sys/time.h>
#endif
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.3.0";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by igb_probe to select which devices the driver attaches to.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	igb_fixup_rx(struct rx_ring *);
#endif
static void	igb_rx_checksum(u32, struct mbuf *);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int);
#ifdef IGB_HW_VLAN_SUPPORT
static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
#endif
static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
static void	igb_add_int_delay_sysctl(struct adapter *, const char *,
		    const char *, struct igb_int_delay_info *, int, int);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);

#ifdef IGB_TIMESYNC
/* Precision Time sync support */
static int igb_tsync_init(struct adapter *);
static void igb_tsync_disable(struct adapter *);
#endif

static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

#define IGB_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
#define IGB_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
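/*
 * The hardware delay timers tick in 1.024 usec units, hence the
 * scaling above; e.g. IGB_USECS_TO_TICKS(66) = (1000*66 + 512)/1024
 * = 64 ticks, and IGB_TICKS_TO_USECS(64) rounds back to 66 usecs.
 */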
#define M_TSO_LEN			66

/* Allow common code without TSO */
#ifndef CSUM_TSO
#define CSUM_TSO	0
#endif

static int igb_tx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TIDV);
static int igb_rx_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RDTR);
static int igb_tx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_TADV);
static int igb_rx_abs_int_delay_dflt = IGB_TICKS_TO_USECS(IGB_RADV);
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
static int igb_smart_pwr_down = FALSE;
TUNABLE_INT("hw.igb.tx_int_delay", &igb_tx_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_int_delay", &igb_rx_int_delay_dflt);
TUNABLE_INT("hw.igb.tx_abs_int_delay", &igb_tx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rx_abs_int_delay", &igb_rx_abs_int_delay_dflt);
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
TUNABLE_INT("hw.igb.smart_pwr_down", &igb_smart_pwr_down);
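
/*
 * These are boot-time tunables: TUNABLE_INT() fetches each value
 * from the kernel environment when the driver loads, so they are
 * set from /boot/loader.conf, e.g. (hypothetical values):
 *
 *	hw.igb.txd="1024"
 *	hw.igb.rxd="1024"
 *	hw.igb.rx_int_delay="32"
 */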

/*
** IF YOU CHANGE THESE: be sure to change IGB_MSIX_VEC in
** if_igb.h to match. If set to 0 these are autoconfigured
** based on the number of CPUs.
*/
static int igb_tx_queues = 1;
static int igb_rx_queues = 1;
TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to none */
static int igb_fc_setting = 0;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/*
 * Should the driver do LRO on the RX end?
 *  This can be toggled at runtime, but the
 *  interface must be reset (down/up) for the
 *  change to take effect.
 */
static int igb_enable_lro = 1;
TUNABLE_INT("hw.igb.enable_lro", &igb_enable_lro);

extern int mp_ncpus;
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on the
 *  adapter based on the PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Set up some sysctls for the tunable interrupt delays */
	igb_add_int_delay_sysctl(adapter, "rx_int_delay",
	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RDTR), igb_rx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_int_delay",
	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TIDV), igb_tx_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
	    "receive interrupt delay limit in usecs",
	    &adapter->rx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_RADV),
	    igb_rx_abs_int_delay_dflt);
	igb_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
	    "transmit interrupt delay limit in usecs",
	    &adapter->tx_abs_int_delay,
	    E1000_REGISTER(&adapter->hw, E1000_TADV),
	    igb_tx_abs_int_delay_dflt);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
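	/*
	 * For example, with 16-byte descriptors and an IGB_DBA_ALIGN of
	 * 128 (as assumed from if_igb.h), the ring sizes must be a
	 * multiple of 8 descriptors.
	 */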
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	 * This controls when hardware reports transmit completion
	 * status.
	 */
	adapter->hw.mac.report_tx_early = 1;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_hw_init;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

#ifdef IGB_HW_VLAN_SUPPORT
	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, 0, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, 0, EVENTHANDLER_PRI_FIRST);
#endif

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_hw_init:
	e1000_remove_device(&adapter->hw);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

#ifdef IGB_HW_VLAN_SUPPORT
	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
#endif

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	e1000_remove_device(&adapter->hw);
	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available the stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}

static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	u32		queue = 0;

	/*
	** This is really just here for testing
	** TX multiqueue, ultimately what is
	** needed is the flow support in the stack
	** and appropriate logic here to deal with
	** it. -jfv
	*/
	if (adapter->num_tx_queues > 1)
		queue = (curcpu % adapter->num_tx_queues);

	txr = &adapter->tx_rings[queue];
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifaddr *ifa = (struct ifaddr *)data;
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting hardware takes a very long time
			 * and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
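		/*
		 * 9234 = 9216 (9K jumbo payload) + 14 (Ethernet header)
		 * + 4 (CRC); the driver treats this as the largest
		 * supported frame size.
		 */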
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

#ifdef IGB_TIMESYNC
	/*
	** IOCTL support for Precision Time (IEEE 1588) Support
	*/
	case IGB_TIMESYNC_READTS:
	    {
		u32 rx_ctl, tx_ctl;
		struct igb_tsync_read *tdata;

		tdata = (struct igb_tsync_read *) ifr->ifr_data;

		if (tdata->read_current_time) {
			getnanotime(&tdata->system_time);
			tdata->network_time = E1000_READ_REG(&adapter->hw,
			    E1000_SYSTIML);
			tdata->network_time |=
			    (u64)E1000_READ_REG(&adapter->hw,
			    E1000_SYSTIMH ) << 32;
		}

		rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
		tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);

		if (rx_ctl & 0x1) {
			u32 tmp;
			unsigned char *tmp_cp;

			tdata->rx_valid = 1;
			tdata->rx_stamp = E1000_READ_REG(&adapter->hw, E1000_RXSTMPL);
			tdata->rx_stamp |= (u64)E1000_READ_REG(&adapter->hw,
			    E1000_RXSTMPH) << 32;

			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRL);
			tmp_cp = (unsigned char *) &tmp;
			tdata->srcid[0] = tmp_cp[0];
			tdata->srcid[1] = tmp_cp[1];
			tdata->srcid[2] = tmp_cp[2];
			tdata->srcid[3] = tmp_cp[3];
			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
			tmp_cp = (unsigned char *) &tmp;
			tdata->srcid[4] = tmp_cp[0];
			tdata->srcid[5] = tmp_cp[1];
			tdata->seqid = tmp >> 16;
			tdata->seqid = htons(tdata->seqid);
		} else
			tdata->rx_valid = 0;

		if (tx_ctl & 0x1) {
			tdata->tx_valid = 1;
			tdata->tx_stamp = E1000_READ_REG(&adapter->hw, E1000_TXSTMPL);
			tdata->tx_stamp |= (u64) E1000_READ_REG(&adapter->hw,
			    E1000_TXSTMPH) << 32;
		} else
			tdata->tx_valid = 0;

		return (0);
	    }
#endif	/* IGB_TIMESYNC */

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	bool		tx_hang = FALSE;

	IGB_CORE_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start() queues a packet.
	** Then txeof keeps resetting it as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** With TX Multiqueue we need to check every queue's timer;
	** if any times out we do the reset.
	*/
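	/*
	 * Net effect: with IGB_TX_TIMEOUT at 5 and this routine running
	 * once per second, a queue is declared hung roughly five seconds
	 * after it last made progress.
	 */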
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		if (txr->watchdog_timer == 0 ||
		    (--txr->watchdog_timer)) {
			IGB_TX_UNLOCK(txr);
			continue;
		} else {
			tx_hang = TRUE;
			IGB_TX_UNLOCK(txr);
			break;
		}
	}
	if (tx_hang == FALSE)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		txr = adapter->tx_rings; /* reset pointer */
		for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			txr->watchdog_timer = IGB_TX_TIMEOUT;
			IGB_TX_UNLOCK(txr);
		}
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
		    txr->next_to_clean);
	}

	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  the init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba = 0;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

#ifndef IGB_HW_VLAN_SUPPORT
	/* New register interface replaces this but
	   waiting on kernel support to be added */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		u32 ctrl;
		ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	}
#endif

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);
	else
		E1000_WRITE_REG(&adapter->hw, E1000_EITR(0), DEFAULT_ITR);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		/* this clears any pending interrupts */
		E1000_READ_REG(&adapter->hw, E1000_ICR);
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

#ifdef IGB_TIMESYNC
	/* Initialize IEEE 1588 Time sync if available */
	if (adapter->hw.mac.type == e1000_82576)
		igb_tsync_init(adapter);
#endif

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine
 *
 *********************************************************************/
static void
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = ifp->if_softc;
	struct rx_ring *rxr = adapter->rx_rings;
	struct tx_ring *txr = adapter->tx_rings;
	uint32_t reg_icr;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			igb_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    igb_local_timer, adapter);
		}
	}
	igb_rxeof(rxr, count);
	IGB_CORE_UNLOCK(adapter);

	/* With polling we cannot do multiqueue */
	IGB_TX_LOCK(txr);
	igb_txeof(txr);

	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
	IGB_TX_UNLOCK(txr);
}
#endif /* DEVICE_POLLING */


static void
igb_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	IGB_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	igb_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			/* More to clean, schedule another task */
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);

}

static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}


/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_icr;

	/* Should not happen, but... */
	if (ifp->if_capenable & IFCAP_POLLING)
		return FILTER_STRAY;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(adapter->tq, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}


/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		IGB_TX_UNLOCK(txr);
		taskqueue_enqueue(adapter->tq, &txr->tx_task);
	}
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++rxr->rx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	adapter->hw.mac.get_link_status = 1;
	taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors,
 *  as used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0, offload = 0;

	m_head = *m_headp;


	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Now do we at least have a minimal? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else
		/* Do all other context descriptor setup */
		offload = igb_tx_ctx_setup(txr, m_head);
	if (offload == TRUE)
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
#ifdef IGB_TIMESYNC
	if (offload == IGB_TIMESTAMP)
		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
#endif
	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);

}

static void
igb_set_promisc(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	if (ifp->if_flags & IFF_PROMISC) {
		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else if (ifp->if_flags & IFF_ALLMULTI) {
		reg_rctl |= E1000_RCTL_MPE;
		reg_rctl &= ~E1000_RCTL_UPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	}
}

static void
igb_disable_promisc(struct adapter *adapter)
{
	uint32_t	reg_rctl;

	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);

	reg_rctl &=  (~E1000_RCTL_UPE);
	reg_rctl &=  (~E1000_RCTL_MPE);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
}


1814/*********************************************************************
1815 *  Multicast Update
1816 *
1817 *  This routine is called whenever the multicast address list is updated.
1818 *
1819 **********************************************************************/
1820
1821static void
1822igb_set_multi(struct adapter *adapter)
1823{
1824	struct ifnet	*ifp = adapter->ifp;
1825	struct ifmultiaddr *ifma;
1826	uint32_t reg_rctl = 0;
1827	uint8_t  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1828	int mcnt = 0;
1829
1830	IOCTL_DEBUGOUT("igb_set_multi: begin");
1831
1832	IF_ADDR_LOCK(ifp);
1833	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1834		if (ifma->ifma_addr->sa_family != AF_LINK)
1835			continue;
1836
1837		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1838			break;
1839
1840		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1841		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1842		mcnt++;
1843	}
1844	IF_ADDR_UNLOCK(ifp);
1845
1846	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1847		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1848		reg_rctl |= E1000_RCTL_MPE;
1849		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1850	} else
1851		e1000_update_mc_addr_list(&adapter->hw, mta,
1852		    mcnt, 1, adapter->hw.mac.rar_entry_count);
1853}
1854
1855
1856/*********************************************************************
1857 *  Timer routine
1858 *
1859 *  This routine checks for link status and updates statistics.
1860 *
1861 **********************************************************************/
1862
1863static void
1864igb_local_timer(void *arg)
1865{
1866	struct adapter	*adapter = arg;
1867	struct ifnet	*ifp = adapter->ifp;
1868
1869	IGB_CORE_LOCK_ASSERT(adapter);
1870
1871	igb_update_link_status(adapter);
1872	igb_update_stats_counters(adapter);
1873
1874	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1875		igb_print_hw_stats(adapter);
1876
1877	/*
1878	 * Each second we check the watchdog to
1879	 * protect against hardware hangs.
1880	 */
1881	igb_watchdog(adapter);
1882
1883	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1884
1885}
1886
1887static void
1888igb_update_link_status(struct adapter *adapter)
1889{
1890	struct e1000_hw *hw = &adapter->hw;
1891	struct ifnet *ifp = adapter->ifp;
1892	device_t dev = adapter->dev;
1893	struct tx_ring *txr = adapter->tx_rings;
1894	u32 link_check = 0;
1895
1896	/* Get the cached link value or read for real */
1897        switch (hw->phy.media_type) {
1898        case e1000_media_type_copper:
1899                if (hw->mac.get_link_status) {
1900			/* Do the work to read phy */
1901                        e1000_check_for_link(hw);
1902                        link_check = !hw->mac.get_link_status;
1903                } else
1904                        link_check = TRUE;
1905                break;
1906        case e1000_media_type_fiber:
1907                e1000_check_for_link(hw);
1908                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1909                                 E1000_STATUS_LU);
1910                break;
1911        case e1000_media_type_internal_serdes:
1912                e1000_check_for_link(hw);
1913                link_check = adapter->hw.mac.serdes_has_link;
1914                break;
1915        default:
1916        case e1000_media_type_unknown:
1917                break;
1918        }
1919
1920	/* Now we check if a transition has happened */
1921	if (link_check && (adapter->link_active == 0)) {
1922		e1000_get_speed_and_duplex(&adapter->hw,
1923		    &adapter->link_speed, &adapter->link_duplex);
1924		if (bootverbose)
1925			device_printf(dev, "Link is up %d Mbps %s\n",
1926			    adapter->link_speed,
1927			    ((adapter->link_duplex == FULL_DUPLEX) ?
1928			    "Full Duplex" : "Half Duplex"));
1929		adapter->link_active = 1;
1930		ifp->if_baudrate = adapter->link_speed * 1000000;
1931		if_link_state_change(ifp, LINK_STATE_UP);
1932	} else if (!link_check && (adapter->link_active == 1)) {
1933		ifp->if_baudrate = adapter->link_speed = 0;
1934		adapter->link_duplex = 0;
1935		if (bootverbose)
1936			device_printf(dev, "Link is Down\n");
1937		adapter->link_active = 0;
1938		if_link_state_change(ifp, LINK_STATE_DOWN);
1939		/* Turn off watchdogs */
1940		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
1941			txr->watchdog_timer = FALSE;
1942	}
1943}
1944
1945/*********************************************************************
1946 *
1947 *  This routine disables all traffic on the adapter by issuing a
1948 *  global reset on the MAC and deallocates TX/RX buffers.
1949 *
1950 **********************************************************************/
1951
1952static void
1953igb_stop(void *arg)
1954{
1955	struct adapter	*adapter = arg;
1956	struct ifnet	*ifp = adapter->ifp;
1957
1958	IGB_CORE_LOCK_ASSERT(adapter);
1959
1960	INIT_DEBUGOUT("igb_stop: begin");
1961
1962	igb_disable_intr(adapter);
1963
1964	callout_stop(&adapter->timer);
1965
1966	/* Tell the stack that the interface is no longer active */
1967	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1968
1969#ifdef IGB_TIMESYNC
1970	/* Disable IEEE 1588 Time sync */
1971	if (adapter->hw.mac.type == e1000_82576)
1972		igb_tsync_disable(adapter);
1973#endif
1974
1975	e1000_reset_hw(&adapter->hw);
1976	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1977}
1978
1979
1980/*********************************************************************
1981 *
1982 *  Determine hardware revision.
1983 *
1984 **********************************************************************/
1985static void
1986igb_identify_hardware(struct adapter *adapter)
1987{
1988	device_t dev = adapter->dev;
1989
1990	/* Make sure our PCI config space has the necessary stuff set */
1991	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1992	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1993	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1994		device_printf(dev, "Memory Access and/or Bus Master bits "
1995		    "were not set!\n");
1996		adapter->hw.bus.pci_cmd_word |=
1997		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1998		pci_write_config(dev, PCIR_COMMAND,
1999		    adapter->hw.bus.pci_cmd_word, 2);
2000	}
2001
2002	/* Save off the information about this board */
2003	adapter->hw.vendor_id = pci_get_vendor(dev);
2004	adapter->hw.device_id = pci_get_device(dev);
2005	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2006	adapter->hw.subsystem_vendor_id =
2007	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2008	adapter->hw.subsystem_device_id =
2009	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2010
2011	/* Do Shared Code Init and Setup */
2012	if (e1000_set_mac_type(&adapter->hw)) {
2013		device_printf(dev, "Setup init failure\n");
2014		return;
2015	}
2016}
2017
2018static int
2019igb_allocate_pci_resources(struct adapter *adapter)
2020{
2021	device_t	dev = adapter->dev;
2022	int		rid, error = 0;
2023
2024	rid = PCIR_BAR(0);
2025	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2026	    &rid, RF_ACTIVE);
2027	if (adapter->pci_mem == NULL) {
2028		device_printf(dev, "Unable to allocate bus resource: memory\n");
2029		return (ENXIO);
2030	}
2031	adapter->osdep.mem_bus_space_tag =
2032	    rman_get_bustag(adapter->pci_mem);
2033	adapter->osdep.mem_bus_space_handle =
2034	    rman_get_bushandle(adapter->pci_mem);
2035	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
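	/*
	 * hw_addr is only a cookie for the shared code; register
	 * access actually goes through the bus space tag and handle
	 * saved in osdep above.
	 */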
2036
2037	/*
2038	** Init the resource arrays
2039	*/
2040	for (int i = 0; i < IGB_MSIX_VEC; i++) {
2041		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
2042		adapter->tag[i] = NULL;
2043		adapter->res[i] = NULL;
2044	}
2045
2046	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
2047	adapter->num_rx_queues = 1;
2048
2049	/* This will setup either MSI/X or MSI */
2050	adapter->msix = igb_setup_msix(adapter);
2051
2052	adapter->hw.back = &adapter->osdep;
2053
2054	return (error);
2055}
2056
2057/*********************************************************************
2058 *
2059 *  Setup the Legacy or MSI Interrupt handler
2060 *
2061 **********************************************************************/
2062static int
2063igb_allocate_legacy(struct adapter *adapter)
2064{
2065	device_t dev = adapter->dev;
2066	int error;
2067
2068	/* Turn off all interrupts */
2069	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2070
2071	/* Legacy RID at 0 */
2072	if (adapter->msix == 0)
2073		adapter->rid[0] = 0;
2074
2075	/* We allocate a single interrupt resource */
2076	adapter->res[0] = bus_alloc_resource_any(dev,
2077	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
2078	if (adapter->res[0] == NULL) {
2079		device_printf(dev, "Unable to allocate bus resource: "
2080		    "interrupt\n");
2081		return (ENXIO);
2082	}
2083
2084	/*
2085	 * Try allocating a fast interrupt and the associated deferred
2086	 * processing contexts.
2087	 */
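	/*
	 * The fast handler does minimal work at interrupt time; the
	 * real RX/TX and link processing runs from these tasks in the
	 * taskqueue thread created below.
	 */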
2088	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2089	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2090	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2091	    taskqueue_thread_enqueue, &adapter->tq);
2092	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2093	    device_get_nameunit(adapter->dev));
2094	if ((error = bus_setup_intr(dev, adapter->res[0],
2095	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
2096	    &adapter->tag[0])) != 0) {
2097		device_printf(dev, "Failed to register fast interrupt "
2098			    "handler: %d\n", error);
2099		taskqueue_free(adapter->tq);
2100		adapter->tq = NULL;
2101		return (error);
2102	}
2103
2104	return (0);
2105}
2106
2107
2108/*********************************************************************
2109 *
2110 *  Setup the MSIX Interrupt handlers:
2111 *
2112 **********************************************************************/
2113static int
2114igb_allocate_msix(struct adapter *adapter)
2115{
2116	device_t dev = adapter->dev;
2117	struct tx_ring *txr = adapter->tx_rings;
2118	struct rx_ring *rxr = adapter->rx_rings;
2119	int error, vector = 0;
2120
2121	/*
2122	 * Setup the interrupt handlers
2123	 */
2124
2125	/* TX Setup */
2126	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
2127		adapter->res[vector] = bus_alloc_resource_any(dev,
2128		    SYS_RES_IRQ, &adapter->rid[vector],
2129		    RF_SHAREABLE | RF_ACTIVE);
2130		if (adapter->res[vector] == NULL) {
2131			device_printf(dev,
2132			    "Unable to allocate bus resource: "
2133			    "MSIX TX Interrupt\n");
2134			return (ENXIO);
2135		}
2136		error = bus_setup_intr(dev, adapter->res[vector],
2137	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
2138		    txr, &adapter->tag[vector]);
2139		if (error) {
2140			adapter->res[vector] = NULL;
2141			device_printf(dev, "Failed to register TX handler\n");
2142			return (error);
2143		}
2144		/* Make tasklet for deferred handling - one per queue */
2145		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2146		if (adapter->hw.mac.type == e1000_82575) {
2147			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2148			/* MSIXBM registers start at 0 */
2149			txr->msix = adapter->rid[vector] - 1;
2150		} else {
2151			txr->eims = 1 << vector;
2152			txr->msix = vector;
2153		}
2154	}
2155
2156	/* RX Setup */
2157	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
2158		adapter->res[vector] = bus_alloc_resource_any(dev,
2159		    SYS_RES_IRQ, &adapter->rid[vector],
2160		    RF_SHAREABLE | RF_ACTIVE);
2161		if (adapter->res[vector] == NULL) {
2162			device_printf(dev,
2163			    "Unable to allocate bus resource: "
2164			    "MSIX RX Interrupt\n");
2165			return (ENXIO);
2166		}
2167		error = bus_setup_intr(dev, adapter->res[vector],
2168	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
2169		    rxr, &adapter->tag[vector]);
2170		if (error) {
2171			adapter->res[vector] = NULL;
2172			device_printf(dev, "Failed to register RX handler\n");
2173			return (error);
2174		}
2175		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2176		if (adapter->hw.mac.type == e1000_82575) {
2177			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2178			rxr->msix = adapter->rid[vector] - 1;
2179		} else {
2180			rxr->eims = 1 << vector;
2181			rxr->msix = vector;
2182		}
2183	}
2184
2185	/* And Link */
2186	adapter->res[vector] = bus_alloc_resource_any(dev,
2187	    SYS_RES_IRQ, &adapter->rid[vector],
2188		    RF_SHAREABLE | RF_ACTIVE);
2189	if (adapter->res[vector] == NULL) {
2190		device_printf(dev,
2191		    "Unable to allocate bus resource: "
2192		    "MSIX Link Interrupt\n");
2193		return (ENXIO);
2194	}
2195	if ((error = bus_setup_intr(dev, adapter->res[vector],
2196	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2197	    adapter, &adapter->tag[vector])) != 0) {
2198		device_printf(dev, "Failed to register Link handler\n");
2199		return (error);
2200	}
2201	if (adapter->hw.mac.type == e1000_82575)
2202		adapter->linkvec = adapter->rid[vector] - 1;
2203	else
2204		adapter->linkvec = vector;
2205
2206	/* Make tasklet for deferred link interrupt handling */
2207	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2208
2209	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2210	    taskqueue_thread_enqueue, &adapter->tq);
2211	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2212	    device_get_nameunit(adapter->dev));
2213
2214	return (0);
2215}
2216
2217static void
2218igb_configure_queues(struct adapter *adapter)
2219{
2220	struct	e1000_hw *hw = &adapter->hw;
2221	struct	tx_ring	*txr;
2222	struct	rx_ring	*rxr;
2223
2224	/* Turn on MSIX */
2225	/*
2226	** 82576 uses IVARs to route MSI/X
2227	** interrupts; it's not very intuitive,
2228	** study the code carefully :)
2229	*/
2230	if (adapter->hw.mac.type == e1000_82576) {
2231		u32	ivar = 0;
2232		/* First turn on the capability */
2233		E1000_WRITE_REG(hw, E1000_GPIE,
2234		    E1000_GPIE_MSIX_MODE |
2235		    E1000_GPIE_EIAME |
2236		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2237	 	/* Set the MSIX interrupt rate. */
2238		for (int i = 0; i < IGB_MSIX_VEC; i++)
2239			E1000_WRITE_REG(&adapter->hw,
2240			    E1000_EITR(i), DEFAULT_ITR);
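		/*
		** As used here each 32-bit IVAR holds four 8-bit
		** entries: the low byte routes RX queue i, the next
		** byte TX queue i, and the upper two bytes the same
		** pair for queue i + 8, which is what the masks and
		** shifts below select.
		*/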
2241		/* RX */
2242		for (int i = 0; i < adapter->num_rx_queues; i++) {
2243			u32 index = i & 0x7; /* Each IVAR has two entries */
2244			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2245			rxr = &adapter->rx_rings[i];
2246			if (i < 8) {
2247				ivar &= 0xFFFFFF00;
2248				ivar |= rxr->msix | E1000_IVAR_VALID;
2249			} else {
2250				ivar &= 0xFF00FFFF;
2251				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2252			}
2253			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2254			adapter->eims_mask |= rxr->eims;
2255		}
2256		/* TX */
2257		for (int i = 0; i < adapter->num_tx_queues; i++) {
2258			u32 index = i & 0x7; /* Each IVAR has two entries */
2259			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2260			txr = &adapter->tx_rings[i];
2261			if (i < 8) {
2262				ivar &= 0xFFFF00FF;
2263				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2264			} else {
2265				ivar &= 0x00FFFFFF;
2266				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2267			}
2268			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2269			adapter->eims_mask |= txr->eims;
2270		}
2271
2272		/* And for the link interrupt */
2273		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2274		adapter->link_mask = 1 << adapter->linkvec;
2275		adapter->eims_mask |= adapter->link_mask;
2276		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2277	} else {
2278		/* 82575 */
2279		int tmp;
2280
2281		/* Enable MSI-X PBA support */
2282		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2283		tmp |= E1000_CTRL_EXT_PBA_CLR;
2284		/* Auto-Mask interrupts upon ICR read. */
2285		tmp |= E1000_CTRL_EXT_EIAME;
2286		tmp |= E1000_CTRL_EXT_IRCA;
2287		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2288
2289	 	/* Set the interrupt throttling rate. */
2290		for (int i = 0; i < IGB_MSIX_VEC; i++)
2291			E1000_WRITE_REG(&adapter->hw,
2292			    E1000_EITR(i), DEFAULT_ITR);
2293
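		/*
		** The 82575 routes by bitmap instead: each vector's
		** MSIXBM register holds the EICR bits that should
		** fire that vector.
		*/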
2294		/* TX */
2295		for (int i = 0; i < adapter->num_tx_queues; i++) {
2296			txr = &adapter->tx_rings[i];
2297			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2298			    txr->eims);
2299			adapter->eims_mask |= txr->eims;
2300		}
2301
2302		/* RX */
2303		for (int i = 0; i < adapter->num_rx_queues; i++) {
2304			rxr = &adapter->rx_rings[i];
2305			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2306			    rxr->eims);
2307			adapter->eims_mask |= rxr->eims;
2308		}
2309
2310		/* Link */
2311		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2312		    E1000_EIMS_OTHER);
2313		adapter->link_mask |= E1000_EIMS_OTHER;
2314		adapter->eims_mask |= adapter->link_mask;
2315	}
2316	return;
2317}
2318
2319
2320static void
2321igb_free_pci_resources(struct adapter *adapter)
2322{
2323	device_t dev = adapter->dev;
2324
2325	/* Make sure the for loop below runs once */
2326	if (adapter->msix == 0)
2327		adapter->msix = 1;
2328
2329	/*
2330	 * First release all the interrupt resources:
2331	 *      notice that since these are just kept
2332	 *      in an array we can do the same logic
2333	 *      whether it's MSIX or just legacy.
2334	 */
2335	for (int i = 0; i < adapter->msix; i++) {
2336		if (adapter->tag[i] != NULL) {
2337			bus_teardown_intr(dev, adapter->res[i],
2338			    adapter->tag[i]);
2339			adapter->tag[i] = NULL;
2340		}
2341		if (adapter->res[i] != NULL) {
2342			bus_release_resource(dev, SYS_RES_IRQ,
2343			    adapter->rid[i], adapter->res[i]);
2344		}
2345	}
2346
2347	if (adapter->msix)
2348		pci_release_msi(dev);
2349
2350	if (adapter->msix_mem != NULL)
2351		bus_release_resource(dev, SYS_RES_MEMORY,
2352		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2353
2354	if (adapter->pci_mem != NULL)
2355		bus_release_resource(dev, SYS_RES_MEMORY,
2356		    PCIR_BAR(0), adapter->pci_mem);
2357
2358}
2359
2360/*
2361 * Setup Either MSI/X or MSI
2362 */
2363static int
2364igb_setup_msix(struct adapter *adapter)
2365{
2366	device_t dev = adapter->dev;
2367	int rid, want, queues, msgs;
2368
2369	/* First try MSI/X */
2370	rid = PCIR_BAR(IGB_MSIX_BAR);
2371	adapter->msix_mem = bus_alloc_resource_any(dev,
2372	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2373       	if (!adapter->msix_mem) {
2374		/* May not be enabled */
2375		device_printf(adapter->dev,
2376		    "Unable to map MSIX table\n");
2377		goto msi;
2378	}
2379
2380	msgs = pci_msix_count(dev);
2381	if (msgs == 0) { /* system has msix disabled */
2382		bus_release_resource(dev, SYS_RES_MEMORY,
2383		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2384		adapter->msix_mem = NULL;
2385		goto msi;
2386	}
2387
2388	/* Limit by the number set in header */
2389	if (msgs > IGB_MSIX_VEC)
2390		msgs = IGB_MSIX_VEC;
2391
2392	/* Figure out a reasonable auto config value */
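	/* ((msgs - 1) / 2: one vector is reserved for link, the rest split between TX and RX) */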
2393	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2394
2395	if (igb_tx_queues == 0)
2396		igb_tx_queues = queues;
2397	if (igb_rx_queues == 0)
2398		igb_rx_queues = queues;
2399	want = igb_tx_queues + igb_rx_queues + 1;
2400	if (msgs >= want)
2401		msgs = want;
2402	else {
2403               	device_printf(adapter->dev,
2404		    "MSIX Configuration Problem, "
2405		    "%d vectors configured, but %d queues wanted!\n",
2406		    msgs, want);
2407		return (ENXIO);
2408	}
2409	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2410               	device_printf(adapter->dev,
2411		    "Using MSIX interrupts with %d vectors\n", msgs);
2412		adapter->num_tx_queues = igb_tx_queues;
2413		adapter->num_rx_queues = igb_rx_queues;
2414		return (msgs);
2415	}
2416msi:
2417       	msgs = pci_msi_count(dev);
2418       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2419               	device_printf(adapter->dev,"Using MSI interrupt\n");
2420	return (msgs);
2421}
2422
2423/*********************************************************************
2424 *
2425 *  Initialize the hardware to a configuration
2426 *  as specified by the adapter structure.
2427 *
2428 **********************************************************************/
2429static int
2430igb_hardware_init(struct adapter *adapter)
2431{
2432	device_t	dev = adapter->dev;
2433	u32		rx_buffer_size;
2434
2435	INIT_DEBUGOUT("igb_hardware_init: begin");
2436
2437	/* Issue a global reset */
2438	e1000_reset_hw(&adapter->hw);
2439
2440	/* Let the firmware know the OS is in control */
2441	igb_get_hw_control(adapter);
2442
2443	/*
2444	 * These parameters control the automatic generation (Tx) and
2445	 * response (Rx) to Ethernet PAUSE frames.
2446	 * - High water mark should allow for at least two frames to be
2447	 *   received after sending an XOFF.
2448	 * - Low water mark works best when it is very near the high water mark.
2449	 *   This allows the receiver to restart by sending XON when it has
2450	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2451	 *   restart after one full frame is pulled from the buffer. There
2452	 *   could be several smaller frames in the buffer and if so they will
2453	 *   not trigger the XON until their total number reduces the buffer
2454	 *   by 1500.
2455	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2456	 */
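	/*
	 * The low 16 bits of RXPBS/PBA give the Rx packet buffer size
	 * in KB; shifting by 10 converts that to bytes. For example a
	 * 64KB buffer and a 1522 byte max frame yield a high water of
	 * 65536 - roundup2(1522, 1024) = 63488 and a low water of
	 * 63488 - 1500 = 61988.
	 */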
2457	if (adapter->hw.mac.type == e1000_82576)
2458		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2459		    E1000_RXPBS) & 0xffff) << 10 );
2460	else
2461		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2462		    E1000_PBA) & 0xffff) << 10 );
2463
2464	adapter->hw.fc.high_water = rx_buffer_size -
2465	    roundup2(adapter->max_frame_size, 1024);
2466	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2467
2468	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2469	adapter->hw.fc.send_xon = TRUE;
2470
2471	/* Set Flow control, use the tunable location if sane */
2472	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2473		adapter->hw.fc.type = igb_fc_setting;
2474	else
2475		adapter->hw.fc.type = e1000_fc_none;
2476
2477	if (e1000_init_hw(&adapter->hw) < 0) {
2478		device_printf(dev, "Hardware Initialization Failed\n");
2479		return (EIO);
2480	}
2481
2482	e1000_check_for_link(&adapter->hw);
2483
2484	return (0);
2485}
2486
2487/*********************************************************************
2488 *
2489 *  Setup networking device structure and register an interface.
2490 *
2491 **********************************************************************/
2492static void
2493igb_setup_interface(device_t dev, struct adapter *adapter)
2494{
2495	struct ifnet   *ifp;
2496
2497	INIT_DEBUGOUT("igb_setup_interface: begin");
2498
2499	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2500	if (ifp == NULL)
2501		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2502	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2503	ifp->if_mtu = ETHERMTU;
2504	ifp->if_init =  igb_init;
2505	ifp->if_softc = adapter;
2506	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2507	ifp->if_ioctl = igb_ioctl;
2508	ifp->if_start = igb_start;
2509	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2510	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2511	IFQ_SET_READY(&ifp->if_snd);
2512
2513	ether_ifattach(ifp, adapter->hw.mac.addr);
2514
2515	ifp->if_capabilities = ifp->if_capenable = 0;
2516
2517	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2518	ifp->if_capabilities |= IFCAP_TSO4;
2519	ifp->if_capenable = ifp->if_capabilities;
2520
2521	/*
2522	 * Tell the upper layer(s) we support long frames.
2523	 */
2524	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2525	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2526	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2527
2528#ifdef DEVICE_POLLING
2529	if (adapter->msix > 1)
2530		device_printf(adapter->dev, "POLLING not supported with MSIX\n");
2531	else
2532		ifp->if_capabilities |= IFCAP_POLLING;
2533#endif
2534
2535	/*
2536	 * Specify the media types supported by this adapter and register
2537	 * callbacks to update media and link information
2538	 */
2539	ifmedia_init(&adapter->media, IFM_IMASK,
2540	    igb_media_change, igb_media_status);
2541	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2542	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2543		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2544			    0, NULL);
2545		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2546	} else {
2547		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2548		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2549			    0, NULL);
2550		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2551			    0, NULL);
2552		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2553			    0, NULL);
2554		if (adapter->hw.phy.type != e1000_phy_ife) {
2555			ifmedia_add(&adapter->media,
2556				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2557			ifmedia_add(&adapter->media,
2558				IFM_ETHER | IFM_1000_T, 0, NULL);
2559		}
2560	}
2561	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2562	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2563}
2564
2565
2566/*
2567 * Manage DMA'able memory.
2568 */
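/*
 * bus_dmamap_load() callback: records the bus address of the single
 * segment so igb_dma_malloc() gets it back in dma_paddr.
 */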
2569static void
2570igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2571{
2572	if (error)
2573		return;
2574	*(bus_addr_t *) arg = segs[0].ds_addr;
2575}
2576
2577static int
2578igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2579        struct igb_dma_alloc *dma, int mapflags)
2580{
2581	int error;
2582
2583	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2584				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2585				BUS_SPACE_MAXADDR,	/* lowaddr */
2586				BUS_SPACE_MAXADDR,	/* highaddr */
2587				NULL, NULL,		/* filter, filterarg */
2588				size,			/* maxsize */
2589				1,			/* nsegments */
2590				size,			/* maxsegsize */
2591				0,			/* flags */
2592				NULL,			/* lockfunc */
2593				NULL,			/* lockarg */
2594				&dma->dma_tag);
2595	if (error) {
2596		device_printf(adapter->dev,
2597		    "%s: bus_dma_tag_create failed: %d\n",
2598		    __func__, error);
2599		goto fail_0;
2600	}
2601
2602	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2603	    BUS_DMA_NOWAIT, &dma->dma_map);
2604	if (error) {
2605		device_printf(adapter->dev,
2606		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2607		    __func__, (uintmax_t)size, error);
2608		goto fail_1;
2609	}
2610
2611	dma->dma_paddr = 0;
2612	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2613	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2614	if (error || dma->dma_paddr == 0) {
2615		device_printf(adapter->dev,
2616		    "%s: bus_dmamap_load failed: %d\n",
2617		    __func__, error);
2618		goto fail_3;
2619	}
2620
2621	return (0);
2622
2623fail_3:
2624	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2625fail_2:
2626	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
2627	bus_dma_tag_destroy(dma->dma_tag);
2628fail_0:
2629	dma->dma_map = NULL;
2630	dma->dma_tag = NULL;
2631
2632	return (error);
2633}
2634
2635static void
2636igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2637{
2638	if (dma->dma_tag == NULL)
2639		return;
2640	if (dma->dma_map != NULL) {
2641		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2642		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2643		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2644		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2645		dma->dma_map = NULL;
2646	}
2647	bus_dma_tag_destroy(dma->dma_tag);
2648	dma->dma_tag = NULL;
2649}
2650
2651
2652/*********************************************************************
2653 *
2654 *  Allocate memory for the transmit and receive rings, and then
2655 *  the descriptors associated with each, called only once at attach.
2656 *
2657 **********************************************************************/
2658static int
2659igb_allocate_queues(struct adapter *adapter)
2660{
2661	device_t dev = adapter->dev;
2662	struct tx_ring *txr;
2663	struct rx_ring *rxr;
2664	int rsize, tsize, error = E1000_SUCCESS;
2665	int txconf = 0, rxconf = 0;
2666	char	name_string[16];
2667
2668	/* First allocate the TX ring struct memory */
2669	if (!(adapter->tx_rings =
2670	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2671	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2672		device_printf(dev, "Unable to allocate TX ring memory\n");
2673		error = ENOMEM;
2674		goto fail;
2675	}
2676	txr = adapter->tx_rings;
2677
2678	/* Next allocate the RX */
2679	if (!(adapter->rx_rings =
2680	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2681	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2682		device_printf(dev, "Unable to allocate RX ring memory\n");
2683		error = ENOMEM;
2684		goto rx_fail;
2685	}
2686	rxr = adapter->rx_rings;
2687
2688	tsize = roundup2(adapter->num_tx_desc *
2689	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2690	/*
2691	 * Now set up the TX queues, txconf is needed to handle the
2692	 * possibility that things fail midcourse and we need to
2693	 * undo memory gracefully
2694	 */
2695	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2696		/* Set up some basics */
2697		txr = &adapter->tx_rings[i];
2698		txr->adapter = adapter;
2699		txr->me = i;
2700
2701		/* Initialize the TX lock */
2702		snprintf(name_string, sizeof(name_string), "%s:tx(%d)",
2703		    device_get_nameunit(dev), txr->me);
2704		mtx_init(&txr->tx_mtx, name_string, NULL, MTX_DEF);
2705
2706		if (igb_dma_malloc(adapter, tsize,
2707			&txr->txdma, BUS_DMA_NOWAIT)) {
2708			device_printf(dev,
2709			    "Unable to allocate TX Descriptor memory\n");
2710			error = ENOMEM;
2711			goto err_tx_desc;
2712		}
2713		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2714		bzero((void *)txr->tx_base, tsize);
2715
2716        	/* Now allocate transmit buffers for the ring */
2717        	if (igb_allocate_transmit_buffers(txr)) {
2718			device_printf(dev,
2719			    "Critical Failure setting up transmit buffers\n");
2720			error = ENOMEM;
2721			goto err_tx_desc;
2722        	}
2723
2724	}
2725
2726	/*
2727	 * Next the RX queues...
2728	 */
2729	rsize = roundup2(adapter->num_rx_desc *
2730	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2731	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2732		rxr = &adapter->rx_rings[i];
2733		rxr->adapter = adapter;
2734		rxr->me = i;
2735
2736		/* Initialize the RX lock */
2737		snprintf(name_string, sizeof(name_string), "%s:rx(%d)",
2738		    device_get_nameunit(dev), rxr->me);
2739		mtx_init(&rxr->rx_mtx, name_string, NULL, MTX_DEF);
2740
2741		if (igb_dma_malloc(adapter, rsize,
2742			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2743			device_printf(dev,
2744			    "Unable to allocate RxDescriptor memory\n");
2745			error = ENOMEM;
2746			goto err_rx_desc;
2747		}
2748		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2749		bzero((void *)rxr->rx_base, rsize);
2750
2751        	/* Allocate receive buffers for the ring*/
2752		if (igb_allocate_receive_buffers(rxr)) {
2753			device_printf(dev,
2754			    "Critical Failure setting up receive buffers\n");
2755			error = ENOMEM;
2756			goto err_rx_desc;
2757		}
2758	}
2759
2760	return (0);
2761
2762err_rx_desc:
2763	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2764		igb_dma_free(adapter, &rxr->rxdma);
2765err_tx_desc:
2766	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2767		igb_dma_free(adapter, &txr->txdma);
2768	free(adapter->rx_rings, M_DEVBUF);
2769rx_fail:
2770	free(adapter->tx_rings, M_DEVBUF);
2771fail:
2772	return (error);
2773}
2774
2775/*********************************************************************
2776 *
2777 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2778 *  the information needed to transmit a packet on the wire. This is
2779 *  called only once at attach, setup is done every reset.
2780 *
2781 **********************************************************************/
2782static int
2783igb_allocate_transmit_buffers(struct tx_ring *txr)
2784{
2785	struct adapter *adapter = txr->adapter;
2786	device_t dev = adapter->dev;
2787	struct igb_buffer *txbuf;
2788	int error, i;
2789
2790	/*
2791	 * Setup DMA descriptor areas.
2792	 */
2793	if ((error = bus_dma_tag_create(NULL,		/* parent */
2794			       PAGE_SIZE, 0,		/* alignment, bounds */
2795			       BUS_SPACE_MAXADDR,	/* lowaddr */
2796			       BUS_SPACE_MAXADDR,	/* highaddr */
2797			       NULL, NULL,		/* filter, filterarg */
2798			       IGB_TSO_SIZE,		/* maxsize */
2799			       IGB_MAX_SCATTER,		/* nsegments */
2800			       PAGE_SIZE,		/* maxsegsize */
2801			       0,			/* flags */
2802			       NULL,			/* lockfunc */
2803			       NULL,			/* lockfuncarg */
2804			       &txr->txtag))) {
2805		device_printf(dev,"Unable to allocate TX DMA tag\n");
2806		goto fail;
2807	}
2808
2809	if (!(txr->tx_buffers =
2810	    (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2811	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2812		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2813		error = ENOMEM;
2814		goto fail;
2815	}
2816
2817        /* Create the descriptor buffer dma maps */
2818	txbuf = txr->tx_buffers;
2819	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2820		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2821		if (error != 0) {
2822			device_printf(dev, "Unable to create TX DMA map\n");
2823			goto fail;
2824		}
2825	}
2826
2827	return 0;
2828fail:
2829	/* We free all, it handles case where we are in the middle */
2830	igb_free_transmit_structures(adapter);
2831	return (error);
2832}
2833
2834/*********************************************************************
2835 *
2836 *  Initialize a transmit ring.
2837 *
2838 **********************************************************************/
2839static void
2840igb_setup_transmit_ring(struct tx_ring *txr)
2841{
2842	struct adapter *adapter = txr->adapter;
2843	struct igb_buffer *txbuf;
2844	int i;
2845
2846	/* Clear the old ring contents */
2847	bzero((void *)txr->tx_base,
2848	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2849	/* Reset indices */
2850	txr->next_avail_desc = 0;
2851	txr->next_to_clean = 0;
2852
2853	/* Free any existing tx buffers. */
2854        txbuf = txr->tx_buffers;
2855	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2856		if (txbuf->m_head != NULL) {
2857			bus_dmamap_sync(txr->txtag, txbuf->map,
2858			    BUS_DMASYNC_POSTWRITE);
2859			bus_dmamap_unload(txr->txtag, txbuf->map);
2860			m_freem(txbuf->m_head);
2861			txbuf->m_head = NULL;
2862		}
2863		/* clear the watch index */
2864		txbuf->next_eop = -1;
2865        }
2866
2867	/* Set number of descriptors available */
2868	txr->tx_avail = adapter->num_tx_desc;
2869
2870	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2871	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2872
2873}
2874
2875/*********************************************************************
2876 *
2877 *  Initialize all transmit rings.
2878 *
2879 **********************************************************************/
2880static void
2881igb_setup_transmit_structures(struct adapter *adapter)
2882{
2883	struct tx_ring *txr = adapter->tx_rings;
2884
2885	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2886		igb_setup_transmit_ring(txr);
2887
2888	return;
2889}
2890
2891/*********************************************************************
2892 *
2893 *  Enable transmit unit.
2894 *
2895 **********************************************************************/
2896static void
2897igb_initialize_transmit_units(struct adapter *adapter)
2898{
2899	struct tx_ring	*txr = adapter->tx_rings;
2900	u32		tctl, txdctl, tipg = 0;
2901
2902	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2903
2904	/* Setup the Base and Length of the Tx Descriptor Rings */
2905	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2906		u64 bus_addr = txr->txdma.dma_paddr;
2907
2908		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2909		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2910		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2911		    (uint32_t)(bus_addr >> 32));
2912		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2913		    (uint32_t)bus_addr);
2914
2915		/* Setup the HW Tx Head and Tail descriptor pointers */
2916		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2917		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2918
2919		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2920		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2921		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2922
2923		/* Setup Transmit Descriptor Base Settings */
2924		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2925
2926		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2927		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2928		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2929	}
2930
2931	/* Set the default values for the Tx Inter Packet Gap timer */
2932	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2933	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
2934		tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
2935	else
2936		tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
2937
2938	tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
2939	tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
2940
2941	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
2942	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
2943	E1000_WRITE_REG(&adapter->hw, E1000_TADV, adapter->tx_abs_int_delay.value);
2944
2945	/* Program the Transmit Control Register */
2946	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2947	tctl &= ~E1000_TCTL_CT;
2948	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2949		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2950
2951	/* This write will effectively turn on the transmit unit. */
2952	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2953
2954}
2955
2956/*********************************************************************
2957 *
2958 *  Free all transmit rings.
2959 *
2960 **********************************************************************/
2961static void
2962igb_free_transmit_structures(struct adapter *adapter)
2963{
2964	struct tx_ring *txr = adapter->tx_rings;
2965
2966	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2967		IGB_TX_LOCK(txr);
2968		igb_free_transmit_buffers(txr);
2969		igb_dma_free(adapter, &txr->txdma);
2970		IGB_TX_UNLOCK(txr);
2971		IGB_TX_LOCK_DESTROY(txr);
2972	}
2973	free(adapter->tx_rings, M_DEVBUF);
2974}
2975
2976/*********************************************************************
2977 *
2978 *  Free transmit ring related data structures.
2979 *
2980 **********************************************************************/
2981static void
2982igb_free_transmit_buffers(struct tx_ring *txr)
2983{
2984	struct adapter *adapter = txr->adapter;
2985	struct igb_buffer *tx_buffer;
2986	int             i;
2987
2988	INIT_DEBUGOUT("free_transmit_ring: begin");
2989
2990	if (txr->tx_buffers == NULL)
2991		return;
2992
2993	tx_buffer = txr->tx_buffers;
2994	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2995		if (tx_buffer->m_head != NULL) {
2996			bus_dmamap_sync(txr->txtag, tx_buffer->map,
2997			    BUS_DMASYNC_POSTWRITE);
2998			bus_dmamap_unload(txr->txtag,
2999			    tx_buffer->map);
3000			m_freem(tx_buffer->m_head);
3001			tx_buffer->m_head = NULL;
3002			if (tx_buffer->map != NULL) {
3003				bus_dmamap_destroy(txr->txtag,
3004				    tx_buffer->map);
3005				tx_buffer->map = NULL;
3006			}
3007		} else if (tx_buffer->map != NULL) {
3008			bus_dmamap_unload(txr->txtag,
3009			    tx_buffer->map);
3010			bus_dmamap_destroy(txr->txtag,
3011			    tx_buffer->map);
3012			tx_buffer->map = NULL;
3013		}
3014	}
3015
3016	if (txr->tx_buffers != NULL) {
3017		free(txr->tx_buffers, M_DEVBUF);
3018		txr->tx_buffers = NULL;
3019	}
3020	if (txr->txtag != NULL) {
3021		bus_dma_tag_destroy(txr->txtag);
3022		txr->txtag = NULL;
3023	}
3024	return;
3025}
3026
3027/**********************************************************************
3028 *
3029 *  Setup work for hardware segmentation offload (TSO) on
3030 *  adapters using advanced tx descriptors (82575)
3031 *
3032 **********************************************************************/
3033static boolean_t
3034igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3035{
3036	struct adapter *adapter = txr->adapter;
3037	struct e1000_adv_tx_context_desc *TXD;
3038	struct igb_buffer        *tx_buffer;
3039	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3040	u32 mss_l4len_idx = 0;
3041	u16 vtag = 0;
3042	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3043	struct ether_vlan_header *eh;
3044	struct ip *ip;
3045	struct tcphdr *th;
3046
3047
3048	/*
3049	 * Determine where frame payload starts.
3050	 * Jump over vlan headers if already present
3051	 */
3052	eh = mtod(mp, struct ether_vlan_header *);
3053	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3054		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3055	else
3056		ehdrlen = ETHER_HDR_LEN;
3057
3058	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3059	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3060		return FALSE;
3061
3062	/* Only supports IPV4 for now */
3063	ctxd = txr->next_avail_desc;
3064	tx_buffer = &txr->tx_buffers[ctxd];
3065	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3066
3067	ip = (struct ip *)(mp->m_data + ehdrlen);
3068	if (ip->ip_p != IPPROTO_TCP)
3069		return FALSE;	/* TSO is done only for TCP */
3070	ip->ip_sum = 0;
3071	ip_hlen = ip->ip_hl << 2;
3072	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3073	th->th_sum = in_pseudo(ip->ip_src.s_addr,
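	/*
	 * Seed th_sum with the pseudo-header checksum (addresses and
	 * protocol, no length); the hardware completes the checksum
	 * for each segment it generates.
	 */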
3074	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3075	tcp_hlen = th->th_off << 2;
3076	/*
3077	 * Calculate header length, this is used
3078	 * in the transmit desc in igb_xmit
3079	 */
3080	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3081
3082	/* VLAN MACLEN IPLEN */
3083	if (mp->m_flags & M_VLANTAG) {
3084		vtag = htole16(mp->m_pkthdr.ether_vtag);
3085		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3086	}
3087
3088	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3089	vlan_macip_lens |= ip_hlen;
3090	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3091
3092	/* ADV DTYPE TUCMD */
3093	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3094	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3095	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3096	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3097
3098	/* MSS L4LEN IDX */
3099	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3100	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3101	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3102
3103	TXD->seqnum_seed = htole32(0);
3104	tx_buffer->m_head = NULL;
3105	tx_buffer->next_eop = -1;
3106
3107	if (++ctxd == adapter->num_tx_desc)
3108		ctxd = 0;
3109
3110	txr->tx_avail--;
3111	txr->next_avail_desc = ctxd;
3112	return TRUE;
3113}
3114
3115
3116/*********************************************************************
3117 *
3118 *  Context Descriptor setup for VLAN or CSUM
3119 *
3120 **********************************************************************/
3121
3122static int
3123igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3124{
3125	struct adapter *adapter = txr->adapter;
3126	struct e1000_adv_tx_context_desc *TXD;
3127	struct igb_buffer        *tx_buffer;
3128	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3129	struct ether_vlan_header *eh;
3130	struct ip *ip = NULL;
3131	struct ip6_hdr *ip6;
3132	int  ehdrlen, ip_hlen = 0;
3133	u16	etype;
3134	u8	ipproto = 0;
3135	bool	offload = TRUE;
3136	u16 vtag = 0;
3137
3138	int ctxd = txr->next_avail_desc;
3139	tx_buffer = &txr->tx_buffers[ctxd];
3140	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3141
3142	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3143		offload = FALSE; /* Only here to handle VLANs */
3144	/*
3145	** In advanced descriptors the vlan tag must
3146	** be placed into the descriptor itself.
3147	*/
3148	if (mp->m_flags & M_VLANTAG) {
3149		vtag = htole16(mp->m_pkthdr.ether_vtag);
3150		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3151	} else if (offload == FALSE)
3152		return FALSE;
3153	/*
3154	 * Determine where frame payload starts.
3155	 * Jump over vlan headers if already present,
3156	 * helpful for QinQ too.
3157	 */
3158	eh = mtod(mp, struct ether_vlan_header *);
3159	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3160		etype = ntohs(eh->evl_proto);
3161		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3162	} else {
3163		etype = ntohs(eh->evl_encap_proto);
3164		ehdrlen = ETHER_HDR_LEN;
3165	}
3166
3167	/* Set the ether header length */
3168	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3169
3170	switch (etype) {
3171		case ETHERTYPE_IP:
3172			ip = (struct ip *)(mp->m_data + ehdrlen);
3173			ip_hlen = ip->ip_hl << 2;
3174			if (mp->m_len < ehdrlen + ip_hlen) {
3175				offload = FALSE;
3176				break;
3177			}
3178			ipproto = ip->ip_p;
3179			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3180			break;
3181		case ETHERTYPE_IPV6:
3182			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3183			ip_hlen = sizeof(struct ip6_hdr);
3184			if (mp->m_len < ehdrlen + ip_hlen)
3185				return FALSE; /* failure */
3186			ipproto = ip6->ip6_nxt;
3187			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3188			break;
3189#ifdef IGB_TIMESYNC
3190		case ETHERTYPE_IEEE1588:
3191			offload = IGB_TIMESTAMP;
3192			break;
3193#endif
3194		default:
3195			offload = FALSE;
3196			break;
3197	}
3198
3199	vlan_macip_lens |= ip_hlen;
3200	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3201
3202	switch (ipproto) {
3203		case IPPROTO_TCP:
3204			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3205				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3206			break;
3207		case IPPROTO_UDP:
3208		{
3209#ifdef IGB_TIMESYNC
3210			void *hdr = (caddr_t) ip + ip_hlen;
3211			struct udphdr *uh = (struct udphdr *)hdr;
3212
3213			if (uh->uh_dport == htons(TSYNC_PORT))
3214				offload = IGB_TIMESTAMP;
3215#endif
3216			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3217				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3218			break;
3219		}
3220		default:
3221			offload = FALSE;
3222			break;
3223	}
3224
3225	/* Now copy bits into descriptor */
3226	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3227	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3228	TXD->seqnum_seed = htole32(0);
3229	TXD->mss_l4len_idx = htole32(0);
3230
3231	tx_buffer->m_head = NULL;
3232	tx_buffer->next_eop = -1;
3233
3234	/* We've consumed the first desc, adjust counters */
3235	if (++ctxd == adapter->num_tx_desc)
3236		ctxd = 0;
3237	txr->next_avail_desc = ctxd;
3238	--txr->tx_avail;
3239
3240        return (offload);
3241}
3242
3243
3244/**********************************************************************
3245 *
3246 *  Examine each tx_buffer in the used queue. If the hardware is done
3247 *  processing the packet then free associated resources. The
3248 *  tx_buffer is put back on the free queue.
3249 *
3250 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3251 **********************************************************************/
3252static bool
3253igb_txeof(struct tx_ring *txr)
3254{
3255	struct adapter	*adapter = txr->adapter;
3256        int first, last, done, num_avail;
3257        struct igb_buffer *tx_buffer;
3258        struct e1000_tx_desc   *tx_desc, *eop_desc;
3259	struct ifnet   *ifp = adapter->ifp;
3260
3261	IGB_TX_LOCK_ASSERT(txr);
3262
3263        if (txr->tx_avail == adapter->num_tx_desc)
3264                return FALSE;
3265
3266        num_avail = txr->tx_avail;
3267        first = txr->next_to_clean;
3268        tx_desc = &txr->tx_base[first];
3269        tx_buffer = &txr->tx_buffers[first];
3270	last = tx_buffer->next_eop;
3271        eop_desc = &txr->tx_base[last];
3272
3273	/*
3274	 * What this does is get the index of the
3275	 * first descriptor AFTER the EOP of the
3276	 * first packet, that way we can do the
3277	 * simple comparison on the inner while loop.
3278	 */
3279	if (++last == adapter->num_tx_desc)
3280 		last = 0;
3281	done = last;
3282
3283        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3284            BUS_DMASYNC_POSTREAD);
3285
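        /*
         * DD (descriptor done) is written back by the hardware into
         * the EOP descriptor's status once the whole packet has been
         * processed, so one test per packet suffices.
         */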
3286        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3287		/* We clean the range of the packet */
3288		while (first != done) {
3289                	tx_desc->upper.data = 0;
3290                	tx_desc->lower.data = 0;
3291                	tx_desc->buffer_addr = 0;
3292                	num_avail++;
3293
3294			if (tx_buffer->m_head) {
3295				ifp->if_opackets++;
3296				bus_dmamap_sync(txr->txtag,
3297				    tx_buffer->map,
3298				    BUS_DMASYNC_POSTWRITE);
3299				bus_dmamap_unload(txr->txtag,
3300				    tx_buffer->map);
3301
3302                        	m_freem(tx_buffer->m_head);
3303                        	tx_buffer->m_head = NULL;
3304                	}
3305			tx_buffer->next_eop = -1;
3306
3307	                if (++first == adapter->num_tx_desc)
3308				first = 0;
3309
3310	                tx_buffer = &txr->tx_buffers[first];
3311			tx_desc = &txr->tx_base[first];
3312		}
3313		/* See if we can continue to the next packet */
3314		last = tx_buffer->next_eop;
3315		if (last != -1) {
3316        		eop_desc = &txr->tx_base[last];
3317			/* Get new done point */
3318			if (++last == adapter->num_tx_desc) last = 0;
3319			done = last;
3320		} else
3321			break;
3322        }
3323        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3324            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3325
3326        txr->next_to_clean = first;
3327
3328        /*
3329         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3330         * that it is OK to send packets.
3331         * If there are no pending descriptors, clear the timeout. Otherwise,
3332         * if some descriptors have been freed, restart the timeout.
3333         */
3334        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3335                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3336		/* All clean, turn off the timer */
3337                if (num_avail == adapter->num_tx_desc) {
3338			txr->watchdog_timer = 0;
3339        		txr->tx_avail = num_avail;
3340			return FALSE;
3341		}
3342		/* Some cleaned, reset the timer */
3343                else if (num_avail != txr->tx_avail)
3344			txr->watchdog_timer = IGB_TX_TIMEOUT;
3345        }
3346        txr->tx_avail = num_avail;
3347        return TRUE;
3348}
3349
3350
3351/*********************************************************************
3352 *
3353 *  Get a buffer from system mbuf buffer pool.
3354 *
3355 **********************************************************************/
3356static int
3357igb_get_buf(struct rx_ring *rxr, int i)
3358{
3359	struct adapter		*adapter = rxr->adapter;
3360	struct mbuf		*m;
3361	bus_dma_segment_t	segs[1];
3362	bus_dmamap_t		map;
3363	struct igb_buffer	*rx_buffer;
3364	int			error, nsegs;
3365
3366	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3367	if (m == NULL) {
3368		adapter->mbuf_cluster_failed++;
3369		return (ENOBUFS);
3370	}
3371	m->m_len = m->m_pkthdr.len = MCLBYTES;
3372
3373	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3374		m_adj(m, ETHER_ALIGN);
3375
3376	/*
3377	 * Using memory from the mbuf cluster pool, invoke the
3378	 * bus_dma machinery to arrange the memory mapping.
3379	 */
3380	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3381	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
3382	if (error != 0) {
3383		m_free(m);
3384		return (error);
3385	}
3386
3387	/* If nsegs is wrong then the stack is corrupt. */
3388	KASSERT(nsegs == 1, ("Too many segments returned!"));
3389
3390	rx_buffer = &rxr->rx_buffers[i];
3391	if (rx_buffer->m_head != NULL)
3392		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3393
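	/*
	 * Swap the just-loaded spare map into this ring slot and keep
	 * the slot's old map as the new spare, so no loaded map has to
	 * be torn down on the replenish path.
	 */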
3394	map = rx_buffer->map;
3395	rx_buffer->map = rxr->rx_spare_map;
3396	rxr->rx_spare_map = map;
3397	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3398	rx_buffer->m_head = m;
3399
3400	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
3401	return (0);
3402}
3403
3404
3405/*********************************************************************
3406 *
3407 *  Allocate memory for rx_buffer structures. Since we use one
3408 *  rx_buffer per received packet, the maximum number of rx_buffer's
3409 *  that we'll need is equal to the number of receive descriptors
3410 *  that we've allocated.
3411 *
3412 **********************************************************************/
3413static int
3414igb_allocate_receive_buffers(struct rx_ring *rxr)
3415{
3416	struct	adapter 	*adapter = rxr->adapter;
3417	device_t 		dev = adapter->dev;
3418	struct igb_buffer 	*rxbuf;
3419	int             	i, bsize, error;
3420
3421	bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3422	if (!(rxr->rx_buffers =
3423	    (struct igb_buffer *) malloc(bsize,
3424	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3425		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3426		error = ENOMEM;
3427		goto fail;
3428	}
3429
3430	if ((error = bus_dma_tag_create(NULL,		/* parent */
3431				   PAGE_SIZE, 0,	/* alignment, bounds */
3432				   BUS_SPACE_MAXADDR,	/* lowaddr */
3433				   BUS_SPACE_MAXADDR,	/* highaddr */
3434				   NULL, NULL,		/* filter, filterarg */
3435				   MCLBYTES,		/* maxsize */
3436				   1,			/* nsegments */
3437				   MCLBYTES,		/* maxsegsize */
3438				   0,			/* flags */
3439				   NULL,		/* lockfunc */
3440				   NULL,		/* lockfuncarg */
3441				   &rxr->rxtag))) {
3442		device_printf(dev, "Unable to create RX Small DMA tag\n");
3443		goto fail;
3444	}
3445
3446	/* Create the spare map (used by getbuf) */
3447        error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3448	     &rxr->rx_spare_map);
3449	if (error) {
3450		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3451		    __func__, error);
3452		goto fail;
3453	}
3454
3455	for (i = 0; i < adapter->num_rx_desc; i++) {
3456		rxbuf = &rxr->rx_buffers[i];
3457		error = bus_dmamap_create(rxr->rxtag,
3458		    BUS_DMA_NOWAIT, &rxbuf->map);
3459		if (error) {
3460			device_printf(dev, "Unable to create Small RX DMA map\n");
3461			goto fail;
3462		}
3463	}
3464
3465	return (0);
3466
3467fail:
3468	/* Frees all, but can handle partial completion */
3469	igb_free_receive_structures(adapter);
3470	return (error);
3471}
3472
3473/*********************************************************************
3474 *
3475 *  Initialize a receive ring and its buffers.
3476 *
3477 **********************************************************************/
3478static int
3479igb_setup_receive_ring(struct rx_ring *rxr)
3480{
3481	struct	adapter		*adapter;
3482	device_t		dev;
3483	struct igb_buffer	*rxbuf;
3484	struct lro_ctrl		*lro = &rxr->lro;
3485	int j, rsize;
3486
3487	adapter = rxr->adapter;
3488	dev = adapter->dev;
3489	rsize = roundup2(adapter->num_rx_desc *
3490	    sizeof(union e1000_adv_rx_desc), 4096);
3491	/* Clear the ring contents */
3492	bzero((void *)rxr->rx_base, rsize);
3493
3494	/*
3495	** Free any mbufs still attached to the
3496	** ring's buffers before the ring is
3497	** reloaded with fresh ones below.
3498	*/
3499	for (int i = 0; i < adapter->num_rx_desc; i++) {
3500		rxbuf = &rxr->rx_buffers[i];
3501		if (rxbuf->m_head != NULL) {
3502			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3503			    BUS_DMASYNC_POSTREAD);
3504			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3505			m_freem(rxbuf->m_head);
3506			rxbuf->m_head = NULL;
3507		}
3508	}
3509
3510	for (j = 0; j < adapter->num_rx_desc; j++) {
3511		if (igb_get_buf(rxr, j) == ENOBUFS) {
3512			rxr->rx_buffers[j].m_head = NULL;
3513			rxr->rx_base[j].read.pkt_addr = 0;
3514			goto fail;
3515		}
3516	}
3517
3518	/* Setup our descriptor indices */
3519	rxr->next_to_check = 0;
3520	rxr->last_cleaned = 0;
3521
3522	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3523	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3524
3525	/* Now set up the LRO interface */
3526	if (igb_enable_lro) {
3527		int err = tcp_lro_init(lro);
3528		if (err) {
3529			device_printf(dev, "LRO Initialization failed!\n");
3530			goto fail;
3531		}
3532		device_printf(dev, "RX LRO Initialized\n");
3533		lro->ifp = adapter->ifp;
3534	}
3535
3536	return (0);
3537fail:
3538	/*
3539	 * We need to clean up any buffers allocated so far;
3540	 * 'j' is the failing index, so decrement it to get the
3541	 * last success and walk back down to the first buffer.
3542	 */
3543	for (--j; j >= 0; j--) {
3544		rxbuf = &rxr->rx_buffers[j];
3545		if (rxbuf->m_head != NULL) {
3546			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3547			    BUS_DMASYNC_POSTREAD);
3548			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3549			m_freem(rxbuf->m_head);
3550			rxbuf->m_head = NULL;
3551		}
3552	}
3553	return (ENOBUFS);
3554}
3555
3556/*********************************************************************
3557 *
3558 *  Initialize all receive rings.
3559 *
3560 **********************************************************************/
3561static int
3562igb_setup_receive_structures(struct adapter *adapter)
3563{
3564	struct rx_ring *rxr = adapter->rx_rings;
3565	int i, j;
3566
3567	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3568		if (igb_setup_receive_ring(rxr))
3569			goto fail;
3570
3571	return (0);
3572fail:
3573	/*
3574	 * Free RX buffers allocated so far, we will only handle
3575	 * the rings that completed, the failing case will have
3576	 * cleaned up for itself. The value of 'i' will be the
3577	 * failed ring so we must pre-decrement it.
3578	 */
3579	rxr = adapter->rx_rings;
3580	for (--i; i >= 0; i--, rxr++) {
3581		for (j = 0; j < adapter->num_rx_desc; j++) {
3582			struct igb_buffer *rxbuf;
3583			rxbuf = &rxr->rx_buffers[j];
3584			if (rxbuf->m_head != NULL) {
3585				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3586			  	  BUS_DMASYNC_POSTREAD);
3587				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3588				m_freem(rxbuf->m_head);
3589				rxbuf->m_head = NULL;
3590			}
3591		}
3592	}
3593
3594	return (ENOBUFS);
3595}
3596
3597/*********************************************************************
3598 *
3599 *  Enable receive unit.
3600 *
3601 **********************************************************************/
3602static void
3603igb_initialize_receive_units(struct adapter *adapter)
3604{
3605	struct rx_ring	*rxr = adapter->rx_rings;
3606	struct ifnet	*ifp = adapter->ifp;
3607	u32		rctl, rxcsum, psize;
3608
3609	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3610
3611	/*
3612	 * Make sure receives are disabled while setting
3613	 * up the descriptor ring
3614	 */
3615	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3616	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3617
3618	E1000_WRITE_REG(&adapter->hw, E1000_RADV,
3619	    adapter->rx_abs_int_delay.value);
3620
3621	/* Setup the Base and Length of the Rx Descriptor Rings */
3622	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3623		u64 bus_addr = rxr->rxdma.dma_paddr;
3624		u32 rxdctl, srrctl;
3625
3626		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3627		    adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
3628		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3629		    (uint32_t)(bus_addr >> 32));
3630		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3631		    (uint32_t)bus_addr);
3632		/* Use Advanced Descriptor type */
3633		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
3634		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3635		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3636		/* Enable this Queue */
3637		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3638		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3639		rxdctl &= 0xFFF00000;
3640		rxdctl |= IGB_RX_PTHRESH;
3641		rxdctl |= IGB_RX_HTHRESH << 8;
3642		rxdctl |= IGB_RX_WTHRESH << 16;
3643		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3644	}
3645
3646	/*
3647	** Setup for RX MultiQueue
3648	*/
3649	if (adapter->num_rx_queues > 1) {
3650		u32 random[10], mrqc, shift = 0;
3651		union igb_reta {
3652			u32 dword;
3653			u8  bytes[4];
3654		} reta;
3655
3656		arc4rand(&random, sizeof(random), 0);
3657		if (adapter->hw.mac.type == e1000_82575)
3658			shift = 6;
3659		/* Fill the 128-entry redirection table, 4 entries per write */
3660		for (int i = 0; i < 128; i++) {
3661			reta.bytes[i & 3] =
3662			    (i % adapter->num_rx_queues) << shift;
3663			if ((i & 3) == 3)
3664				E1000_WRITE_REG(&adapter->hw,
3665				    E1000_RETA(i & ~3), reta.dword);
3666		}
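		/*
		 * Worked example (assuming a little-endian host for the
		 * union overlay): with two queues on an 82575 (shift = 6)
		 * the entries alternate 0x00, 0x40, so every RETA register
		 * is written as 0x40004000.
		 */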
3667		/* Now fill in the RSS random key */
3668		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3669		for (int i = 0; i < 10; i++)
3670			E1000_WRITE_REG_ARRAY(&adapter->hw,
3671			    E1000_RSSRK(0), i, random[i]);
3672
3673		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3674		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3675		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3676		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3677		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3678		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3679		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3680		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3681
3682		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3683
3684		/*
3685		** NOTE: Receive Full-Packet Checksum Offload
3686		** is mutually exclusive with Multiqueue; this
3687		** does not affect the normal TCP/IP checksum
3688		** offloads, which still work.
3689		*/
3690		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3691		rxcsum |= E1000_RXCSUM_PCSD;
3692		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3693	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
3694		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3695		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3696		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3697	}
3698
3699	/* Setup the Receive Control Register */
3700	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3701	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3702		   E1000_RCTL_RDMTS_HALF |
3703		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3704
3705	/* Make sure VLAN Filters are off */
3706	rctl &= ~E1000_RCTL_VFE;
3707
3708	rctl &= ~E1000_RCTL_SBP;
3709
3710	switch (adapter->rx_buffer_len) {
3711	default:
3712	case 2048:
3713		rctl |= E1000_RCTL_SZ_2048;
3714		break;
3715	case 4096:
3716		rctl |= E1000_RCTL_SZ_4096 |
3717		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3718		break;
3719	case 8192:
3720		rctl |= E1000_RCTL_SZ_8192 |
3721		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3722		break;
3723	case 16384:
3724		rctl |= E1000_RCTL_SZ_16384 |
3725		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3726		break;
3727	}
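	/*
	 * Buffer sizes above 2048 use the BSEX (buffer size extension)
	 * encodings and are paired with LPE here so that packets longer
	 * than the default maximum are accepted.
	 */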
3728
3729	if (ifp->if_mtu > ETHERMTU) {
3730		/* Set maximum packet len */
3731		psize = adapter->max_frame_size;
3732		/* are we on a vlan? */
3733		if (adapter->ifp->if_vlantrunk != NULL)
3734			psize += VLAN_TAG_SIZE;
3735		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3736		rctl |= E1000_RCTL_LPE;
3737	} else
3738		rctl &= ~E1000_RCTL_LPE;
3739
3740	/* Enable Receives */
3741	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3742
3743	/*
3744	 * Setup the HW Rx Head and Tail Descriptor Pointers
3745	 *   - needs to be after enable
3746	 */
3747	for (int i = 0; i < adapter->num_rx_queues; i++) {
3748		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3749		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3750		     adapter->num_rx_desc - 1);
3751	}
3752	return;
3753}
3754
3755/*********************************************************************
3756 *
3757 *  Free receive rings.
3758 *
3759 **********************************************************************/
3760static void
3761igb_free_receive_structures(struct adapter *adapter)
3762{
3763	struct rx_ring *rxr = adapter->rx_rings;
3764
3765	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3766		struct lro_ctrl	*lro = &rxr->lro;
3767		igb_free_receive_buffers(rxr);
3768		tcp_lro_free(lro);
3769		igb_dma_free(adapter, &rxr->rxdma);
3770	}
3771
3772	free(adapter->rx_rings, M_DEVBUF);
3773}
3774
3775/*********************************************************************
3776 *
3777 *  Free receive ring data structures.
3778 *
3779 **********************************************************************/
3780static void
3781igb_free_receive_buffers(struct rx_ring *rxr)
3782{
3783	struct adapter	*adapter = rxr->adapter;
3784	struct igb_buffer *rx_buffer;
3785
3786	INIT_DEBUGOUT("free_receive_structures: begin");
3787
3788	if (rxr->rx_spare_map) {
3789		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3790		rxr->rx_spare_map = NULL;
3791	}
3792
3793	/* Cleanup any existing buffers */
3794	if (rxr->rx_buffers != NULL) {
3795		rx_buffer = &rxr->rx_buffers[0];
3796		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3797			if (rx_buffer->m_head != NULL) {
3798				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3799				    BUS_DMASYNC_POSTREAD);
3800				bus_dmamap_unload(rxr->rxtag,
3801				    rx_buffer->map);
3802				m_freem(rx_buffer->m_head);
3803				rx_buffer->m_head = NULL;
3804			} else if (rx_buffer->map != NULL)
3805				bus_dmamap_unload(rxr->rxtag,
3806				    rx_buffer->map);
3807			if (rx_buffer->map != NULL) {
3808				bus_dmamap_destroy(rxr->rxtag,
3809				    rx_buffer->map);
3810				rx_buffer->map = NULL;
3811			}
3812		}
3813	}
3814
3815	if (rxr->rx_buffers != NULL) {
3816		free(rxr->rx_buffers, M_DEVBUF);
3817		rxr->rx_buffers = NULL;
3818	}
3819
3820	if (rxr->rxtag != NULL) {
3821		bus_dma_tag_destroy(rxr->rxtag);
3822		rxr->rxtag = NULL;
3823	}
3824}
3825/*********************************************************************
3826 *
3827 *  This routine executes in interrupt context. It replenishes
3828 *  the mbufs in the descriptor and sends data which has been
3829 *  dma'ed into host memory to upper layer.
3830 *
3831 *  We loop at most count times if count is > 0, or until done if
3832 *  count < 0.
3833 *
3834 *  Return TRUE if all clean, FALSE otherwise
3835 *********************************************************************/
3836static bool
3837igb_rxeof(struct rx_ring *rxr, int count)
3838{
3839	struct adapter		*adapter = rxr->adapter;
3840	struct ifnet		*ifp;
3841	struct lro_ctrl		*lro = &rxr->lro;
3842	struct lro_entry	*queued;
3843	struct mbuf		*mp;
3844	uint8_t			accept_frame = 0;
3845	uint8_t			eop = 0;
3846	uint16_t 		len, desc_len, prev_len_adj;
3847	int			i;
3848	u32			staterr;
3849	union e1000_adv_rx_desc	*cur;
3850
3851	IGB_RX_LOCK(rxr);
3852	ifp = adapter->ifp;
3853	i = rxr->next_to_check;
3854	cur = &rxr->rx_base[i];
3855	staterr = cur->wb.upper.status_error;
3856
3857	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3858	    BUS_DMASYNC_POSTREAD);
3859
3860	if (!(staterr & E1000_RXD_STAT_DD)) {
3861		IGB_RX_UNLOCK(rxr);
3862		return FALSE;
3863	}
3864
3865	while ((staterr & E1000_RXD_STAT_DD) &&
3866	    (count != 0) &&
3867	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3868		struct mbuf *m = NULL;
3869
3870		mp = rxr->rx_buffers[i].m_head;
3871		/*
3872		 * Sync the map now; the chain assembly and alignment
3873		 * fixup below read data just DMA'ed into host memory.
3874		 */
3875		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
3876		    BUS_DMASYNC_POSTREAD);
3877
3878		accept_frame = 1;
3879		prev_len_adj = 0;
3880		desc_len = le16toh(cur->wb.upper.length);
3881		if (staterr & E1000_RXD_STAT_EOP) {
3882			count--;
3883			eop = 1;
3884			if (desc_len < ETHER_CRC_LEN) {
3885				len = 0;
3886				prev_len_adj = ETHER_CRC_LEN - desc_len;
3887			} else
3888				len = desc_len - ETHER_CRC_LEN;
3889		} else {
3890			eop = 0;
3891			len = desc_len;
3892		}
3893
3894		/*
3895		 * Frames with hardware-reported errors (CRC, symbol,
3896		 * sequence, etc.) are not accepted; any partially
3897		 * assembled chain is discarded below.
3898		 */
3899		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
3900			accept_frame = 0;
3902
3903		if (accept_frame) {
3904			if (igb_get_buf(rxr, i) != 0) {
3905				ifp->if_iqdrops++;
3906				goto discard;
3907			}
3908
3909			/* Assign correct length to the current fragment */
3910			mp->m_len = len;
3911
3912			if (rxr->fmp == NULL) {
3913				mp->m_pkthdr.len = len;
3914				rxr->fmp = mp; /* Store the first mbuf */
3915				rxr->lmp = mp;
3916			} else {
3917				/* Chain mbuf's together */
3918				mp->m_flags &= ~M_PKTHDR;
3919				/*
3920				 * Adjust length of previous mbuf in chain if
3921				 * we received less than 4 bytes in the last
3922				 * descriptor.
3923				 */
3924				if (prev_len_adj > 0) {
3925					rxr->lmp->m_len -= prev_len_adj;
3926					rxr->fmp->m_pkthdr.len -=
3927					    prev_len_adj;
3928				}
3929				rxr->lmp->m_next = mp;
3930				rxr->lmp = rxr->lmp->m_next;
3931				rxr->fmp->m_pkthdr.len += len;
3932			}
3933
3934			if (eop) {
3935				rxr->fmp->m_pkthdr.rcvif = ifp;
3936				ifp->if_ipackets++;
3937				rxr->rx_packets++;
3938				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
3939
3940				igb_rx_checksum(staterr, rxr->fmp);
3941#ifndef __NO_STRICT_ALIGNMENT
3942				if (adapter->max_frame_size >
3943				    (MCLBYTES - ETHER_ALIGN) &&
3944				    igb_fixup_rx(rxr) != 0)
3945					goto skip;
3946#endif
3947				if (staterr & E1000_RXD_STAT_VP) {
3948					rxr->fmp->m_pkthdr.ether_vtag =
3949					    le16toh(cur->wb.upper.vlan);
3950					rxr->fmp->m_flags |= M_VLANTAG;
3951				}
3952#ifndef __NO_STRICT_ALIGNMENT
3953skip:
3954#endif
3955				m = rxr->fmp;
3956				rxr->fmp = NULL;
3957				rxr->lmp = NULL;
3958			}
3959		} else {
3960			ifp->if_ierrors++;
3961discard:
3962			/* Reuse loaded DMA map and just update mbuf chain */
3963			mp = rxr->rx_buffers[i].m_head;
3964			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3965			mp->m_data = mp->m_ext.ext_buf;
3966			mp->m_next = NULL;
3967			if (adapter->max_frame_size <=
3968			    (MCLBYTES - ETHER_ALIGN))
3969				m_adj(mp, ETHER_ALIGN);
3970			if (rxr->fmp != NULL) {
3971				m_freem(rxr->fmp);
3972				rxr->fmp = NULL;
3973				rxr->lmp = NULL;
3974			}
3975			m = NULL;
3976		}
3977
3978		/* Zero out the receive descriptors status. */
3979		cur->wb.upper.status_error = 0;
3980		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3981		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3982
3983		rxr->last_cleaned = i; /* For updating tail */
3984
3985		/* Advance our pointers to the next descriptor. */
3986		if (++i == adapter->num_rx_desc)
3987			i = 0;
3988
3989		if (m != NULL) {
3990			rxr->next_to_check = i;
3991			/* Hand to LRO; if LRO is inactive or rejects it */
3992			if ((!lro->lro_cnt) || (tcp_lro_rx(lro, m, 0))) {
3993				/* Pass up to the stack */
3994				IGB_RX_UNLOCK(rxr);
3995				(*ifp->if_input)(ifp, m);
3996				IGB_RX_LOCK(rxr);
3997				i = rxr->next_to_check;
3998			}
3999		}
4000		/* Get the next descriptor */
4001		cur = &rxr->rx_base[i];
4002		staterr = cur->wb.upper.status_error;
4003	}
4004	rxr->next_to_check = i;
4005
4006	/* Advance this ring's "Tail Pointer" to release cleaned descriptors. */
4007	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4008	IGB_RX_UNLOCK(rxr);
4009
4010	/*
4011	** Flush any outstanding LRO work
4012	** this may call into the stack and
4013	** must not hold a driver lock.
4014	*/
4015	while (!SLIST_EMPTY(&lro->lro_active)) {
4016		queued = SLIST_FIRST(&lro->lro_active);
4017		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4018		tcp_lro_flush(lro, queued);
4019	}
4020
4021	if (!(staterr & E1000_RXD_STAT_DD))
4022		return FALSE;
4023
4024	return TRUE;
4025}
4026
4027#ifndef __NO_STRICT_ALIGNMENT
4028/*
4029 * When jumbo frames are enabled we should realign the entire payload on
4030 * architectures with strict alignment. This is a serious design mistake
4031 * of the 8254x as it nullifies DMA operations. The 8254x just allows RX
4032 * buffer size to be 2048/4096/8192/16384. What we really want is 2048
4033 * - ETHER_ALIGN to align its payload. On architectures without strict
4034 * alignment restrictions the 8254x still performs unaligned memory access
4035 * which would reduce the performance too. To avoid copying over an entire
4036 * frame to align, we allocate a new mbuf and copy the ethernet header to
4037 * the new mbuf. The new mbuf is prepended into the existing mbuf chain.
4038 *
4039 * Be aware, best performance of the 8254x is achieved only when jumbo
4040 * frames are not used at all on architectures with strict alignment.
4041 */
4042static int
4043igb_fixup_rx(struct rx_ring *rxr)
4044{
4045	struct adapter *adapter = rxr->adapter;
4046	struct mbuf *m, *n;
4047	int error;
4048
4049	error = 0;
4050	m = rxr->fmp;
4051	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4052		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4053		m->m_data += ETHER_HDR_LEN;
4054	} else {
4055		MGETHDR(n, M_DONTWAIT, MT_DATA);
4056		if (n != NULL) {
4057			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4058			m->m_data += ETHER_HDR_LEN;
4059			m->m_len -= ETHER_HDR_LEN;
4060			n->m_len = ETHER_HDR_LEN;
4061			M_MOVE_PKTHDR(n, m);
4062			n->m_next = m;
4063			rxr->fmp = n;
4064		} else {
4065			adapter->dropped_pkts++;
4066			m_freem(rxr->fmp);
4067			rxr->fmp = NULL;
4068			error = ENOMEM;
4069		}
4070	}
4071
4072	return (error);
4073}
4074#endif
4075
4076/*********************************************************************
4077 *
4078 *  Verify that the hardware indicated that the checksum is valid.
4079 *  Inform the stack about the status of checksum so that stack
4080 *  doesn't spend time verifying the checksum.
4081 *
4082 *********************************************************************/
4083static void
4084igb_rx_checksum(u32 staterr, struct mbuf *mp)
4085{
4086	u16 status = (u16)staterr;
4087	u8  errors = (u8) (staterr >> 24);
4088
4089	/* The Ignore Checksum (IXSM) bit is set; report nothing */
4090	if (status & E1000_RXD_STAT_IXSM) {
4091		mp->m_pkthdr.csum_flags = 0;
4092		return;
4093	}
4094
4095	if (status & E1000_RXD_STAT_IPCS) {
4096		/* Did it pass? */
4097		if (!(errors & E1000_RXD_ERR_IPE)) {
4098			/* IP Checksum Good */
4099			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4100			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4101
4102		} else
4103			mp->m_pkthdr.csum_flags = 0;
4104	}
4105
4106	if (status & E1000_RXD_STAT_TCPCS) {
4107		/* Did it pass? */
4108		if (!(errors & E1000_RXD_ERR_TCPE)) {
4109			mp->m_pkthdr.csum_flags |=
4110			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4111			mp->m_pkthdr.csum_data = htons(0xffff);
4112		}
4113	}
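	/*
	 * CSUM_DATA_VALID | CSUM_PSEUDO_HDR with a csum_data of 0xffff
	 * tells the stack the TCP/UDP checksum, including the pseudo
	 * header, has already been verified, so it is not recomputed.
	 */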
4114	return;
4115}
4116
4117#ifdef IGB_HW_VLAN_SUPPORT
4118/*
4119 * This routine is run via a vlan
4120 * config EVENT
4121 */
4122static void
4123igb_register_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4124{
4125	struct adapter	*adapter = ifp->if_softc;
4126	u32		ctrl, rctl, index, vfta;
4127
4128	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4129	ctrl |= E1000_CTRL_VME;
4130	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4131
4132	/* Setup for Hardware Filter */
4133	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4134	rctl |= E1000_RCTL_VFE;
4135	rctl &= ~E1000_RCTL_CFIEN;
4136	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4137
4138	/* Make entry in the hardware filter table */
4139	index = ((vtag >> 5) & 0x7F);
4140	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
4141	vfta |= (1 << (vtag & 0x1F));
4142	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
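	/*
	 * Example: vtag 100 gives index (100 >> 5) & 0x7F = 3 and
	 * bit 100 & 0x1F = 4, i.e. bit 4 of VFTA[3] is set.
	 */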
4143
4144	/* Update the frame size */
4145	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4146	    adapter->max_frame_size + VLAN_TAG_SIZE);
4147
4148}
4149
4150/*
4151 * This routine is run via a vlan
4152 * unconfig EVENT
4153 */
4154static void
4155igb_unregister_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4156{
4157	struct adapter	*adapter = ifp->if_softc;
4158	u32		index, vfta;
4159
4160	/* Remove entry in the hardware filter table */
4161	index = ((vtag >> 5) & 0x7F);
4162	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
4163	vfta &= ~(1 << (vtag & 0x1F));
4164	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
4165	/* Have all vlans been unregistered? */
4166	if (adapter->ifp->if_vlantrunk == NULL) {
4167		u32 rctl;
4168		/* Turn off the filter table */
4169		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4170		rctl &= ~E1000_RCTL_VFE;
4171		rctl |= E1000_RCTL_CFIEN;
4172		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4173		/* Reset the frame size */
4174		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4175		    adapter->max_frame_size);
4176	}
4177}
4178#endif /* IGB_HW_VLAN_SUPPORT */
4179
4180static void
4181igb_enable_intr(struct adapter *adapter)
4182{
4183	/* With RSS set up what to auto clear */
4184	if (adapter->msix_mem) {
4185		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4186		    adapter->eims_mask);
4187		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4188		    adapter->eims_mask);
4189		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4190		    adapter->eims_mask);
4191		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4192		    E1000_IMS_LSC);
4193	} else {
4194		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4195		    IMS_ENABLE_MASK);
4196	}
4197	E1000_WRITE_FLUSH(&adapter->hw);
4198
4199	return;
4200}
4201
4202static void
4203igb_disable_intr(struct adapter *adapter)
4204{
4205	if (adapter->msix_mem) {
4206		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4207		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4208	}
4209	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4210	E1000_WRITE_FLUSH(&adapter->hw);
4211	return;
4212}
4213
4214/*
4215 * Bit of a misnomer, what this really means is
4216 * to enable OS management of the system... aka
4217 * to disable special hardware management features
4218 */
4219static void
4220igb_init_manageability(struct adapter *adapter)
4221{
4222	/* A shared code workaround */
4223#define E1000_82542_MANC2H E1000_MANC2H
4224	if (adapter->has_manage) {
4225		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4226		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4227
4228		/* disable hardware interception of ARP */
4229		manc &= ~(E1000_MANC_ARP_EN);
4230
4231                /* enable receiving management packets to the host */
4232		manc |= E1000_MANC_EN_MNG2HOST;
4233#define E1000_MNG2HOST_PORT_623 (1 << 5)
4234#define E1000_MNG2HOST_PORT_664 (1 << 6)
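		/* 623 is the ASF/RMCP port, 664 the secure RMCP port */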
4235		manc2h |= E1000_MNG2HOST_PORT_623;
4236		manc2h |= E1000_MNG2HOST_PORT_664;
4237		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4238
4239		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4240	}
4241}
4242
4243/*
4244 * Give control back to hardware management
4245 * controller if there is one.
4246 */
4247static void
4248igb_release_manageability(struct adapter *adapter)
4249{
4250	if (adapter->has_manage) {
4251		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4252
4253		/* re-enable hardware interception of ARP */
4254		manc |= E1000_MANC_ARP_EN;
4255		manc &= ~E1000_MANC_EN_MNG2HOST;
4256
4257		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4258	}
4259}
4260
4261/*
4262 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4263 * For ASF and Pass Through versions of f/w this means that
4264 * the driver is loaded.
4265 *
4266 */
4267static void
4268igb_get_hw_control(struct adapter *adapter)
4269{
4270	u32 ctrl_ext;
4271
4272	/* Let firmware know the driver has taken over */
4273	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4274	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4275	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4276}
4277
4278/*
4279 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4280 * For ASF and Pass Through versions of f/w this means that the
4281 * driver is no longer loaded.
4282 *
4283 */
4284static void
4285igb_release_hw_control(struct adapter *adapter)
4286{
4287	u32 ctrl_ext;
4288
4289	/* Let firmware take over control of h/w */
4290	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4291	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4292	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4293}
4294
4295static int
4296igb_is_valid_ether_addr(uint8_t *addr)
4297{
4298	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4299
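	/* Reject multicast addresses (I/G bit set) and all-zero addresses */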
4300	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4301		return (FALSE);
4302	}
4303
4304	return (TRUE);
4305}
4306
4307
4308/*
4309 * Enable PCI Wake On Lan capability
4310 */
4311void
4312igb_enable_wakeup(device_t dev)
4313{
4314	u16     cap, status;
4315	u8      id;
4316
4317	/* First find the capabilities pointer */
4318	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4319	/* Read the PM Capabilities */
4320	id = pci_read_config(dev, cap, 1);
4321	if (id != PCIY_PMG)     /* Something wrong */
4322		return;
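	/*
	 * Note: this assumes the PM capability sits first in the
	 * capability list; walking the list (e.g. via pci_find_extcap(9))
	 * would be more robust.
	 */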
4323	/* OK, we have the power capabilities, so
4324	   now get the status register */
4325	cap += PCIR_POWER_STATUS;
4326	status = pci_read_config(dev, cap, 2);
4327	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4328	pci_write_config(dev, cap, status, 2);
4329	return;
4330}
4331
4332
4333/**********************************************************************
4334 *
4335 *  Update the board statistics counters.
4336 *
4337 **********************************************************************/
4338static void
4339igb_update_stats_counters(struct adapter *adapter)
4340{
4341	struct ifnet   *ifp;
4342
4343	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4344	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4345		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4346		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4347	}
4348	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4349	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4350	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4351	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4352
4353	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4354	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4355	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4356	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4357	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4358	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4359	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4360	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4361	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4362	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4363	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4364	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4365	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4366	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4367	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4368	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4369	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4370	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4371	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4372	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4373
4374	/* 64-bit byte counters: read the low dword first; both clear on the high-dword read */
4375	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4376	adapter->stats.gorc += (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
4377	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4378	adapter->stats.gotc += (u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32;
4379
4380	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4381	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4382	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4383	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4384	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4385	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL);
4386	adapter->stats.tor += (u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32;
4387	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4388	adapter->stats.tot += (u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32;
4389	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4390	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4391	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4392	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4393	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4394	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4395	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4396	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4397	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4398	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4399
4400	adapter->stats.algnerrc +=
4401		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4402	adapter->stats.rxerrc +=
4403		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4404	adapter->stats.tncrs +=
4405		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4406	adapter->stats.cexterr +=
4407		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4408	adapter->stats.tsctc +=
4409		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4410	adapter->stats.tsctfc +=
4411		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4412	ifp = adapter->ifp;
4413
4414	ifp->if_collisions = adapter->stats.colc;
4415
4416	/* Rx Errors */
4417	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4418	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4419	    adapter->stats.ruc + adapter->stats.roc +
4420	    adapter->stats.mpc + adapter->stats.cexterr;
4421
4422	/* Tx Errors */
4423	ifp->if_oerrors = adapter->stats.ecol +
4424	    adapter->stats.latecol + adapter->watchdog_events;
4425}
4426
4427
4428/**********************************************************************
4429 *
4430 *  This routine is called only when igb_display_debug_stats is enabled.
4431 *  This routine provides a way to take a look at important statistics
4432 *  maintained by the driver and hardware.
4433 *
4434 **********************************************************************/
4435static void
4436igb_print_debug_info(struct adapter *adapter)
4437{
4438	device_t dev = adapter->dev;
4439	struct rx_ring *rxr = adapter->rx_rings;
4440	struct tx_ring *txr = adapter->tx_rings;
4441	uint8_t *hw_addr = adapter->hw.hw_addr;
4442
4443	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4444	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4445	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4446	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4447
4448#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4449	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4450	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4451	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4452#endif
4453
4454	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4455	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4456	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4457	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4458	    adapter->hw.fc.high_water,
4459	    adapter->hw.fc.low_water);
4460	device_printf(dev, "tx_int_delay = %d, tx_abs_int_delay = %d\n",
4461	    E1000_READ_REG(&adapter->hw, E1000_TIDV),
4462	    E1000_READ_REG(&adapter->hw, E1000_TADV));
4463	device_printf(dev, "rx_int_delay = %d, rx_abs_int_delay = %d\n",
4464	    E1000_READ_REG(&adapter->hw, E1000_RDTR),
4465	    E1000_READ_REG(&adapter->hw, E1000_RADV));
4466
4467	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4468		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4469		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4470		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4471		device_printf(dev, "no descriptors avail event = %lld\n",
4472		    (long long)txr->no_desc_avail);
4473		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4474		    (long long)txr->tx_irq);
4475		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4476		    (long long)txr->tx_packets);
4477	}
4478
4479	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4480		struct lro_ctrl *lro = &rxr->lro;
4481		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4482		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4483		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4484		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4485		    (long long)rxr->rx_packets);
4486		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4487		    (long long)rxr->rx_bytes);
4488		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4489		    (long long)rxr->rx_irq);
4490		device_printf(dev, "RX(%d) LRO Queued = %d\n",
4491		    rxr->me, lro->lro_queued);
4492		device_printf(dev, "RX(%d) LRO Flushed = %d\n",
4493		    rxr->me, lro->lro_flushed);
4494	}
4495
4496	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4497
4498	device_printf(dev, "Std mbuf failed = %ld\n",
4499	    adapter->mbuf_alloc_failed);
4500	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4501	    adapter->mbuf_cluster_failed);
4502	device_printf(dev, "Driver dropped packets = %ld\n",
4503	    adapter->dropped_pkts);
4504	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4505		adapter->no_tx_dma_setup);
4506}
4507
4508static void
4509igb_print_hw_stats(struct adapter *adapter)
4510{
4511	device_t dev = adapter->dev;
4512
4513	device_printf(dev, "Excessive collisions = %lld\n",
4514	    (long long)adapter->stats.ecol);
4515#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4516	device_printf(dev, "Symbol errors = %lld\n",
4517	    (long long)adapter->stats.symerrs);
4518#endif
4519	device_printf(dev, "Sequence errors = %lld\n",
4520	    (long long)adapter->stats.sec);
4521	device_printf(dev, "Defer count = %lld\n",
4522	    (long long)adapter->stats.dc);
4523	device_printf(dev, "Missed Packets = %lld\n",
4524	    (long long)adapter->stats.mpc);
4525	device_printf(dev, "Receive No Buffers = %lld\n",
4526	    (long long)adapter->stats.rnbc);
4527	/* RLEC is inaccurate on some hardware, calculate our own. */
4528	device_printf(dev, "Receive Length Errors = %lld\n",
4529	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4530	device_printf(dev, "Receive errors = %lld\n",
4531	    (long long)adapter->stats.rxerrc);
4532	device_printf(dev, "Crc errors = %lld\n",
4533	    (long long)adapter->stats.crcerrs);
4534	device_printf(dev, "Alignment errors = %lld\n",
4535	    (long long)adapter->stats.algnerrc);
4536	/* On 82575 these are collision counts */
4537	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4538	    (long long)adapter->stats.cexterr);
4539	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4540	device_printf(dev, "watchdog timeouts = %ld\n",
4541	    adapter->watchdog_events);
4542	device_printf(dev, "XON Rcvd = %lld\n",
4543	    (long long)adapter->stats.xonrxc);
4544	device_printf(dev, "XON Xmtd = %lld\n",
4545	    (long long)adapter->stats.xontxc);
4546	device_printf(dev, "XOFF Rcvd = %lld\n",
4547	    (long long)adapter->stats.xoffrxc);
4548	device_printf(dev, "XOFF Xmtd = %lld\n",
4549	    (long long)adapter->stats.xofftxc);
4550	device_printf(dev, "Good Packets Rcvd = %lld\n",
4551	    (long long)adapter->stats.gprc);
4552	device_printf(dev, "Good Packets Xmtd = %lld\n",
4553	    (long long)adapter->stats.gptc);
4554	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4555	    (long long)adapter->stats.tsctc);
4556	device_printf(dev, "TSO Contexts Failed = %lld\n",
4557	    (long long)adapter->stats.tsctfc);
4558}
4559
4560/**********************************************************************
4561 *
4562 *  This routine provides a way to dump out the adapter eeprom,
4563 *  often a useful debug/service tool. This only dumps the first
4564 *  32 words, stuff that matters is in that extent.
4565 *
4566 **********************************************************************/
4567static void
4568igb_print_nvm_info(struct adapter *adapter)
4569{
4570	u16	eeprom_data;
4571	int	i, j, row = 0;
4572
4573	/* It's a bit crude, but it gets the job done */
4574	printf("\nInterface EEPROM Dump:\n");
4575	printf("Offset\n0x0000  ");
4576	for (i = 0, j = 0; i < 32; i++, j++) {
4577		if (j == 8) { /* Make the offset block */
4578			j = 0; ++row;
4579			printf("\n0x00%x0  ", row);
4580		}
4581		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4582		printf("%04x ", eeprom_data);
4583	}
4584	printf("\n");
4585}
4586
4587static int
4588igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4589{
4590	struct adapter *adapter;
4591	int error;
4592	int result;
4593
4594	result = -1;
4595	error = sysctl_handle_int(oidp, &result, 0, req);
4596
4597	if (error || !req->newptr)
4598		return (error);
4599
4600	if (result == 1) {
4601		adapter = (struct adapter *)arg1;
4602		igb_print_debug_info(adapter);
4603	}
4604	/*
4605	 * This value will cause a hex dump of the
4606	 * first 32 16-bit words of the EEPROM to
4607	 * the screen.
4608	 */
4609	if (result == 2) {
4610		adapter = (struct adapter *)arg1;
4611		igb_print_nvm_info(adapter);
4612	}
4613
4614	return (error);
4615}
4616
4617
4618static int
4619igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4620{
4621	struct adapter *adapter;
4622	int error;
4623	int result;
4624
4625	result = -1;
4626	error = sysctl_handle_int(oidp, &result, 0, req);
4627
4628	if (error || !req->newptr)
4629		return (error);
4630
4631	if (result == 1) {
4632		adapter = (struct adapter *)arg1;
4633		igb_print_hw_stats(adapter);
4634	}
4635
4636	return (error);
4637}
4638
4639static int
4640igb_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4641{
4642	struct igb_int_delay_info *info;
4643	struct adapter *adapter;
4644	uint32_t regval;
4645	int error;
4646	int usecs;
4647	int ticks;
4648
4649	info = (struct igb_int_delay_info *)arg1;
4650	usecs = info->value;
4651	error = sysctl_handle_int(oidp, &usecs, 0, req);
4652	if (error != 0 || req->newptr == NULL)
4653		return (error);
4654	if (usecs < 0 || usecs > IGB_TICKS_TO_USECS(65535))
4655		return (EINVAL);
4656	info->value = usecs;
4657	ticks = IGB_USECS_TO_TICKS(usecs);
4658
4659	adapter = info->adapter;
4660
4661	IGB_CORE_LOCK(adapter);
4662	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4663	regval = (regval & ~0xffff) | (ticks & 0xffff);
4664	/* Handle a few special cases. */
4665	switch (info->offset) {
4666	case E1000_RDTR:
4667		break;
4668	case E1000_TIDV:
4669		if (ticks == 0) {
4670			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4671			/* Don't write 0 into the TIDV register. */
4672			regval++;
4673		} else
4674			if (adapter->hw.mac.type < e1000_82575)
4675				adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4676		break;
4677	}
4678	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4679	IGB_CORE_UNLOCK(adapter);
4680	return (0);
4681}
4682
4683static void
4684igb_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4685	const char *description, struct igb_int_delay_info *info,
4686	int offset, int value)
4687{
4688	info->adapter = adapter;
4689	info->offset = offset;
4690	info->value = value;
4691	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4692	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4693	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4694	    info, 0, igb_sysctl_int_delay, "I", description);
4695}
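/*
 * The node lands under the device's sysctl tree; a delay registered
 * under a given name shows up as, e.g., dev.igb.0.<name> and can be
 * tuned at run time (the actual names are chosen by the callers):
 *
 *	sysctl dev.igb.0.rx_int_delay=<usecs>
 */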
4696
4697static void
4698igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4699	const char *description, int *limit, int value)
4700{
4701	*limit = value;
4702	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4703	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4704	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4705}
4706
4707#ifdef IGB_TIMESYNC
4708/*
4709 * Initialize the Time Sync Feature
4710 */
4711static int
4712igb_tsync_init(struct adapter *adapter)
4713{
4714	device_t	dev = adapter->dev;
4715	u32		tx_ctl, rx_ctl, val;
4716
4717	E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1 << 24) |
4718	    20833 / PICOSECS_PER_TICK);
4720
4721	adapter->last_stamp =  E1000_READ_REG(&adapter->hw, E1000_SYSTIML);
4722	adapter->last_stamp |= (u64)E1000_READ_REG(&adapter->hw,
4723	    E1000_SYSTIMH) << 32ULL;
4724
4725	/* Enable the TX side */
4726	tx_ctl =  E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4727	tx_ctl |= 0x10;
4728	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4729	E1000_WRITE_FLUSH(&adapter->hw);
4730
4731	tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4732	if ((tx_ctl & 0x10) == 0) {
4733		device_printf(dev, "Failed to enable TX timestamping\n");
4734		return (ENXIO);
4735	}
4736
4737	/* Enable RX */
4738	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4739	rx_ctl |= 0x10; /* Enable the feature */
4740	rx_ctl |= 0x04; /* This value turns on Ver 1 and 2 */
4741	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4742
4743	/*
4744	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 (Ethertype)
4745	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4746	 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4747	 */
4748	E1000_WRITE_REG(&adapter->hw, E1000_ETQF(0), 0x440088f7);
4749	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCFG, 0x0);
4750
4751	/*
4752	 * Source Port Queue Filter Setup:
4753	 *  this is for UDP port filtering
4754	 */
4755	E1000_WRITE_REG(&adapter->hw, E1000_SPQF(0), TSYNC_PORT);
4756	/* Protocol = UDP, enable Timestamp, and filter on source/protocol */
4757	val = (0x11 | (1 << 27) | (6 << 28));
4758	E1000_WRITE_REG(&adapter->hw, E1000_FTQF(0), val);
4759
4760	E1000_WRITE_FLUSH(&adapter->hw);
4761
4762	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4763	if ((rx_ctl & 0x10) == 0) {
4764		device_printf(dev, "Failed to enable RX timestamping\n");
4765		return (ENXIO);
4766	}
4767
4768	device_printf(dev, "IEEE 1588 Precision Time Protocol enabled\n");
4769
4770	return (0);
4771}
4772
4773/*
4774 * Disable the Time Sync Feature
4775 */
4776static void
4777igb_tsync_disable(struct adapter *adapter)
4778{
4779	u32		tx_ctl, rx_ctl;
4780
4781	tx_ctl =  E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4782	tx_ctl &= ~0x10;
4783	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4784	E1000_WRITE_FLUSH(&adapter->hw);
4785
4786	/* Invalidate TX Timestamp */
4787	E1000_READ_REG(&adapter->hw, E1000_TXSTMPH);
4788
4789	tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4790	if (tx_ctl & 0x10)
4791		HW_DEBUGOUT("Failed to disable TX timestamping\n");
4792
4793	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4794	rx_ctl &= ~0x10;
4795
4796	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4797	E1000_WRITE_FLUSH(&adapter->hw);
4798
4799	/* Invalidate RX Timestamp */
4800	E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
4801
4802	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4803	if (rx_ctl & 0x10)
4804		HW_DEBUGOUT("Failed to disable RX timestamping\n");
4805
4806	return;
4807}
4808
4809#endif /* IGB_TIMESYNC */
4810