if_igb.c revision 195049
1/******************************************************************************
2
3  Copyright (c) 2001-2009, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 195049 2009-06-26 11:45:06Z rwatson $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#endif
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#if __FreeBSD_version >= 800000
43#include <sys/buf_ring.h>
44#endif
45#include <sys/bus.h>
46#include <sys/endian.h>
47#include <sys/kernel.h>
48#include <sys/kthread.h>
49#include <sys/malloc.h>
50#include <sys/mbuf.h>
51#include <sys/module.h>
52#include <sys/rman.h>
53#include <sys/socket.h>
54#include <sys/sockio.h>
55#include <sys/sysctl.h>
56#include <sys/taskqueue.h>
57#include <sys/eventhandler.h>
58#include <sys/pcpu.h>
59#include <sys/smp.h>
60#include <machine/smp.h>
61#include <machine/bus.h>
62#include <machine/resource.h>
63
64#ifdef IGB_IEEE1588
65#include <sys/ieee1588.h>
66#endif
67
68#include <net/bpf.h>
69#include <net/ethernet.h>
70#include <net/if.h>
71#include <net/if_arp.h>
72#include <net/if_dl.h>
73#include <net/if_media.h>
74
75#include <net/if_types.h>
76#include <net/if_vlan_var.h>
77
78#include <netinet/in_systm.h>
79#include <netinet/in.h>
80#include <netinet/if_ether.h>
81#include <netinet/ip.h>
82#include <netinet/ip6.h>
83#include <netinet/tcp.h>
84#include <netinet/tcp_lro.h>
85#include <netinet/udp.h>
86
87#include <machine/in_cksum.h>
88#include <dev/pci/pcivar.h>
89#include <dev/pci/pcireg.h>
90
91#include "e1000_api.h"
92#include "e1000_82575.h"
93#include "if_igb.h"
94
95/*********************************************************************
96 *  Set this to one to display debug statistics
97 *********************************************************************/
98int	igb_display_debug_stats = 0;
99
100/*********************************************************************
101 *  Driver version:
102 *********************************************************************/
103char igb_driver_version[] = "version - 1.7.3";
104
105
106/*********************************************************************
107 *  PCI Device ID Table
108 *
109 *  Used by probe to select devices to load on
110 *  Last field stores an index into e1000_strings
111 *  Last entry must be all 0s
112 *
113 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
114 *********************************************************************/
115
116static igb_vendor_info_t igb_vendor_info_array[] =
117{
118	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
119	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
120						PCI_ANY_ID, PCI_ANY_ID, 0},
121	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
122						PCI_ANY_ID, PCI_ANY_ID, 0},
123	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
128						PCI_ANY_ID, PCI_ANY_ID, 0},
129	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
130						PCI_ANY_ID, PCI_ANY_ID, 0},
131	/* required last entry */
132	{ 0, 0, 0, 0, 0}
133};
134
135/*********************************************************************
136 *  Table of branding strings for all supported NICs.
137 *********************************************************************/
138
139static char *igb_strings[] = {
140	"Intel(R) PRO/1000 Network Connection"
141};
142
143/*********************************************************************
144 *  Function prototypes
145 *********************************************************************/
146static int	igb_probe(device_t);
147static int	igb_attach(device_t);
148static int	igb_detach(device_t);
149static int	igb_shutdown(device_t);
150static int	igb_suspend(device_t);
151static int	igb_resume(device_t);
152static void	igb_start(struct ifnet *);
153static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
154#if __FreeBSD_version >= 800000
155static int	igb_mq_start(struct ifnet *, struct mbuf *);
156static int	igb_mq_start_locked(struct ifnet *,
157		    struct tx_ring *, struct mbuf *);
158static void	igb_qflush(struct ifnet *);
159#endif
160static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
161static void	igb_watchdog(struct adapter *);
162static void	igb_init(void *);
163static void	igb_init_locked(struct adapter *);
164static void	igb_stop(void *);
165static void	igb_media_status(struct ifnet *, struct ifmediareq *);
166static int	igb_media_change(struct ifnet *);
167static void	igb_identify_hardware(struct adapter *);
168static int	igb_allocate_pci_resources(struct adapter *);
169static int	igb_allocate_msix(struct adapter *);
170static int	igb_allocate_legacy(struct adapter *);
171static int	igb_setup_msix(struct adapter *);
172static void	igb_free_pci_resources(struct adapter *);
173static void	igb_local_timer(void *);
174static int	igb_hardware_init(struct adapter *);
175static void	igb_setup_interface(device_t, struct adapter *);
176static int	igb_allocate_queues(struct adapter *);
177static void	igb_configure_queues(struct adapter *);
178
179static int	igb_allocate_transmit_buffers(struct tx_ring *);
180static void	igb_setup_transmit_structures(struct adapter *);
181static void	igb_setup_transmit_ring(struct tx_ring *);
182static void	igb_initialize_transmit_units(struct adapter *);
183static void	igb_free_transmit_structures(struct adapter *);
184static void	igb_free_transmit_buffers(struct tx_ring *);
185
186static int	igb_allocate_receive_buffers(struct rx_ring *);
187static int	igb_setup_receive_structures(struct adapter *);
188static int	igb_setup_receive_ring(struct rx_ring *);
189static void	igb_initialize_receive_units(struct adapter *);
190static void	igb_free_receive_structures(struct adapter *);
191static void	igb_free_receive_buffers(struct rx_ring *);
192
193static void	igb_enable_intr(struct adapter *);
194static void	igb_disable_intr(struct adapter *);
195static void	igb_update_stats_counters(struct adapter *);
196static bool	igb_txeof(struct tx_ring *);
197static bool	igb_rxeof(struct rx_ring *, int);
198static void	igb_rx_checksum(u32, struct mbuf *, bool);
199static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
200static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
201static void	igb_set_promisc(struct adapter *);
202static void	igb_disable_promisc(struct adapter *);
203static void	igb_set_multi(struct adapter *);
204static void	igb_print_hw_stats(struct adapter *);
205static void	igb_update_link_status(struct adapter *);
206static int	igb_get_buf(struct rx_ring *, int, u8);
207
208static void	igb_register_vlan(void *, struct ifnet *, u16);
209static void	igb_unregister_vlan(void *, struct ifnet *, u16);
210static void	igb_setup_vlan_hw_support(struct adapter *);
211
212static int	igb_xmit(struct tx_ring *, struct mbuf **);
213static int	igb_dma_malloc(struct adapter *, bus_size_t,
214		    struct igb_dma_alloc *, int);
215static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
216static void	igb_print_debug_info(struct adapter *);
217static void	igb_print_nvm_info(struct adapter *);
218static int 	igb_is_valid_ether_addr(u8 *);
219static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
220static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
221/* Management and WOL Support */
222static void	igb_init_manageability(struct adapter *);
223static void	igb_release_manageability(struct adapter *);
224static void     igb_get_hw_control(struct adapter *);
225static void     igb_release_hw_control(struct adapter *);
226static void     igb_enable_wakeup(device_t);
227
228static int	igb_irq_fast(void *);
229static void	igb_add_rx_process_limit(struct adapter *, const char *,
230		    const char *, int *, int);
231static void	igb_handle_rxtx(void *context, int pending);
232static void	igb_handle_tx(void *context, int pending);
233static void	igb_handle_rx(void *context, int pending);
234
235/* These are MSIX only irq handlers */
236static void	igb_msix_rx(void *);
237static void	igb_msix_tx(void *);
238static void	igb_msix_link(void *);
239
240/* Adaptive Interrupt Moderation */
241static void	igb_update_aim(struct rx_ring *);
242
243/*********************************************************************
244 *  FreeBSD Device Interface Entry Points
245 *********************************************************************/
246
247static device_method_t igb_methods[] = {
248	/* Device interface */
249	DEVMETHOD(device_probe, igb_probe),
250	DEVMETHOD(device_attach, igb_attach),
251	DEVMETHOD(device_detach, igb_detach),
252	DEVMETHOD(device_shutdown, igb_shutdown),
253	DEVMETHOD(device_suspend, igb_suspend),
254	DEVMETHOD(device_resume, igb_resume),
255	{0, 0}
256};
257
258static driver_t igb_driver = {
259	"igb", igb_methods, sizeof(struct adapter),
260};
261
262static devclass_t igb_devclass;
263DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
264MODULE_DEPEND(igb, pci, 1, 1, 1);
265MODULE_DEPEND(igb, ether, 1, 1, 1);
266
267/*********************************************************************
268 *  Tunable default values.
269 *********************************************************************/
270
271/* Descriptor defaults */
272static int igb_rxd = IGB_DEFAULT_RXD;
273static int igb_txd = IGB_DEFAULT_TXD;
274TUNABLE_INT("hw.igb.rxd", &igb_rxd);
275TUNABLE_INT("hw.igb.txd", &igb_txd);
276
277/*
278** These parameters are used in Adaptive
279** Interrupt Moderation. The value is set
280** into EITR and controls the interrupt
281** frequency. A variable static scheme can
282** be created by changing the assigned value
283** of igb_ave_latency to the desired value,
284** and then set igb_enable_aim to FALSE.
285** This will result in all EITR registers
286** getting set to that value statically.
287*/
288static int igb_enable_aim = TRUE;
289TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
290static int igb_low_latency = IGB_LOW_LATENCY;
291TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
292static int igb_ave_latency = IGB_AVE_LATENCY;
293TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
294static int igb_bulk_latency = IGB_BULK_LATENCY;
295TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
296
297/*
298** This will autoconfigure based on the number
299** of CPUs if set to 0. Only a matched pair of
300** TX and RX rings are allowed.
301*/
302static int igb_num_queues = 1;
303TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
304
305/* How many packets rxeof tries to clean at a time */
306static int igb_rx_process_limit = 100;
307TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
308
309/* Flow control setting - default to FULL */
310static int igb_fc_setting = e1000_fc_full;
311TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
312
313/*
314** Shadow VFTA table, this is needed because
315** the real filter table gets cleared during
316** a soft reset and the driver needs to be able
317** to repopulate it.
318*/
319static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
320
321
322/*********************************************************************
323 *  Device identification routine
324 *
325 *  igb_probe determines if the driver should be loaded on
326 *  adapter based on PCI vendor/device id of the adapter.
327 *
328 *  return BUS_PROBE_DEFAULT on success, positive on failure
329 *********************************************************************/
330
331static int
332igb_probe(device_t dev)
333{
334	char		adapter_name[60];
335	uint16_t	pci_vendor_id = 0;
336	uint16_t	pci_device_id = 0;
337	uint16_t	pci_subvendor_id = 0;
338	uint16_t	pci_subdevice_id = 0;
339	igb_vendor_info_t *ent;
340
341	INIT_DEBUGOUT("igb_probe: begin");
342
343	pci_vendor_id = pci_get_vendor(dev);
344	if (pci_vendor_id != IGB_VENDOR_ID)
345		return (ENXIO);
346
347	pci_device_id = pci_get_device(dev);
348	pci_subvendor_id = pci_get_subvendor(dev);
349	pci_subdevice_id = pci_get_subdevice(dev);
350
351	ent = igb_vendor_info_array;
352	while (ent->vendor_id != 0) {
353		if ((pci_vendor_id == ent->vendor_id) &&
354		    (pci_device_id == ent->device_id) &&
355
356		    ((pci_subvendor_id == ent->subvendor_id) ||
357		    (ent->subvendor_id == PCI_ANY_ID)) &&
358
359		    ((pci_subdevice_id == ent->subdevice_id) ||
360		    (ent->subdevice_id == PCI_ANY_ID))) {
361			sprintf(adapter_name, "%s %s",
362				igb_strings[ent->index],
363				igb_driver_version);
364			device_set_desc_copy(dev, adapter_name);
365			return (BUS_PROBE_DEFAULT);
366		}
367		ent++;
368	}
369
370	return (ENXIO);
371}
372
373/*********************************************************************
374 *  Device initialization routine
375 *
376 *  The attach entry point is called when the driver is being loaded.
377 *  This routine identifies the type of hardware, allocates all resources
378 *  and initializes the hardware.
379 *
380 *  return 0 on success, positive on failure
381 *********************************************************************/
382
383static int
384igb_attach(device_t dev)
385{
386	struct adapter	*adapter;
387	int		error = 0;
388	u16		eeprom_data;
389
390	INIT_DEBUGOUT("igb_attach: begin");
391
392	adapter = device_get_softc(dev);
393	adapter->dev = adapter->osdep.dev = dev;
394	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
395
396	/* SYSCTL stuff */
397	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
398	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
399	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
400	    igb_sysctl_debug_info, "I", "Debug Information");
401
402	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
403	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
404	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
405	    igb_sysctl_stats, "I", "Statistics");
406
407	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
408	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
409	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
410	    &igb_fc_setting, 0, "Flow Control");
411
412	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
413	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
414	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
415	    &igb_enable_aim, 1, "Interrupt Moderation");
416
417	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
418	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
419	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
420	    &igb_low_latency, 1, "Low Latency");
421
422	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
423	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
424	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
425	    &igb_ave_latency, 1, "Average Latency");
426
427	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
428	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
430	    &igb_bulk_latency, 1, "Bulk Latency");
431
432	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
433
434	/* Determine hardware and mac info */
435	igb_identify_hardware(adapter);
436
437	/* Setup PCI resources */
438	if (igb_allocate_pci_resources(adapter)) {
439		device_printf(dev, "Allocation of PCI resources failed\n");
440		error = ENXIO;
441		goto err_pci;
442	}
443
444	/* Do Shared Code initialization */
445	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
446		device_printf(dev, "Setup of Shared code failed\n");
447		error = ENXIO;
448		goto err_pci;
449	}
450
451	e1000_get_bus_info(&adapter->hw);
452
453	/* Sysctls for limiting the amount of work done in the taskqueue */
454	igb_add_rx_process_limit(adapter, "rx_processing_limit",
455	    "max number of rx packets to process", &adapter->rx_process_limit,
456	    igb_rx_process_limit);
457
458	/*
459	 * Validate number of transmit and receive descriptors. It
460	 * must not exceed hardware maximum, and must be multiple
461	 * of E1000_DBA_ALIGN.
462	 */
463	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
464	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
465		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
466		    IGB_DEFAULT_TXD, igb_txd);
467		adapter->num_tx_desc = IGB_DEFAULT_TXD;
468	} else
469		adapter->num_tx_desc = igb_txd;
470	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
471	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
472		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
473		    IGB_DEFAULT_RXD, igb_rxd);
474		adapter->num_rx_desc = IGB_DEFAULT_RXD;
475	} else
476		adapter->num_rx_desc = igb_rxd;
477
478	adapter->hw.mac.autoneg = DO_AUTO_NEG;
479	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
480	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
481
482	/* Copper options */
483	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
484		adapter->hw.phy.mdix = AUTO_ALL_MODES;
485		adapter->hw.phy.disable_polarity_correction = FALSE;
486		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
487	}
488
489	/*
490	 * Set the frame limits assuming
491	 * standard ethernet sized frames.
492	 */
493	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
494	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
495
496	/*
497	** Allocate and Setup Queues
498	*/
499	if (igb_allocate_queues(adapter)) {
500		error = ENOMEM;
501		goto err_pci;
502	}
503
504	/*
505	** Start from a known state, this is
506	** important in reading the nvm and
507	** mac from that.
508	*/
509	e1000_reset_hw(&adapter->hw);
510
511	/* Make sure we have a good EEPROM before we read from it */
512	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
513		/*
514		** Some PCI-E parts fail the first check due to
515		** the link being in sleep state, call it again,
516		** if it fails a second time its a real issue.
517		*/
518		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
519			device_printf(dev,
520			    "The EEPROM Checksum Is Not Valid\n");
521			error = EIO;
522			goto err_late;
523		}
524	}
525
526	/*
527	** Copy the permanent MAC address out of the EEPROM
528	*/
529	if (e1000_read_mac_addr(&adapter->hw) < 0) {
530		device_printf(dev, "EEPROM read error while reading MAC"
531		    " address\n");
532		error = EIO;
533		goto err_late;
534	}
535	/* Check its sanity */
536	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
537		device_printf(dev, "Invalid MAC address\n");
538		error = EIO;
539		goto err_late;
540	}
541
542	/* Now Initialize the hardware */
543	if (igb_hardware_init(adapter)) {
544		device_printf(dev, "Unable to initialize the hardware\n");
545		error = EIO;
546		goto err_late;
547	}
548
549	/*
550	** Configure Interrupts
551	*/
552	if (adapter->msix > 1) /* MSIX */
553		error = igb_allocate_msix(adapter);
554	else /* MSI or Legacy */
555		error = igb_allocate_legacy(adapter);
556	if (error)
557		goto err_late;
558
559	/* Setup OS specific network interface */
560	igb_setup_interface(dev, adapter);
561
562#ifdef IGB_IEEE1588
563        /*
564        ** Setup the timer: IEEE 1588 support
565        */
566        adapter->cycles.read = igb_read_clock;
567        adapter->cycles.mask = (u64)-1;
568        adapter->cycles.mult = 1;
569        adapter->cycles.shift = IGB_TSYNC_SHIFT;
570        E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
571            IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
572        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
573        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);
574
575	// JFV - this is not complete yet
576#endif
577
578	/* Initialize statistics */
579	igb_update_stats_counters(adapter);
580
581	adapter->hw.mac.get_link_status = 1;
582	igb_update_link_status(adapter);
583
584	/* Indicate SOL/IDER usage */
585	if (e1000_check_reset_block(&adapter->hw))
586		device_printf(dev,
587		    "PHY reset is blocked due to SOL/IDER session.\n");
588
589	/* Determine if we have to control management hardware */
590	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
591
592	/*
593	 * Setup Wake-on-Lan
594	 */
595	/* APME bit in EEPROM is mapped to WUC.APME */
596	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
597	if (eeprom_data)
598		adapter->wol = E1000_WUFC_MAG;
599
600	/* Register for VLAN events */
601	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
602	     igb_register_vlan, 0, EVENTHANDLER_PRI_FIRST);
603	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
604	     igb_unregister_vlan, 0, EVENTHANDLER_PRI_FIRST);
605
606	/* Tell the stack that the interface is not active */
607	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
608
609	INIT_DEBUGOUT("igb_attach: end");
610
611	return (0);
612
613err_late:
614	igb_free_transmit_structures(adapter);
615	igb_free_receive_structures(adapter);
616	igb_release_hw_control(adapter);
617err_pci:
618	igb_free_pci_resources(adapter);
619	IGB_CORE_LOCK_DESTROY(adapter);
620
621	return (error);
622}
623
624/*********************************************************************
625 *  Device removal routine
626 *
627 *  The detach entry point is called when the driver is being removed.
628 *  This routine stops the adapter and deallocates all the resources
629 *  that were allocated for driver operation.
630 *
631 *  return 0 on success, positive on failure
632 *********************************************************************/
633
634static int
635igb_detach(device_t dev)
636{
637	struct adapter	*adapter = device_get_softc(dev);
638	struct ifnet	*ifp = adapter->ifp;
639
640	INIT_DEBUGOUT("igb_detach: begin");
641
642	/* Make sure VLANS are not using driver */
643	if (adapter->ifp->if_vlantrunk != NULL) {
644		device_printf(dev,"Vlan in use, detach first\n");
645		return (EBUSY);
646	}
647
648	IGB_CORE_LOCK(adapter);
649	adapter->in_detach = 1;
650	igb_stop(adapter);
651	IGB_CORE_UNLOCK(adapter);
652
653	e1000_phy_hw_reset(&adapter->hw);
654
655	/* Give control back to firmware */
656	igb_release_manageability(adapter);
657	igb_release_hw_control(adapter);
658
659	if (adapter->wol) {
660		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
661		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
662		igb_enable_wakeup(dev);
663	}
664
665	/* Unregister VLAN events */
666	if (adapter->vlan_attach != NULL)
667		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
668	if (adapter->vlan_detach != NULL)
669		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
670
671	ether_ifdetach(adapter->ifp);
672
673	callout_drain(&adapter->timer);
674
675	igb_free_pci_resources(adapter);
676	bus_generic_detach(dev);
677	if_free(ifp);
678
679	igb_free_transmit_structures(adapter);
680	igb_free_receive_structures(adapter);
681
682	IGB_CORE_LOCK_DESTROY(adapter);
683
684	return (0);
685}
686
687/*********************************************************************
688 *
689 *  Shutdown entry point
690 *
691 **********************************************************************/
692
693static int
694igb_shutdown(device_t dev)
695{
696	return igb_suspend(dev);
697}
698
699/*
700 * Suspend/resume device methods.
701 */
702static int
703igb_suspend(device_t dev)
704{
705	struct adapter *adapter = device_get_softc(dev);
706
707	IGB_CORE_LOCK(adapter);
708
709	igb_stop(adapter);
710
711        igb_release_manageability(adapter);
712	igb_release_hw_control(adapter);
713
714        if (adapter->wol) {
715                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
716                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
717                igb_enable_wakeup(dev);
718        }
719
720	IGB_CORE_UNLOCK(adapter);
721
722	return bus_generic_suspend(dev);
723}
724
725static int
726igb_resume(device_t dev)
727{
728	struct adapter *adapter = device_get_softc(dev);
729	struct ifnet *ifp = adapter->ifp;
730
731	IGB_CORE_LOCK(adapter);
732	igb_init_locked(adapter);
733	igb_init_manageability(adapter);
734
735	if ((ifp->if_flags & IFF_UP) &&
736	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
737		igb_start(ifp);
738
739	IGB_CORE_UNLOCK(adapter);
740
741	return bus_generic_resume(dev);
742}
743
744
745/*********************************************************************
746 *  Transmit entry point
747 *
748 *  igb_start is called by the stack to initiate a transmit.
749 *  The driver will remain in this routine as long as there are
750 *  packets to transmit and transmit resources are available.
751 *  In case resources are not available stack is notified and
752 *  the packet is requeued.
753 **********************************************************************/
754
755static void
756igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
757{
758	struct adapter	*adapter = ifp->if_softc;
759	struct mbuf	*m_head;
760
761	IGB_TX_LOCK_ASSERT(txr);
762
763	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
764	    IFF_DRV_RUNNING)
765		return;
766	if (!adapter->link_active)
767		return;
768
769	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
770
771		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
772		if (m_head == NULL)
773			break;
774		/*
775		 *  Encapsulation can modify our pointer, and or make it
776		 *  NULL on failure.  In that event, we can't requeue.
777		 */
778		if (igb_xmit(txr, &m_head)) {
779			if (m_head == NULL)
780				break;
781			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
782			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
783			break;
784		}
785
786		/* Send a copy of the frame to the BPF listener */
787		ETHER_BPF_MTAP(ifp, m_head);
788
789		/* Set timeout in case hardware has problems transmitting. */
790		txr->watchdog_timer = IGB_TX_TIMEOUT;
791	}
792}
793
794/*
795 * Legacy TX driver routine, called from the
796 * stack, always uses tx[0], and spins for it.
797 * Should not be used with multiqueue tx
798 */
799static void
800igb_start(struct ifnet *ifp)
801{
802	struct adapter	*adapter = ifp->if_softc;
803	struct tx_ring	*txr = adapter->tx_rings;
804
805	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
806		IGB_TX_LOCK(txr);
807		igb_start_locked(txr, ifp);
808		IGB_TX_UNLOCK(txr);
809	}
810	return;
811}
812
813#if __FreeBSD_version >= 800000
814/*
815** Multiqueue Transmit driver
816**
817*/
818static int
819igb_mq_start(struct ifnet *ifp, struct mbuf *m)
820{
821	struct adapter	*adapter = ifp->if_softc;
822	struct tx_ring	*txr;
823	int 		i = 0, err = 0;
824
825	/* Which queue to use */
826	if ((m->m_flags & M_FLOWID) != 0)
827		i = m->m_pkthdr.flowid % adapter->num_queues;
828	txr = &adapter->tx_rings[i];
829
830	if (IGB_TX_TRYLOCK(txr)) {
831		err = igb_mq_start_locked(ifp, txr, m);
832		IGB_TX_UNLOCK(txr);
833	} else
834		err = drbr_enqueue(ifp, txr->br, m);
835
836	return (err);
837}
838
839static int
840igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
841{
842	struct adapter  *adapter = txr->adapter;
843        struct mbuf     *next;
844        int             err = 0;
845
846	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
847		err = drbr_enqueue(ifp, txr->br, m);
848		return (err);
849	}
850
851	if (m == NULL) /* Called by tasklet */
852		goto process;
853
854	/* If nothing queued go right to xmit */
855	if (drbr_empty(ifp, txr->br)) {
856		if (igb_xmit(txr, &m)) {
857			if (m && (err = drbr_enqueue(ifp, txr->br, m)) != 0)
858                                return (err);
859		} else {
860			/* Success, update stats */
861			drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
862			/* Send a copy of the frame to the BPF listener */
863			ETHER_BPF_MTAP(ifp, m);
864			/* Set the watchdog */
865			txr->watchdog_timer = IGB_TX_TIMEOUT;
866                }
867
868        } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
869		return (err);
870
871process:
872	if (drbr_empty(ifp, txr->br))
873		return (err);
874
875	/* Process the queue */
876	while (TRUE) {
877		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
878			break;
879		next = drbr_dequeue(ifp, txr->br);
880		if (next == NULL)
881			break;
882		if (igb_xmit(txr, &next))
883			break;
884		ETHER_BPF_MTAP(ifp, next);
885		/* Set the watchdog */
886		txr->watchdog_timer = IGB_TX_TIMEOUT;
887	}
888
889	if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
890		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
891
892	return (err);
893}
894
895/*
896** Flush all ring buffers
897*/
898static void
899igb_qflush(struct ifnet *ifp)
900{
901	struct adapter	*adapter = ifp->if_softc;
902	struct tx_ring	*txr = adapter->tx_rings;
903	struct mbuf	*m;
904
905	for (int i = 0; i < adapter->num_queues; i++, txr++) {
906		IGB_TX_LOCK(txr);
907		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
908			m_freem(m);
909		IGB_TX_UNLOCK(txr);
910	}
911	if_qflush(ifp);
912}
913#endif /* __FreeBSD_version >= 800000 */
914
915/*********************************************************************
916 *  Ioctl entry point
917 *
918 *  igb_ioctl is called when the user wants to configure the
919 *  interface.
920 *
921 *  return 0 on success, positive on failure
922 **********************************************************************/
923
924static int
925igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
926{
927	struct adapter	*adapter = ifp->if_softc;
928	struct ifreq *ifr = (struct ifreq *)data;
929#ifdef INET
930	struct ifaddr *ifa = (struct ifaddr *)data;
931#endif
932	int error = 0;
933
934	if (adapter->in_detach)
935		return (error);
936
937	switch (command) {
938	case SIOCSIFADDR:
939#ifdef INET
940		if (ifa->ifa_addr->sa_family == AF_INET) {
941			/*
942			 * XXX
943			 * Since resetting hardware takes a very long time
944			 * and results in link renegotiation we only
945			 * initialize the hardware only when it is absolutely
946			 * required.
947			 */
948			ifp->if_flags |= IFF_UP;
949			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
950				IGB_CORE_LOCK(adapter);
951				igb_init_locked(adapter);
952				IGB_CORE_UNLOCK(adapter);
953			}
954			arp_ifinit(ifp, ifa);
955		} else
956#endif
957			error = ether_ioctl(ifp, command, data);
958		break;
959	case SIOCSIFMTU:
960	    {
961		int max_frame_size;
962
963		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
964
965		IGB_CORE_LOCK(adapter);
966		max_frame_size = 9234;
967		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
968		    ETHER_CRC_LEN) {
969			IGB_CORE_UNLOCK(adapter);
970			error = EINVAL;
971			break;
972		}
973
974		ifp->if_mtu = ifr->ifr_mtu;
975		adapter->max_frame_size =
976		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
977		igb_init_locked(adapter);
978		IGB_CORE_UNLOCK(adapter);
979		break;
980	    }
981	case SIOCSIFFLAGS:
982		IOCTL_DEBUGOUT("ioctl rcv'd:\
983		    SIOCSIFFLAGS (Set Interface Flags)");
984		IGB_CORE_LOCK(adapter);
985		if (ifp->if_flags & IFF_UP) {
986			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
987				if ((ifp->if_flags ^ adapter->if_flags) &
988				    (IFF_PROMISC | IFF_ALLMULTI)) {
989					igb_disable_promisc(adapter);
990					igb_set_promisc(adapter);
991				}
992			} else
993				igb_init_locked(adapter);
994		} else
995			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
996				igb_stop(adapter);
997		adapter->if_flags = ifp->if_flags;
998		IGB_CORE_UNLOCK(adapter);
999		break;
1000	case SIOCADDMULTI:
1001	case SIOCDELMULTI:
1002		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1003		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1004			IGB_CORE_LOCK(adapter);
1005			igb_disable_intr(adapter);
1006			igb_set_multi(adapter);
1007				igb_enable_intr(adapter);
1008			IGB_CORE_UNLOCK(adapter);
1009		}
1010		break;
1011	case SIOCSIFMEDIA:
1012		/* Check SOL/IDER usage */
1013		IGB_CORE_LOCK(adapter);
1014		if (e1000_check_reset_block(&adapter->hw)) {
1015			IGB_CORE_UNLOCK(adapter);
1016			device_printf(adapter->dev, "Media change is"
1017			    " blocked due to SOL/IDER session.\n");
1018			break;
1019		}
1020		IGB_CORE_UNLOCK(adapter);
1021	case SIOCGIFMEDIA:
1022		IOCTL_DEBUGOUT("ioctl rcv'd: \
1023		    SIOCxIFMEDIA (Get/Set Interface Media)");
1024		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1025		break;
1026	case SIOCSIFCAP:
1027	    {
1028		int mask, reinit;
1029
1030		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1031		reinit = 0;
1032		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1033		if (mask & IFCAP_HWCSUM) {
1034			ifp->if_capenable ^= IFCAP_HWCSUM;
1035			reinit = 1;
1036		}
1037		if (mask & IFCAP_TSO4) {
1038			ifp->if_capenable ^= IFCAP_TSO4;
1039			reinit = 1;
1040		}
1041		if (mask & IFCAP_VLAN_HWTAGGING) {
1042			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1043			reinit = 1;
1044		}
1045		if (mask & IFCAP_LRO) {
1046			ifp->if_capenable ^= IFCAP_LRO;
1047			reinit = 1;
1048		}
1049		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1050			igb_init(adapter);
1051		VLAN_CAPABILITIES(ifp);
1052		break;
1053	    }
1054
1055#ifdef IGB_IEEE1588
1056	/*
1057	** IOCTL support for Precision Time (IEEE 1588) Support
1058	*/
1059	case SIOCSHWTSTAMP:
1060		error = igb_hwtstamp_ioctl(adapter, ifp);
1061		break;
1062#endif
1063
1064	default:
1065		error = ether_ioctl(ifp, command, data);
1066		break;
1067	}
1068
1069	return (error);
1070}
1071
1072/*********************************************************************
1073 *  Watchdog timer:
1074 *
1075 *  This routine is called from the local timer every second.
1076 *  As long as transmit descriptors are being cleaned the value
1077 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
1078 *  and we then reset the device.
1079 *
1080 **********************************************************************/
1081
1082static void
1083igb_watchdog(struct adapter *adapter)
1084{
1085	struct tx_ring	*txr = adapter->tx_rings;
1086	bool		tx_hang = FALSE;
1087
1088	IGB_CORE_LOCK_ASSERT(adapter);
1089
1090	/*
1091	** The timer is set to 5 every time start() queues a packet.
1092	** Then txeof keeps resetting it as long as it cleans at
1093	** least one descriptor.
1094	** Finally, anytime all descriptors are clean the timer is
1095	** set to 0.
1096	**
1097	** With TX Multiqueue we need to check every queue's timer,
1098	** if any time out we do the reset.
1099	*/
1100	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1101		IGB_TX_LOCK(txr);
1102		if (txr->watchdog_timer == 0 ||
1103		    (--txr->watchdog_timer)) {
1104			IGB_TX_UNLOCK(txr);
1105			continue;
1106		} else {
1107			tx_hang = TRUE;
1108			IGB_TX_UNLOCK(txr);
1109			break;
1110		}
1111	}
1112	if (tx_hang == FALSE)
1113		return;
1114
1115	/* If we are in this routine because of pause frames, then
1116	 * don't reset the hardware.
1117	 */
1118	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1119	    E1000_STATUS_TXOFF) {
1120		txr = adapter->tx_rings; /* reset pointer */
1121		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1122			IGB_TX_LOCK(txr);
1123			txr->watchdog_timer = IGB_TX_TIMEOUT;
1124			IGB_TX_UNLOCK(txr);
1125		}
1126		return;
1127	}
1128
1129	if (e1000_check_for_link(&adapter->hw) == 0)
1130		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
1131
1132	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1133		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
1134		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
1135		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
1136		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
1137		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
1138		    txr->next_to_clean);
1139	}
1140
1141	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1142	adapter->watchdog_events++;
1143
1144	igb_init_locked(adapter);
1145}
1146
1147/*********************************************************************
1148 *  Init entry point
1149 *
1150 *  This routine is used in two ways. It is used by the stack as
1151 *  init entry point in network interface structure. It is also used
1152 *  by the driver as a hw/sw initialization routine to get to a
1153 *  consistent state.
1154 *
1155 *  return 0 on success, positive on failure
1156 **********************************************************************/
1157
1158static void
1159igb_init_locked(struct adapter *adapter)
1160{
1161	struct rx_ring *rxr = adapter->rx_rings;
1162	struct tx_ring *txr = adapter->tx_rings;
1163	struct ifnet	*ifp = adapter->ifp;
1164	device_t	dev = adapter->dev;
1165	u32		pba = 0;
1166
1167	INIT_DEBUGOUT("igb_init: begin");
1168
1169	IGB_CORE_LOCK_ASSERT(adapter);
1170
1171	igb_stop(adapter);
1172
1173	/*
1174	 * Packet Buffer Allocation (PBA)
1175	 * Writing PBA sets the receive portion of the buffer
1176	 * the remainder is used for the transmit buffer.
1177	 */
1178	if (adapter->hw.mac.type == e1000_82575) {
1179		INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
1180		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1181		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1182	}
1183
1184	/* Get the latest mac address, User can use a LAA */
1185        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1186              ETHER_ADDR_LEN);
1187
1188	/* Put the address into the Receive Address Array */
1189	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1190
1191	/* Initialize the hardware */
1192	if (igb_hardware_init(adapter)) {
1193		device_printf(dev, "Unable to initialize the hardware\n");
1194		return;
1195	}
1196	igb_update_link_status(adapter);
1197
1198	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1199
1200	/* Set hardware offload abilities */
1201	ifp->if_hwassist = 0;
1202	if (ifp->if_capenable & IFCAP_TXCSUM) {
1203		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1204#if __FreeBSD_version >= 800000
1205		if (adapter->hw.mac.type == e1000_82576)
1206			ifp->if_hwassist |= CSUM_SCTP;
1207#endif
1208	}
1209
1210	if (ifp->if_capenable & IFCAP_TSO4)
1211		ifp->if_hwassist |= CSUM_TSO;
1212
1213	/* Configure for OS presence */
1214	igb_init_manageability(adapter);
1215
1216	/* Prepare transmit descriptors and buffers */
1217	igb_setup_transmit_structures(adapter);
1218	igb_initialize_transmit_units(adapter);
1219
1220	/* Setup Multicast table */
1221	igb_set_multi(adapter);
1222
1223	/*
1224	** Figure out the desired mbuf pool
1225	** for doing jumbo/packetsplit
1226	*/
1227	if (ifp->if_mtu > ETHERMTU)
1228		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1229	else
1230		adapter->rx_mbuf_sz = MCLBYTES;
1231
1232	/* Prepare receive descriptors and buffers */
1233	if (igb_setup_receive_structures(adapter)) {
1234		device_printf(dev, "Could not setup receive structures\n");
1235		igb_stop(adapter);
1236		return;
1237	}
1238	igb_initialize_receive_units(adapter);
1239
1240	/* Don't lose promiscuous settings */
1241	igb_set_promisc(adapter);
1242
1243	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1244	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1245
1246	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1247	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1248
1249	if (adapter->msix > 1) /* Set up queue routing */
1250		igb_configure_queues(adapter);
1251
1252	/* Set up VLAN tag offload and filter */
1253	igb_setup_vlan_hw_support(adapter);
1254
1255        /* Set default RX interrupt moderation */
1256	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1257		E1000_WRITE_REG(&adapter->hw,
1258		    E1000_EITR(rxr->msix), igb_ave_latency);
1259		rxr->eitr_setting = igb_ave_latency;
1260	}
1261
1262	/* Set TX interrupt rate & reset TX watchdog */
1263	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1264		E1000_WRITE_REG(&adapter->hw,
1265		    E1000_EITR(txr->msix), igb_ave_latency);
1266		txr->watchdog_timer = FALSE;
1267	}
1268
1269	{
1270		/* this clears any pending interrupts */
1271		E1000_READ_REG(&adapter->hw, E1000_ICR);
1272		igb_enable_intr(adapter);
1273		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1274	}
1275
1276	/* Don't reset the phy next time init gets called */
1277	adapter->hw.phy.reset_disable = TRUE;
1278}
1279
1280static void
1281igb_init(void *arg)
1282{
1283	struct adapter *adapter = arg;
1284
1285	IGB_CORE_LOCK(adapter);
1286	igb_init_locked(adapter);
1287	IGB_CORE_UNLOCK(adapter);
1288}
1289
1290
1291static void
1292igb_handle_rxtx(void *context, int pending)
1293{
1294	struct adapter	*adapter = context;
1295	struct tx_ring	*txr = adapter->tx_rings;
1296	struct rx_ring	*rxr = adapter->rx_rings;
1297	struct ifnet	*ifp;
1298
1299	ifp = adapter->ifp;
1300
1301	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1302		if (igb_rxeof(rxr, adapter->rx_process_limit))
1303			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1304		IGB_TX_LOCK(txr);
1305		igb_txeof(txr);
1306
1307#if __FreeBSD_version >= 800000
1308		if (!drbr_empty(ifp, txr->br))
1309			igb_mq_start_locked(ifp, txr, NULL);
1310#else
1311		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1312			igb_start_locked(txr, ifp);
1313#endif
1314		IGB_TX_UNLOCK(txr);
1315	}
1316
1317	igb_enable_intr(adapter);
1318}
1319
1320static void
1321igb_handle_rx(void *context, int pending)
1322{
1323	struct rx_ring  *rxr = context;
1324	struct adapter  *adapter = rxr->adapter;
1325	struct ifnet    *ifp = adapter->ifp;
1326
1327	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1328		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1329			/* More to clean, schedule another task */
1330			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1331
1332}
1333
1334static void
1335igb_handle_tx(void *context, int pending)
1336{
1337	struct tx_ring  *txr = context;
1338	struct adapter  *adapter = txr->adapter;
1339	struct ifnet    *ifp = adapter->ifp;
1340
1341	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1342		IGB_TX_LOCK(txr);
1343		igb_txeof(txr);
1344#if __FreeBSD_version >= 800000
1345		if (!drbr_empty(ifp, txr->br))
1346			igb_mq_start_locked(ifp, txr, NULL);
1347#else
1348		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1349			igb_start_locked(txr, ifp);
1350#endif
1351		IGB_TX_UNLOCK(txr);
1352	}
1353}
1354
1355
1356/*********************************************************************
1357 *
1358 *  MSI/Legacy Deferred
1359 *  Interrupt Service routine
1360 *
1361 *********************************************************************/
1362static int
1363igb_irq_fast(void *arg)
1364{
1365	struct adapter	*adapter = arg;
1366	uint32_t	reg_icr;
1367
1368
1369	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1370
1371	/* Hot eject?  */
1372	if (reg_icr == 0xffffffff)
1373		return FILTER_STRAY;
1374
1375	/* Definitely not our interrupt.  */
1376	if (reg_icr == 0x0)
1377		return FILTER_STRAY;
1378
1379	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1380		return FILTER_STRAY;
1381
1382	/*
1383	 * Mask interrupts until the taskqueue is finished running.  This is
1384	 * cheap, just assume that it is needed.  This also works around the
1385	 * MSI message reordering errata on certain systems.
1386	 */
1387	igb_disable_intr(adapter);
1388	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1389
1390	/* Link status change */
1391	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1392		adapter->hw.mac.get_link_status = 1;
1393		igb_update_link_status(adapter);
1394	}
1395
1396	if (reg_icr & E1000_ICR_RXO)
1397		adapter->rx_overruns++;
1398	return FILTER_HANDLED;
1399}
1400
1401
1402/*********************************************************************
1403 *
1404 *  MSIX TX Interrupt Service routine
1405 *
1406 **********************************************************************/
1407static void
1408igb_msix_tx(void *arg)
1409{
1410	struct tx_ring *txr = arg;
1411	struct adapter *adapter = txr->adapter;
1412	u32		loop = IGB_MAX_LOOP;
1413	bool		more;
1414
1415	++txr->tx_irq;
1416	IGB_TX_LOCK(txr);
1417
1418	do {
1419		more = igb_txeof(txr);
1420	} while (loop-- && more);
1421
1422	IGB_TX_UNLOCK(txr);
1423
1424	/* Schedule a clean task */
1425	taskqueue_enqueue(adapter->tq, &txr->tx_task);
1426
1427	/* Reenable this interrupt */
1428	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1429	return;
1430}
1431
1432/*********************************************************************
1433 *
1434 *  MSIX RX Interrupt Service routine
1435 *
1436 **********************************************************************/
1437
1438static void
1439igb_msix_rx(void *arg)
1440{
1441	struct rx_ring *rxr = arg;
1442	struct adapter *adapter = rxr->adapter;
1443	u32		loop = IGB_MAX_LOOP;
1444	bool		more;
1445
1446	++rxr->rx_irq;
1447	do {
1448		more = igb_rxeof(rxr, adapter->rx_process_limit);
1449	} while (loop-- && more);
1450
1451	/* Update interrupt rate */
1452	if (igb_enable_aim == TRUE)
1453		igb_update_aim(rxr);
1454
1455	/* Schedule another clean */
1456	taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1457
1458	/* Reenable this interrupt */
1459	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1460	return;
1461}
1462
1463
1464/*********************************************************************
1465 *
1466 *  MSIX Link Interrupt Service routine
1467 *
1468 **********************************************************************/
1469
1470static void
1471igb_msix_link(void *arg)
1472{
1473	struct adapter	*adapter = arg;
1474	u32       	icr;
1475
1476	++adapter->link_irq;
1477	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1478	if (!(icr & E1000_ICR_LSC))
1479		goto spurious;
1480	adapter->hw.mac.get_link_status = 1;
1481	igb_update_link_status(adapter);
1482
1483spurious:
1484	/* Rearm */
1485	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1486	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1487	return;
1488}
1489
1490
1491/*
1492** Routine to adjust the RX EITR value based on traffic,
1493** its a simple three state model, but seems to help.
1494**
1495** Note that the three EITR values are tuneable using
1496** sysctl in real time. The feature can be effectively
1497** nullified by setting them equal.
1498*/
1499#define BULK_THRESHOLD	10000
1500#define AVE_THRESHOLD	1600
1501
1502static void
1503igb_update_aim(struct rx_ring *rxr)
1504{
1505	struct adapter	*adapter = rxr->adapter;
1506	u32		olditr, newitr;
1507
1508	/* Update interrupt moderation based on traffic */
1509	olditr = rxr->eitr_setting;
1510	newitr = olditr;
1511
1512	/* Idle, don't change setting */
1513	if (rxr->bytes == 0)
1514		return;
1515
1516	if (olditr == igb_low_latency) {
1517		if (rxr->bytes > AVE_THRESHOLD)
1518			newitr = igb_ave_latency;
1519	} else if (olditr == igb_ave_latency) {
1520		if (rxr->bytes < AVE_THRESHOLD)
1521			newitr = igb_low_latency;
1522		else if (rxr->bytes > BULK_THRESHOLD)
1523			newitr = igb_bulk_latency;
1524	} else if (olditr == igb_bulk_latency) {
1525		if (rxr->bytes < BULK_THRESHOLD)
1526			newitr = igb_ave_latency;
1527	}
1528
1529	if (olditr != newitr) {
1530		/* Change interrupt rate */
1531		rxr->eitr_setting = newitr;
1532		E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
1533		    newitr | (newitr << 16));
1534	}
1535
1536	rxr->bytes = 0;
1537        return;
1538}
1539
1540
1541/*********************************************************************
1542 *
1543 *  Media Ioctl callback
1544 *
1545 *  This routine is called whenever the user queries the status of
1546 *  the interface using ifconfig.
1547 *
1548 **********************************************************************/
1549static void
1550igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1551{
1552	struct adapter *adapter = ifp->if_softc;
1553	u_char fiber_type = IFM_1000_SX;
1554
1555	INIT_DEBUGOUT("igb_media_status: begin");
1556
1557	IGB_CORE_LOCK(adapter);
1558	igb_update_link_status(adapter);
1559
1560	ifmr->ifm_status = IFM_AVALID;
1561	ifmr->ifm_active = IFM_ETHER;
1562
1563	if (!adapter->link_active) {
1564		IGB_CORE_UNLOCK(adapter);
1565		return;
1566	}
1567
1568	ifmr->ifm_status |= IFM_ACTIVE;
1569
1570	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1571	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1572		ifmr->ifm_active |= fiber_type | IFM_FDX;
1573	else {
1574		switch (adapter->link_speed) {
1575		case 10:
1576			ifmr->ifm_active |= IFM_10_T;
1577			break;
1578		case 100:
1579			ifmr->ifm_active |= IFM_100_TX;
1580			break;
1581		case 1000:
1582			ifmr->ifm_active |= IFM_1000_T;
1583			break;
1584		}
1585		if (adapter->link_duplex == FULL_DUPLEX)
1586			ifmr->ifm_active |= IFM_FDX;
1587		else
1588			ifmr->ifm_active |= IFM_HDX;
1589	}
1590	IGB_CORE_UNLOCK(adapter);
1591}
1592
1593/*********************************************************************
1594 *
1595 *  Media Ioctl callback
1596 *
1597 *  This routine is called when the user changes speed/duplex using
1598 *  media/mediopt option with ifconfig.
1599 *
1600 **********************************************************************/
1601static int
1602igb_media_change(struct ifnet *ifp)
1603{
1604	struct adapter *adapter = ifp->if_softc;
1605	struct ifmedia  *ifm = &adapter->media;
1606
1607	INIT_DEBUGOUT("igb_media_change: begin");
1608
1609	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1610		return (EINVAL);
1611
1612	IGB_CORE_LOCK(adapter);
1613	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1614	case IFM_AUTO:
1615		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1616		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1617		break;
1618	case IFM_1000_LX:
1619	case IFM_1000_SX:
1620	case IFM_1000_T:
1621		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1622		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1623		break;
1624	case IFM_100_TX:
1625		adapter->hw.mac.autoneg = FALSE;
1626		adapter->hw.phy.autoneg_advertised = 0;
1627		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1628			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1629		else
1630			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1631		break;
1632	case IFM_10_T:
1633		adapter->hw.mac.autoneg = FALSE;
1634		adapter->hw.phy.autoneg_advertised = 0;
1635		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1636			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1637		else
1638			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1639		break;
1640	default:
1641		device_printf(adapter->dev, "Unsupported media type\n");
1642	}
1643
1644	/* As the speed/duplex settings my have changed we need to
1645	 * reset the PHY.
1646	 */
1647	adapter->hw.phy.reset_disable = FALSE;
1648
1649	igb_init_locked(adapter);
1650	IGB_CORE_UNLOCK(adapter);
1651
1652	return (0);
1653}
1654
1655
1656/*********************************************************************
1657 *
1658 *  This routine maps the mbufs to Advanced TX descriptors.
1659 *  used by the 82575 adapter.
1660 *
1661 **********************************************************************/
1662
1663static int
1664igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1665{
1666	struct adapter		*adapter = txr->adapter;
1667	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1668	bus_dmamap_t		map;
1669	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1670	union e1000_adv_tx_desc	*txd = NULL;
1671	struct mbuf		*m_head;
1672	u32			olinfo_status = 0, cmd_type_len = 0;
1673	int			nsegs, i, j, error, first, last = 0;
1674	u32			hdrlen = 0;
1675
1676	m_head = *m_headp;
1677
1678
1679	/* Set basic descriptor constants */
1680	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1681	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1682	if (m_head->m_flags & M_VLANTAG)
1683		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1684
1685        /*
1686         * Force a cleanup if number of TX descriptors
1687         * available hits the threshold
1688         */
1689	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1690		igb_txeof(txr);
1691		/* Now do we at least have a minimal? */
1692		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1693			txr->no_desc_avail++;
1694			return (ENOBUFS);
1695		}
1696	}
1697
1698	/*
1699         * Map the packet for DMA.
1700	 *
1701	 * Capture the first descriptor index,
1702	 * this descriptor will have the index
1703	 * of the EOP which is the only one that
1704	 * now gets a DONE bit writeback.
1705	 */
1706	first = txr->next_avail_desc;
1707	tx_buffer = &txr->tx_buffers[first];
1708	tx_buffer_mapped = tx_buffer;
1709	map = tx_buffer->map;
1710
1711	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1712	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1713
1714	if (error == EFBIG) {
1715		struct mbuf *m;
1716
1717		m = m_defrag(*m_headp, M_DONTWAIT);
1718		if (m == NULL) {
1719			adapter->mbuf_defrag_failed++;
1720			m_freem(*m_headp);
1721			*m_headp = NULL;
1722			return (ENOBUFS);
1723		}
1724		*m_headp = m;
1725
1726		/* Try it again */
1727		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1728		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1729
1730		if (error == ENOMEM) {
1731			adapter->no_tx_dma_setup++;
1732			return (error);
1733		} else if (error != 0) {
1734			adapter->no_tx_dma_setup++;
1735			m_freem(*m_headp);
1736			*m_headp = NULL;
1737			return (error);
1738		}
1739	} else if (error == ENOMEM) {
1740		adapter->no_tx_dma_setup++;
1741		return (error);
1742	} else if (error != 0) {
1743		adapter->no_tx_dma_setup++;
1744		m_freem(*m_headp);
1745		*m_headp = NULL;
1746		return (error);
1747	}
1748
1749	/* Check again to be sure we have enough descriptors */
1750        if (nsegs > (txr->tx_avail - 2)) {
1751                txr->no_desc_avail++;
1752		bus_dmamap_unload(txr->txtag, map);
1753		return (ENOBUFS);
1754        }
1755	m_head = *m_headp;
1756
1757        /*
1758         * Set up the context descriptor:
1759         * used when any hardware offload is done.
1760	 * This includes CSUM, VLAN, and TSO. It
1761	 * will use the first descriptor.
1762         */
1763        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1764		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1765			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1766			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1767			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1768		} else
1769			return (ENXIO);
1770	} else if (igb_tx_ctx_setup(txr, m_head))
1771		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1772
1773#ifdef IGB_IEEE1588
1774	/* This is changing soon to an mtag detection */
1775	if (we detect this mbuf has a TSTAMP mtag)
1776		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
1777#endif
1778	/* Calculate payload length */
1779	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1780	    << E1000_ADVTXD_PAYLEN_SHIFT);
1781
1782	/* Set up our transmit descriptors */
1783	i = txr->next_avail_desc;
1784	for (j = 0; j < nsegs; j++) {
1785		bus_size_t seg_len;
1786		bus_addr_t seg_addr;
1787
1788		tx_buffer = &txr->tx_buffers[i];
1789		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1790		seg_addr = segs[j].ds_addr;
1791		seg_len  = segs[j].ds_len;
1792
1793		txd->read.buffer_addr = htole64(seg_addr);
1794		txd->read.cmd_type_len = htole32(
1795		    adapter->txd_cmd | cmd_type_len | seg_len);
1796		txd->read.olinfo_status = htole32(olinfo_status);
1797		last = i;
1798		if (++i == adapter->num_tx_desc)
1799			i = 0;
1800		tx_buffer->m_head = NULL;
1801		tx_buffer->next_eop = -1;
1802	}
1803
1804	txr->next_avail_desc = i;
1805	txr->tx_avail -= nsegs;
1806
1807        tx_buffer->m_head = m_head;
1808	tx_buffer_mapped->map = tx_buffer->map;
1809	tx_buffer->map = map;
1810        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1811
1812        /*
1813         * Last Descriptor of Packet
1814	 * needs End Of Packet (EOP)
1815	 * and Report Status (RS)
1816         */
1817        txd->read.cmd_type_len |=
1818	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1819	/*
1820	 * Keep track in the first buffer which
1821	 * descriptor will be written back
1822	 */
1823	tx_buffer = &txr->tx_buffers[first];
1824	tx_buffer->next_eop = last;
1825
1826	/*
1827	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1828	 * that this frame is available to transmit.
1829	 */
1830	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1831	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1832	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1833	++txr->tx_packets;
1834
1835	return (0);
1836
1837}
1838
1839static void
1840igb_set_promisc(struct adapter *adapter)
1841{
1842	struct ifnet	*ifp = adapter->ifp;
1843	uint32_t	reg_rctl;
1844
1845	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1846
1847	if (ifp->if_flags & IFF_PROMISC) {
1848		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1849		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1850	} else if (ifp->if_flags & IFF_ALLMULTI) {
1851		reg_rctl |= E1000_RCTL_MPE;
1852		reg_rctl &= ~E1000_RCTL_UPE;
1853		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1854	}
1855}
1856
1857static void
1858igb_disable_promisc(struct adapter *adapter)
1859{
1860	uint32_t	reg_rctl;
1861
1862	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1863
1864	reg_rctl &=  (~E1000_RCTL_UPE);
1865	reg_rctl &=  (~E1000_RCTL_MPE);
1866	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1867}
1868
1869
1870/*********************************************************************
1871 *  Multicast Update
1872 *
1873 *  This routine is called whenever multicast address list is updated.
1874 *
1875 **********************************************************************/
1876
1877static void
1878igb_set_multi(struct adapter *adapter)
1879{
1880	struct ifnet	*ifp = adapter->ifp;
1881	struct ifmultiaddr *ifma;
1882	u32 reg_rctl = 0;
1883	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1884
1885	int mcnt = 0;
1886
1887	IOCTL_DEBUGOUT("igb_set_multi: begin");
1888
1889	if_maddr_rlock(ifp);
1890	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1891		if (ifma->ifma_addr->sa_family != AF_LINK)
1892			continue;
1893
1894		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1895			break;
1896
1897		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1898		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1899		mcnt++;
1900	}
1901	if_maddr_runlock(ifp);
1902
1903	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1904		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1905		reg_rctl |= E1000_RCTL_MPE;
1906		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1907	} else
1908		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1909}
1910
1911
1912/*********************************************************************
1913 *  Timer routine
1914 *
1915 *  This routine checks for link status and updates statistics.
1916 *
1917 **********************************************************************/
1918
1919static void
1920igb_local_timer(void *arg)
1921{
1922	struct adapter	*adapter = arg;
1923	struct ifnet	*ifp = adapter->ifp;
1924
1925	IGB_CORE_LOCK_ASSERT(adapter);
1926
1927	igb_update_link_status(adapter);
1928	igb_update_stats_counters(adapter);
1929
1930	if (igb_display_debug_stats && ifp->if_drv_flags & IFF_DRV_RUNNING)
1931		igb_print_hw_stats(adapter);
1932
1933	/*
1934	 * Each second we check the watchdog to
1935	 * protect against hardware hangs.
1936	 */
1937	igb_watchdog(adapter);
1938
1939	/* Trigger an RX interrupt on all queues */
1940	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1941
1942	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1943
1944}
1945
1946static void
1947igb_update_link_status(struct adapter *adapter)
1948{
1949	struct e1000_hw *hw = &adapter->hw;
1950	struct ifnet *ifp = adapter->ifp;
1951	device_t dev = adapter->dev;
1952	struct tx_ring *txr = adapter->tx_rings;
1953	u32 link_check = 0;
1954
1955	/* Get the cached link value or read for real */
1956        switch (hw->phy.media_type) {
1957        case e1000_media_type_copper:
1958                if (hw->mac.get_link_status) {
1959			/* Do the work to read phy */
1960                        e1000_check_for_link(hw);
1961                        link_check = !hw->mac.get_link_status;
1962                } else
1963                        link_check = TRUE;
1964                break;
1965        case e1000_media_type_fiber:
1966                e1000_check_for_link(hw);
1967                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1968                                 E1000_STATUS_LU);
1969                break;
1970        case e1000_media_type_internal_serdes:
1971                e1000_check_for_link(hw);
1972                link_check = adapter->hw.mac.serdes_has_link;
1973                break;
1974        default:
1975        case e1000_media_type_unknown:
1976                break;
1977        }
1978
1979	/* Now we check if a transition has happened */
1980	if (link_check && (adapter->link_active == 0)) {
1981		e1000_get_speed_and_duplex(&adapter->hw,
1982		    &adapter->link_speed, &adapter->link_duplex);
1983		if (bootverbose)
1984			device_printf(dev, "Link is up %d Mbps %s\n",
1985			    adapter->link_speed,
1986			    ((adapter->link_duplex == FULL_DUPLEX) ?
1987			    "Full Duplex" : "Half Duplex"));
1988		adapter->link_active = 1;
1989		ifp->if_baudrate = adapter->link_speed * 1000000;
1990		if_link_state_change(ifp, LINK_STATE_UP);
1991	} else if (!link_check && (adapter->link_active == 1)) {
1992		ifp->if_baudrate = adapter->link_speed = 0;
1993		adapter->link_duplex = 0;
1994		if (bootverbose)
1995			device_printf(dev, "Link is Down\n");
1996		adapter->link_active = 0;
1997		if_link_state_change(ifp, LINK_STATE_DOWN);
1998		/* Turn off watchdogs */
1999		for (int i = 0; i < adapter->num_queues; i++, txr++)
2000			txr->watchdog_timer = FALSE;
2001	}
2002}
2003
2004/*********************************************************************
2005 *
2006 *  This routine disables all traffic on the adapter by issuing a
2007 *  global reset on the MAC and deallocates TX/RX buffers.
2008 *
2009 **********************************************************************/
2010
2011static void
2012igb_stop(void *arg)
2013{
2014	struct adapter	*adapter = arg;
2015	struct ifnet	*ifp = adapter->ifp;
2016
2017	IGB_CORE_LOCK_ASSERT(adapter);
2018
2019	INIT_DEBUGOUT("igb_stop: begin");
2020
2021	igb_disable_intr(adapter);
2022
2023	callout_stop(&adapter->timer);
2024
2025	/* Tell the stack that the interface is no longer active */
2026	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2027
2028	e1000_reset_hw(&adapter->hw);
2029	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2030}
2031
2032
2033/*********************************************************************
2034 *
2035 *  Determine hardware revision.
2036 *
2037 **********************************************************************/
2038static void
2039igb_identify_hardware(struct adapter *adapter)
2040{
2041	device_t dev = adapter->dev;
2042
2043	/* Make sure our PCI config space has the necessary stuff set */
2044	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2045	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2046	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2047		device_printf(dev, "Memory Access and/or Bus Master bits "
2048		    "were not set!\n");
2049		adapter->hw.bus.pci_cmd_word |=
2050		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2051		pci_write_config(dev, PCIR_COMMAND,
2052		    adapter->hw.bus.pci_cmd_word, 2);
2053	}
2054
2055	/* Save off the information about this board */
2056	adapter->hw.vendor_id = pci_get_vendor(dev);
2057	adapter->hw.device_id = pci_get_device(dev);
2058	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2059	adapter->hw.subsystem_vendor_id =
2060	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2061	adapter->hw.subsystem_device_id =
2062	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2063
2064	/* Do Shared Code Init and Setup */
2065	if (e1000_set_mac_type(&adapter->hw)) {
2066		device_printf(dev, "Setup init failure\n");
2067		return;
2068	}
2069}
2070
2071static int
2072igb_allocate_pci_resources(struct adapter *adapter)
2073{
2074	device_t	dev = adapter->dev;
2075	int		rid;
2076
2077	rid = PCIR_BAR(0);
2078	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2079	    &rid, RF_ACTIVE);
2080	if (adapter->pci_mem == NULL) {
2081		device_printf(dev, "Unable to allocate bus resource: memory\n");
2082		return (ENXIO);
2083	}
2084	adapter->osdep.mem_bus_space_tag =
2085	    rman_get_bustag(adapter->pci_mem);
2086	adapter->osdep.mem_bus_space_handle =
2087	    rman_get_bushandle(adapter->pci_mem);
2088	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2089
2090	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2091
2092	/* This will setup either MSI/X or MSI */
2093	adapter->msix = igb_setup_msix(adapter);
2094	adapter->hw.back = &adapter->osdep;
2095
2096	return (0);
2097}
2098
2099/*********************************************************************
2100 *
2101 *  Setup the Legacy or MSI Interrupt handler
2102 *
2103 **********************************************************************/
2104static int
2105igb_allocate_legacy(struct adapter *adapter)
2106{
2107	device_t dev = adapter->dev;
2108	int error, rid = 0;
2109
2110	/* Turn off all interrupts */
2111	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2112
2113	/* MSI RID is 1 */
2114	if (adapter->msix == 1)
2115		rid = 1;
2116
2117	/* We allocate a single interrupt resource */
2118	adapter->res = bus_alloc_resource_any(dev,
2119	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2120	if (adapter->res == NULL) {
2121		device_printf(dev, "Unable to allocate bus resource: "
2122		    "interrupt\n");
2123		return (ENXIO);
2124	}
2125
2126	/*
2127	 * Try allocating a fast interrupt and the associated deferred
2128	 * processing contexts.
2129	 */
2130	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2131	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2132	    taskqueue_thread_enqueue, &adapter->tq);
2133	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2134	    device_get_nameunit(adapter->dev));
2135	if ((error = bus_setup_intr(dev, adapter->res,
2136	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2137	    adapter, &adapter->tag)) != 0) {
2138		device_printf(dev, "Failed to register fast interrupt "
2139			    "handler: %d\n", error);
2140		taskqueue_free(adapter->tq);
2141		adapter->tq = NULL;
2142		return (error);
2143	}
2144
2145	return (0);
2146}
2147
2148
2149/*********************************************************************
2150 *
2151 *  Setup the MSIX Interrupt handlers:
2152 *
2153 **********************************************************************/
2154static int
2155igb_allocate_msix(struct adapter *adapter)
2156{
2157	device_t dev = adapter->dev;
2158	struct tx_ring *txr = adapter->tx_rings;
2159	struct rx_ring *rxr = adapter->rx_rings;
2160	int error, rid, vector = 0;
2161
2162	/*
2163	 * Setup the interrupt handlers
2164	 */
2165
2166	/* TX Setup */
2167	for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
2168		rid = vector +1;
2169		txr->res = bus_alloc_resource_any(dev,
2170		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2171		if (txr->res == NULL) {
2172			device_printf(dev,
2173			    "Unable to allocate bus resource: "
2174			    "MSIX TX Interrupt\n");
2175			return (ENXIO);
2176		}
2177		error = bus_setup_intr(dev, txr->res,
2178	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2179		    igb_msix_tx, txr, &txr->tag);
2180		if (error) {
2181			txr->res = NULL;
2182			device_printf(dev, "Failed to register TX handler");
2183			return (error);
2184		}
2185		/* Make tasklet for deferred handling - one per queue */
2186		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2187		txr->msix = vector;
2188		if (adapter->hw.mac.type == e1000_82575)
2189			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2190		else
2191			txr->eims = 1 << vector;
2192		/*
2193		** Bind the msix vector, and thus the
2194		** ring to the corresponding cpu.
2195		*/
2196		if (adapter->num_queues > 1)
2197			bus_bind_intr(dev, txr->res, i);
2198	}
2199
2200	/* RX Setup */
2201	for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2202		rid = vector +1;
2203		rxr->res = bus_alloc_resource_any(dev,
2204		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2205		if (rxr->res == NULL) {
2206			device_printf(dev,
2207			    "Unable to allocate bus resource: "
2208			    "MSIX RX Interrupt\n");
2209			return (ENXIO);
2210		}
2211		error = bus_setup_intr(dev, rxr->res,
2212	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2213		    igb_msix_rx, rxr, &rxr->tag);
2214		if (error) {
2215			rxr->res = NULL;
2216			device_printf(dev, "Failed to register RX handler");
2217			return (error);
2218		}
2219		/* Make tasklet for deferred handling - one per queue */
2220		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2221		rxr->msix = vector;
2222		if (adapter->hw.mac.type == e1000_82575)
2223			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2224		else
2225			rxr->eims = 1 << vector;
2226		/* Get a mask for local timer */
2227		adapter->rx_mask |= rxr->eims;
2228		/*
2229		** Bind the msix vector, and thus the
2230		** ring to the corresponding cpu.
2231		** Notice that this makes an RX/TX pair
2232		** bound to each CPU, limited by the MSIX
2233		** vectors.
2234		*/
2235		if (adapter->num_queues > 1)
2236			bus_bind_intr(dev, rxr->res, i);
2237	}
2238
2239	/* And Link */
2240	rid = vector +1;
2241	adapter->res = bus_alloc_resource_any(dev,
2242	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2243	if (adapter->res == NULL) {
2244		device_printf(dev,
2245		    "Unable to allocate bus resource: "
2246		    "MSIX Link Interrupt\n");
2247		return (ENXIO);
2248	}
2249	if ((error = bus_setup_intr(dev, adapter->res,
2250	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2251	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2252		device_printf(dev, "Failed to register Link handler");
2253		return (error);
2254	}
2255	adapter->linkvec = vector;
2256	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2257	    taskqueue_thread_enqueue, &adapter->tq);
2258	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2259	    device_get_nameunit(adapter->dev));
2260
2261	return (0);
2262}
2263
2264
2265static void
2266igb_configure_queues(struct adapter *adapter)
2267{
2268	struct	e1000_hw *hw = &adapter->hw;
2269	struct	tx_ring	*txr;
2270	struct	rx_ring	*rxr;
2271
2272	/* Turn on MSIX */
2273	/*
2274	** 82576 uses IVARs to route MSI/X
2275	** interrupts, its not very intuitive,
2276	** study the code carefully :)
2277	*/
2278	if (adapter->hw.mac.type == e1000_82576) {
2279		u32	ivar = 0;
2280		/* First turn on the capability */
2281		E1000_WRITE_REG(hw, E1000_GPIE,
2282		    E1000_GPIE_MSIX_MODE |
2283		    E1000_GPIE_EIAME |
2284		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2285		/* RX */
2286		for (int i = 0; i < adapter->num_queues; i++) {
2287			u32 index = i & 0x7; /* Each IVAR has two entries */
2288			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2289			rxr = &adapter->rx_rings[i];
2290			if (i < 8) {
2291				ivar &= 0xFFFFFF00;
2292				ivar |= rxr->msix | E1000_IVAR_VALID;
2293			} else {
2294				ivar &= 0xFF00FFFF;
2295				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2296			}
2297			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2298			adapter->eims_mask |= rxr->eims;
2299		}
2300		/* TX */
2301		for (int i = 0; i < adapter->num_queues; i++) {
2302			u32 index = i & 0x7; /* Each IVAR has two entries */
2303			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2304			txr = &adapter->tx_rings[i];
2305			if (i < 8) {
2306				ivar &= 0xFFFF00FF;
2307				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2308			} else {
2309				ivar &= 0x00FFFFFF;
2310				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2311			}
2312			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2313			adapter->eims_mask |= txr->eims;
2314		}
2315
2316		/* And for the link interrupt */
2317		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2318		adapter->link_mask = 1 << adapter->linkvec;
2319		adapter->eims_mask |= adapter->link_mask;
2320		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2321	} else
2322	{ /* 82575 */
2323		int tmp;
2324
2325                /* enable MSI-X PBA support*/
2326		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2327                tmp |= E1000_CTRL_EXT_PBA_CLR;
2328                /* Auto-Mask interrupts upon ICR read. */
2329                tmp |= E1000_CTRL_EXT_EIAME;
2330                tmp |= E1000_CTRL_EXT_IRCA;
2331                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2332
2333		/* TX */
2334		for (int i = 0; i < adapter->num_queues; i++) {
2335			txr = &adapter->tx_rings[i];
2336			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2337			    txr->eims);
2338			adapter->eims_mask |= txr->eims;
2339		}
2340
2341		/* RX */
2342		for (int i = 0; i < adapter->num_queues; i++) {
2343			rxr = &adapter->rx_rings[i];
2344			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2345			    rxr->eims);
2346			adapter->eims_mask |= rxr->eims;
2347		}
2348
2349		/* Link */
2350		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2351		    E1000_EIMS_OTHER);
2352		adapter->link_mask |= E1000_EIMS_OTHER;
2353		adapter->eims_mask |= adapter->link_mask;
2354	}
2355	return;
2356}
2357
2358
2359static void
2360igb_free_pci_resources(struct adapter *adapter)
2361{
2362	struct          tx_ring *txr = adapter->tx_rings;
2363	struct          rx_ring *rxr = adapter->rx_rings;
2364	device_t	dev = adapter->dev;
2365	int		rid;
2366
2367	/*
2368	** There is a slight possibility of a failure mode
2369	** in attach that will result in entering this function
2370	** before interrupt resources have been initialized, and
2371	** in that case we do not want to execute the loops below
2372	** We can detect this reliably by the state of the adapter
2373	** res pointer.
2374	*/
2375	if (adapter->res == NULL)
2376		goto mem;
2377
2378	/*
2379	 * First release all the TX/RX interrupt resources:
2380	 */
2381	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2382		rid = txr->msix + 1;
2383		if (txr->tag != NULL) {
2384			bus_teardown_intr(dev, txr->res, txr->tag);
2385			txr->tag = NULL;
2386		}
2387		if (txr->res != NULL)
2388			bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
2389	}
2390
2391	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2392		rid = rxr->msix + 1;
2393		if (rxr->tag != NULL) {
2394			bus_teardown_intr(dev, rxr->res, rxr->tag);
2395			rxr->tag = NULL;
2396		}
2397		if (rxr->res != NULL)
2398			bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
2399	}
2400
2401	/* Clean the Legacy or Link interrupt last */
2402	if (adapter->linkvec) /* we are doing MSIX */
2403		rid = adapter->linkvec + 1;
2404	else
2405		(adapter->msix != 0) ? (rid = 1):(rid = 0);
2406
2407	if (adapter->tag != NULL) {
2408		bus_teardown_intr(dev, adapter->res, adapter->tag);
2409		adapter->tag = NULL;
2410	}
2411	if (adapter->res != NULL)
2412		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2413
2414mem:
2415	if (adapter->msix)
2416		pci_release_msi(dev);
2417
2418	if (adapter->msix_mem != NULL)
2419		bus_release_resource(dev, SYS_RES_MEMORY,
2420		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2421
2422	if (adapter->pci_mem != NULL)
2423		bus_release_resource(dev, SYS_RES_MEMORY,
2424		    PCIR_BAR(0), adapter->pci_mem);
2425
2426}
2427
2428/*
2429 * Setup Either MSI/X or MSI
2430 */
2431static int
2432igb_setup_msix(struct adapter *adapter)
2433{
2434	device_t dev = adapter->dev;
2435	int rid, want, queues, msgs;
2436
2437	/* First try MSI/X */
2438	rid = PCIR_BAR(IGB_MSIX_BAR);
2439	adapter->msix_mem = bus_alloc_resource_any(dev,
2440	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2441       	if (!adapter->msix_mem) {
2442		/* May not be enabled */
2443		device_printf(adapter->dev,
2444		    "Unable to map MSIX table \n");
2445		goto msi;
2446	}
2447
2448	msgs = pci_msix_count(dev);
2449	if (msgs == 0) { /* system has msix disabled */
2450		bus_release_resource(dev, SYS_RES_MEMORY,
2451		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2452		adapter->msix_mem = NULL;
2453		goto msi;
2454	}
2455
2456	/* Figure out a reasonable auto config value */
2457	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2458
2459	if (igb_num_queues == 0)
2460		igb_num_queues = queues;
2461	/*
2462	** Two vectors (RX/TX pair) per queue
2463	** plus an additional for Link interrupt
2464	*/
2465	want = (igb_num_queues * 2) + 1;
2466	if (msgs >= want)
2467		msgs = want;
2468	else {
2469               	device_printf(adapter->dev,
2470		    "MSIX Configuration Problem, "
2471		    "%d vectors configured, but %d queues wanted!\n",
2472		    msgs, want);
2473		return (ENXIO);
2474	}
2475	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2476               	device_printf(adapter->dev,
2477		    "Using MSIX interrupts with %d vectors\n", msgs);
2478		adapter->num_queues = igb_num_queues;
2479		return (msgs);
2480	}
2481msi:
2482       	msgs = pci_msi_count(dev);
2483       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2484               	device_printf(adapter->dev,"Using MSI interrupt\n");
2485	return (msgs);
2486}
2487
2488/*********************************************************************
2489 *
2490 *  Initialize the hardware to a configuration
2491 *  as specified by the adapter structure.
2492 *
2493 **********************************************************************/
2494static int
2495igb_hardware_init(struct adapter *adapter)
2496{
2497	device_t	dev = adapter->dev;
2498	u32		rx_buffer_size;
2499
2500	INIT_DEBUGOUT("igb_hardware_init: begin");
2501
2502	/* Issue a global reset */
2503	e1000_reset_hw(&adapter->hw);
2504
2505	/* Let the firmware know the OS is in control */
2506	igb_get_hw_control(adapter);
2507
2508	/*
2509	 * These parameters control the automatic generation (Tx) and
2510	 * response (Rx) to Ethernet PAUSE frames.
2511	 * - High water mark should allow for at least two frames to be
2512	 *   received after sending an XOFF.
2513	 * - Low water mark works best when it is very near the high water mark.
2514	 *   This allows the receiver to restart by sending XON when it has
2515	 *   drained a bit. Here we use an arbitary value of 1500 which will
2516	 *   restart after one full frame is pulled from the buffer. There
2517	 *   could be several smaller frames in the buffer and if so they will
2518	 *   not trigger the XON until their total number reduces the buffer
2519	 *   by 1500.
2520	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2521	 */
2522	if (adapter->hw.mac.type == e1000_82576)
2523		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2524		    E1000_RXPBS) & 0xffff) << 10 );
2525	else
2526		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2527		    E1000_PBA) & 0xffff) << 10 );
2528
2529	adapter->hw.fc.high_water = rx_buffer_size -
2530	    roundup2(adapter->max_frame_size, 1024);
2531	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2532
2533	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2534	adapter->hw.fc.send_xon = TRUE;
2535
2536	/* Set Flow control, use the tunable location if sane */
2537	if ((igb_fc_setting >= 0) || (igb_fc_setting < 4))
2538		adapter->hw.fc.requested_mode = igb_fc_setting;
2539	else
2540		adapter->hw.fc.requested_mode = e1000_fc_none;
2541
2542	if (e1000_init_hw(&adapter->hw) < 0) {
2543		device_printf(dev, "Hardware Initialization Failed\n");
2544		return (EIO);
2545	}
2546
2547	e1000_check_for_link(&adapter->hw);
2548
2549	return (0);
2550}
2551
2552/*********************************************************************
2553 *
2554 *  Setup networking device structure and register an interface.
2555 *
2556 **********************************************************************/
2557static void
2558igb_setup_interface(device_t dev, struct adapter *adapter)
2559{
2560	struct ifnet   *ifp;
2561
2562	INIT_DEBUGOUT("igb_setup_interface: begin");
2563
2564	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2565	if (ifp == NULL)
2566		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2567	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2568	ifp->if_mtu = ETHERMTU;
2569	ifp->if_init =  igb_init;
2570	ifp->if_softc = adapter;
2571	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2572	ifp->if_ioctl = igb_ioctl;
2573	ifp->if_start = igb_start;
2574#if __FreeBSD_version >= 800000
2575	ifp->if_transmit = igb_mq_start;
2576	ifp->if_qflush = igb_qflush;
2577#endif
2578	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2579	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2580	IFQ_SET_READY(&ifp->if_snd);
2581
2582	ether_ifattach(ifp, adapter->hw.mac.addr);
2583
2584	ifp->if_capabilities = ifp->if_capenable = 0;
2585
2586	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2587	ifp->if_capabilities |= IFCAP_TSO4;
2588	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2589	ifp->if_capenable = ifp->if_capabilities;
2590
2591	/*
2592	 * Tell the upper layer(s) we support long frames.
2593	 */
2594	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2595	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2596	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2597
2598	/*
2599	 * Specify the media types supported by this adapter and register
2600	 * callbacks to update media and link information
2601	 */
2602	ifmedia_init(&adapter->media, IFM_IMASK,
2603	    igb_media_change, igb_media_status);
2604	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2605	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2606		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2607			    0, NULL);
2608		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2609	} else {
2610		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2611		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2612			    0, NULL);
2613		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2614			    0, NULL);
2615		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2616			    0, NULL);
2617		if (adapter->hw.phy.type != e1000_phy_ife) {
2618			ifmedia_add(&adapter->media,
2619				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2620			ifmedia_add(&adapter->media,
2621				IFM_ETHER | IFM_1000_T, 0, NULL);
2622		}
2623	}
2624	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2625	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2626}
2627
2628
2629/*
2630 * Manage DMA'able memory.
2631 */
2632static void
2633igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2634{
2635	if (error)
2636		return;
2637	*(bus_addr_t *) arg = segs[0].ds_addr;
2638}
2639
2640static int
2641igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2642        struct igb_dma_alloc *dma, int mapflags)
2643{
2644	int error;
2645
2646	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2647				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2648				BUS_SPACE_MAXADDR,	/* lowaddr */
2649				BUS_SPACE_MAXADDR,	/* highaddr */
2650				NULL, NULL,		/* filter, filterarg */
2651				size,			/* maxsize */
2652				1,			/* nsegments */
2653				size,			/* maxsegsize */
2654				0,			/* flags */
2655				NULL,			/* lockfunc */
2656				NULL,			/* lockarg */
2657				&dma->dma_tag);
2658	if (error) {
2659		device_printf(adapter->dev,
2660		    "%s: bus_dma_tag_create failed: %d\n",
2661		    __func__, error);
2662		goto fail_0;
2663	}
2664
2665	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2666	    BUS_DMA_NOWAIT, &dma->dma_map);
2667	if (error) {
2668		device_printf(adapter->dev,
2669		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2670		    __func__, (uintmax_t)size, error);
2671		goto fail_2;
2672	}
2673
2674	dma->dma_paddr = 0;
2675	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2676	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2677	if (error || dma->dma_paddr == 0) {
2678		device_printf(adapter->dev,
2679		    "%s: bus_dmamap_load failed: %d\n",
2680		    __func__, error);
2681		goto fail_3;
2682	}
2683
2684	return (0);
2685
2686fail_3:
2687	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2688fail_2:
2689	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2690	bus_dma_tag_destroy(dma->dma_tag);
2691fail_0:
2692	dma->dma_map = NULL;
2693	dma->dma_tag = NULL;
2694
2695	return (error);
2696}
2697
2698static void
2699igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2700{
2701	if (dma->dma_tag == NULL)
2702		return;
2703	if (dma->dma_map != NULL) {
2704		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2705		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2706		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2707		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2708		dma->dma_map = NULL;
2709	}
2710	bus_dma_tag_destroy(dma->dma_tag);
2711	dma->dma_tag = NULL;
2712}
2713
2714
2715/*********************************************************************
2716 *
2717 *  Allocate memory for the transmit and receive rings, and then
2718 *  the descriptors associated with each, called only once at attach.
2719 *
2720 **********************************************************************/
2721static int
2722igb_allocate_queues(struct adapter *adapter)
2723{
2724	device_t dev = adapter->dev;
2725	struct tx_ring *txr;
2726	struct rx_ring *rxr;
2727	int rsize, tsize, error = E1000_SUCCESS;
2728	int txconf = 0, rxconf = 0;
2729
2730	/* First allocate the TX ring struct memory */
2731	if (!(adapter->tx_rings =
2732	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2733	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2734		device_printf(dev, "Unable to allocate TX ring memory\n");
2735		error = ENOMEM;
2736		goto fail;
2737	}
2738	txr = adapter->tx_rings;
2739
2740	/* Next allocate the RX */
2741	if (!(adapter->rx_rings =
2742	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2743	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2744		device_printf(dev, "Unable to allocate RX ring memory\n");
2745		error = ENOMEM;
2746		goto rx_fail;
2747	}
2748	rxr = adapter->rx_rings;
2749
2750	tsize = roundup2(adapter->num_tx_desc *
2751	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2752	/*
2753	 * Now set up the TX queues, txconf is needed to handle the
2754	 * possibility that things fail midcourse and we need to
2755	 * undo memory gracefully
2756	 */
2757	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2758		/* Set up some basics */
2759		txr = &adapter->tx_rings[i];
2760		txr->adapter = adapter;
2761		txr->me = i;
2762
2763		/* Initialize the TX lock */
2764		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2765		    device_get_nameunit(dev), txr->me);
2766		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2767
2768		if (igb_dma_malloc(adapter, tsize,
2769			&txr->txdma, BUS_DMA_NOWAIT)) {
2770			device_printf(dev,
2771			    "Unable to allocate TX Descriptor memory\n");
2772			error = ENOMEM;
2773			goto err_tx_desc;
2774		}
2775		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2776		bzero((void *)txr->tx_base, tsize);
2777
2778        	/* Now allocate transmit buffers for the ring */
2779        	if (igb_allocate_transmit_buffers(txr)) {
2780			device_printf(dev,
2781			    "Critical Failure setting up transmit buffers\n");
2782			error = ENOMEM;
2783			goto err_tx_desc;
2784        	}
2785#if __FreeBSD_version >= 800000
2786		/* Allocate a buf ring */
2787		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2788		    M_WAITOK, &txr->tx_mtx);
2789#endif
2790	}
2791
2792	/*
2793	 * Next the RX queues...
2794	 */
2795	rsize = roundup2(adapter->num_rx_desc *
2796	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2797	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2798		rxr = &adapter->rx_rings[i];
2799		rxr->adapter = adapter;
2800		rxr->me = i;
2801
2802		/* Initialize the RX lock */
2803		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2804		    device_get_nameunit(dev), txr->me);
2805		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2806
2807		if (igb_dma_malloc(adapter, rsize,
2808			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2809			device_printf(dev,
2810			    "Unable to allocate RxDescriptor memory\n");
2811			error = ENOMEM;
2812			goto err_rx_desc;
2813		}
2814		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2815		bzero((void *)rxr->rx_base, rsize);
2816
2817        	/* Allocate receive buffers for the ring*/
2818		if (igb_allocate_receive_buffers(rxr)) {
2819			device_printf(dev,
2820			    "Critical Failure setting up receive buffers\n");
2821			error = ENOMEM;
2822			goto err_rx_desc;
2823		}
2824	}
2825
2826	return (0);
2827
2828err_rx_desc:
2829	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2830		igb_dma_free(adapter, &rxr->rxdma);
2831err_tx_desc:
2832	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2833		igb_dma_free(adapter, &txr->txdma);
2834	free(adapter->rx_rings, M_DEVBUF);
2835rx_fail:
2836	free(adapter->tx_rings, M_DEVBUF);
2837fail:
2838	return (error);
2839}
2840
2841/*********************************************************************
2842 *
2843 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2844 *  the information needed to transmit a packet on the wire. This is
2845 *  called only once at attach, setup is done every reset.
2846 *
2847 **********************************************************************/
2848static int
2849igb_allocate_transmit_buffers(struct tx_ring *txr)
2850{
2851	struct adapter *adapter = txr->adapter;
2852	device_t dev = adapter->dev;
2853	struct igb_tx_buffer *txbuf;
2854	int error, i;
2855
2856	/*
2857	 * Setup DMA descriptor areas.
2858	 */
2859	if ((error = bus_dma_tag_create(NULL,		/* parent */
2860			       PAGE_SIZE, 0,		/* alignment, bounds */
2861			       BUS_SPACE_MAXADDR,	/* lowaddr */
2862			       BUS_SPACE_MAXADDR,	/* highaddr */
2863			       NULL, NULL,		/* filter, filterarg */
2864			       IGB_TSO_SIZE,		/* maxsize */
2865			       IGB_MAX_SCATTER,		/* nsegments */
2866			       PAGE_SIZE,		/* maxsegsize */
2867			       0,			/* flags */
2868			       NULL,			/* lockfunc */
2869			       NULL,			/* lockfuncarg */
2870			       &txr->txtag))) {
2871		device_printf(dev,"Unable to allocate TX DMA tag\n");
2872		goto fail;
2873	}
2874
2875	if (!(txr->tx_buffers =
2876	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2877	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2878		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2879		error = ENOMEM;
2880		goto fail;
2881	}
2882
2883        /* Create the descriptor buffer dma maps */
2884	txbuf = txr->tx_buffers;
2885	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2886		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2887		if (error != 0) {
2888			device_printf(dev, "Unable to create TX DMA map\n");
2889			goto fail;
2890		}
2891	}
2892
2893	return 0;
2894fail:
2895	/* We free all, it handles case where we are in the middle */
2896	igb_free_transmit_structures(adapter);
2897	return (error);
2898}
2899
2900/*********************************************************************
2901 *
2902 *  Initialize a transmit ring.
2903 *
2904 **********************************************************************/
2905static void
2906igb_setup_transmit_ring(struct tx_ring *txr)
2907{
2908	struct adapter *adapter = txr->adapter;
2909	struct igb_tx_buffer *txbuf;
2910	int i;
2911
2912	/* Clear the old descriptor contents */
2913	bzero((void *)txr->tx_base,
2914	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2915	/* Reset indices */
2916	txr->next_avail_desc = 0;
2917	txr->next_to_clean = 0;
2918
2919	/* Free any existing tx buffers. */
2920        txbuf = txr->tx_buffers;
2921	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2922		if (txbuf->m_head != NULL) {
2923			bus_dmamap_sync(txr->txtag, txbuf->map,
2924			    BUS_DMASYNC_POSTWRITE);
2925			bus_dmamap_unload(txr->txtag, txbuf->map);
2926			m_freem(txbuf->m_head);
2927			txbuf->m_head = NULL;
2928		}
2929		/* clear the watch index */
2930		txbuf->next_eop = -1;
2931        }
2932
2933	/* Set number of descriptors available */
2934	txr->tx_avail = adapter->num_tx_desc;
2935
2936	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2937	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2938
2939}
2940
2941/*********************************************************************
2942 *
2943 *  Initialize all transmit rings.
2944 *
2945 **********************************************************************/
2946static void
2947igb_setup_transmit_structures(struct adapter *adapter)
2948{
2949	struct tx_ring *txr = adapter->tx_rings;
2950
2951	for (int i = 0; i < adapter->num_queues; i++, txr++)
2952		igb_setup_transmit_ring(txr);
2953
2954	return;
2955}
2956
2957/*********************************************************************
2958 *
2959 *  Enable transmit unit.
2960 *
2961 **********************************************************************/
2962static void
2963igb_initialize_transmit_units(struct adapter *adapter)
2964{
2965	struct tx_ring	*txr = adapter->tx_rings;
2966	u32		tctl, txdctl;
2967
2968	 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2969
2970	/* Setup the Base and Length of the Tx Descriptor Rings */
2971	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2972		u64 bus_addr = txr->txdma.dma_paddr;
2973
2974		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2975		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2976		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2977		    (uint32_t)(bus_addr >> 32));
2978		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2979		    (uint32_t)bus_addr);
2980
2981		/* Setup the HW Tx Head and Tail descriptor pointers */
2982		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2983		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2984
2985		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2986		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2987		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2988
2989		/* Setup Transmit Descriptor Base Settings */
2990		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2991
2992		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2993		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2994		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2995	}
2996
2997	/* Program the Transmit Control Register */
2998	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2999	tctl &= ~E1000_TCTL_CT;
3000	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3001		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3002
3003	e1000_config_collision_dist(&adapter->hw);
3004
3005	/* This write will effectively turn on the transmit unit. */
3006	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3007
3008}
3009
3010/*********************************************************************
3011 *
3012 *  Free all transmit rings.
3013 *
3014 **********************************************************************/
3015static void
3016igb_free_transmit_structures(struct adapter *adapter)
3017{
3018	struct tx_ring *txr = adapter->tx_rings;
3019
3020	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3021		IGB_TX_LOCK(txr);
3022		igb_free_transmit_buffers(txr);
3023		igb_dma_free(adapter, &txr->txdma);
3024		IGB_TX_UNLOCK(txr);
3025		IGB_TX_LOCK_DESTROY(txr);
3026	}
3027	free(adapter->tx_rings, M_DEVBUF);
3028}
3029
3030/*********************************************************************
3031 *
3032 *  Free transmit ring related data structures.
3033 *
3034 **********************************************************************/
3035static void
3036igb_free_transmit_buffers(struct tx_ring *txr)
3037{
3038	struct adapter *adapter = txr->adapter;
3039	struct igb_tx_buffer *tx_buffer;
3040	int             i;
3041
3042	INIT_DEBUGOUT("free_transmit_ring: begin");
3043
3044	if (txr->tx_buffers == NULL)
3045		return;
3046
3047	tx_buffer = txr->tx_buffers;
3048	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3049		if (tx_buffer->m_head != NULL) {
3050			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3051			    BUS_DMASYNC_POSTWRITE);
3052			bus_dmamap_unload(txr->txtag,
3053			    tx_buffer->map);
3054			m_freem(tx_buffer->m_head);
3055			tx_buffer->m_head = NULL;
3056			if (tx_buffer->map != NULL) {
3057				bus_dmamap_destroy(txr->txtag,
3058				    tx_buffer->map);
3059				tx_buffer->map = NULL;
3060			}
3061		} else if (tx_buffer->map != NULL) {
3062			bus_dmamap_unload(txr->txtag,
3063			    tx_buffer->map);
3064			bus_dmamap_destroy(txr->txtag,
3065			    tx_buffer->map);
3066			tx_buffer->map = NULL;
3067		}
3068	}
3069#if __FreeBSD_version >= 800000
3070	buf_ring_free(txr->br, M_DEVBUF);
3071#endif
3072	if (txr->tx_buffers != NULL) {
3073		free(txr->tx_buffers, M_DEVBUF);
3074		txr->tx_buffers = NULL;
3075	}
3076	if (txr->txtag != NULL) {
3077		bus_dma_tag_destroy(txr->txtag);
3078		txr->txtag = NULL;
3079	}
3080	return;
3081}
3082
3083/**********************************************************************
3084 *
3085 *  Setup work for hardware segmentation offload (TSO) on
3086 *  adapters using advanced tx descriptors (82575)
3087 *
3088 **********************************************************************/
3089static boolean_t
3090igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3091{
3092	struct adapter *adapter = txr->adapter;
3093	struct e1000_adv_tx_context_desc *TXD;
3094	struct igb_tx_buffer        *tx_buffer;
3095	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3096	u32 mss_l4len_idx = 0;
3097	u16 vtag = 0;
3098	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3099	struct ether_vlan_header *eh;
3100	struct ip *ip;
3101	struct tcphdr *th;
3102
3103
3104	/*
3105	 * Determine where frame payload starts.
3106	 * Jump over vlan headers if already present
3107	 */
3108	eh = mtod(mp, struct ether_vlan_header *);
3109	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3110		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3111	else
3112		ehdrlen = ETHER_HDR_LEN;
3113
3114	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3115	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3116		return FALSE;
3117
3118	/* Only supports IPV4 for now */
3119	ctxd = txr->next_avail_desc;
3120	tx_buffer = &txr->tx_buffers[ctxd];
3121	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3122
3123	ip = (struct ip *)(mp->m_data + ehdrlen);
3124	if (ip->ip_p != IPPROTO_TCP)
3125                return FALSE;   /* 0 */
3126	ip->ip_sum = 0;
3127	ip_hlen = ip->ip_hl << 2;
3128	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3129	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3130	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3131	tcp_hlen = th->th_off << 2;
3132	/*
3133	 * Calculate header length, this is used
3134	 * in the transmit desc in igb_xmit
3135	 */
3136	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3137
3138	/* VLAN MACLEN IPLEN */
3139	if (mp->m_flags & M_VLANTAG) {
3140		vtag = htole16(mp->m_pkthdr.ether_vtag);
3141		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3142	}
3143
3144	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3145	vlan_macip_lens |= ip_hlen;
3146	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3147
3148	/* ADV DTYPE TUCMD */
3149	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3150	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3151	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3152	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3153
3154	/* MSS L4LEN IDX */
3155	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3156	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3157	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3158
3159	TXD->seqnum_seed = htole32(0);
3160	tx_buffer->m_head = NULL;
3161	tx_buffer->next_eop = -1;
3162
3163	if (++ctxd == adapter->num_tx_desc)
3164		ctxd = 0;
3165
3166	txr->tx_avail--;
3167	txr->next_avail_desc = ctxd;
3168	return TRUE;
3169}
3170
3171
3172/*********************************************************************
3173 *
3174 *  Context Descriptor setup for VLAN or CSUM
3175 *
3176 **********************************************************************/
3177
3178static bool
3179igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3180{
3181	struct adapter *adapter = txr->adapter;
3182	struct e1000_adv_tx_context_desc *TXD;
3183	struct igb_tx_buffer        *tx_buffer;
3184	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3185	struct ether_vlan_header *eh;
3186	struct ip *ip = NULL;
3187	struct ip6_hdr *ip6;
3188	int  ehdrlen, ctxd, ip_hlen = 0;
3189	u16	etype, vtag = 0;
3190	u8	ipproto = 0;
3191	bool	offload = TRUE;
3192
3193	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3194		offload = FALSE;
3195
3196	ctxd = txr->next_avail_desc;
3197	tx_buffer = &txr->tx_buffers[ctxd];
3198	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3199
3200	/*
3201	** In advanced descriptors the vlan tag must
3202	** be placed into the context descriptor, thus
3203	** we need to be here just for that setup.
3204	*/
3205	if (mp->m_flags & M_VLANTAG) {
3206		vtag = htole16(mp->m_pkthdr.ether_vtag);
3207		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3208	} else if (offload == FALSE)
3209		return FALSE;
3210
3211	/*
3212	 * Determine where frame payload starts.
3213	 * Jump over vlan headers if already present,
3214	 * helpful for QinQ too.
3215	 */
3216	eh = mtod(mp, struct ether_vlan_header *);
3217	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3218		etype = ntohs(eh->evl_proto);
3219		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3220	} else {
3221		etype = ntohs(eh->evl_encap_proto);
3222		ehdrlen = ETHER_HDR_LEN;
3223	}
3224
3225	/* Set the ether header length */
3226	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3227
3228	switch (etype) {
3229		case ETHERTYPE_IP:
3230			ip = (struct ip *)(mp->m_data + ehdrlen);
3231			ip_hlen = ip->ip_hl << 2;
3232			if (mp->m_len < ehdrlen + ip_hlen) {
3233				offload = FALSE;
3234				break;
3235			}
3236			ipproto = ip->ip_p;
3237			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3238			break;
3239		case ETHERTYPE_IPV6:
3240			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3241			ip_hlen = sizeof(struct ip6_hdr);
3242			if (mp->m_len < ehdrlen + ip_hlen)
3243				return (FALSE);
3244			ipproto = ip6->ip6_nxt;
3245			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3246			break;
3247		default:
3248			offload = FALSE;
3249			break;
3250	}
3251
3252	vlan_macip_lens |= ip_hlen;
3253	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3254
3255	switch (ipproto) {
3256		case IPPROTO_TCP:
3257			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3258				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3259			break;
3260		case IPPROTO_UDP:
3261			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3262				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3263			break;
3264#if __FreeBSD_version >= 800000
3265		case IPPROTO_SCTP:
3266			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3267				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3268			break;
3269#endif
3270		default:
3271			offload = FALSE;
3272			break;
3273	}
3274
3275	/* Now copy bits into descriptor */
3276	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3277	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3278	TXD->seqnum_seed = htole32(0);
3279	TXD->mss_l4len_idx = htole32(0);
3280
3281	tx_buffer->m_head = NULL;
3282	tx_buffer->next_eop = -1;
3283
3284	/* We've consumed the first desc, adjust counters */
3285	if (++ctxd == adapter->num_tx_desc)
3286		ctxd = 0;
3287	txr->next_avail_desc = ctxd;
3288	--txr->tx_avail;
3289
3290        return (offload);
3291}
3292
3293
3294/**********************************************************************
3295 *
3296 *  Examine each tx_buffer in the used queue. If the hardware is done
3297 *  processing the packet then free associated resources. The
3298 *  tx_buffer is put back on the free queue.
3299 *
3300 *  TRUE return means there's work in the ring to clean, FALSE its empty.
3301 **********************************************************************/
3302static bool
3303igb_txeof(struct tx_ring *txr)
3304{
3305	struct adapter	*adapter = txr->adapter;
3306        int first, last, done, num_avail;
3307	u32	cleaned = 0;
3308        struct igb_tx_buffer *tx_buffer;
3309        struct e1000_tx_desc   *tx_desc, *eop_desc;
3310	struct ifnet   *ifp = adapter->ifp;
3311
3312	IGB_TX_LOCK_ASSERT(txr);
3313
3314        if (txr->tx_avail == adapter->num_tx_desc)
3315                return FALSE;
3316
3317        num_avail = txr->tx_avail;
3318        first = txr->next_to_clean;
3319        tx_desc = &txr->tx_base[first];
3320        tx_buffer = &txr->tx_buffers[first];
3321	last = tx_buffer->next_eop;
3322        eop_desc = &txr->tx_base[last];
3323
3324	/*
3325	 * What this does is get the index of the
3326	 * first descriptor AFTER the EOP of the
3327	 * first packet, that way we can do the
3328	 * simple comparison on the inner while loop.
3329	 */
3330	if (++last == adapter->num_tx_desc)
3331 		last = 0;
3332	done = last;
3333
3334        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3335            BUS_DMASYNC_POSTREAD);
3336
3337        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3338		/* We clean the range of the packet */
3339		while (first != done) {
3340                	tx_desc->upper.data = 0;
3341                	tx_desc->lower.data = 0;
3342                	tx_desc->buffer_addr = 0;
3343                	++num_avail; ++cleaned;
3344
3345			if (tx_buffer->m_head) {
3346				ifp->if_opackets++;
3347				bus_dmamap_sync(txr->txtag,
3348				    tx_buffer->map,
3349				    BUS_DMASYNC_POSTWRITE);
3350				bus_dmamap_unload(txr->txtag,
3351				    tx_buffer->map);
3352
3353                        	m_freem(tx_buffer->m_head);
3354                        	tx_buffer->m_head = NULL;
3355                	}
3356			tx_buffer->next_eop = -1;
3357
3358	                if (++first == adapter->num_tx_desc)
3359				first = 0;
3360
3361	                tx_buffer = &txr->tx_buffers[first];
3362			tx_desc = &txr->tx_base[first];
3363		}
3364		/* See if we can continue to the next packet */
3365		last = tx_buffer->next_eop;
3366		if (last != -1) {
3367        		eop_desc = &txr->tx_base[last];
3368			/* Get new done point */
3369			if (++last == adapter->num_tx_desc) last = 0;
3370			done = last;
3371		} else
3372			break;
3373        }
3374        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3375            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3376
3377        txr->next_to_clean = first;
3378
3379        /*
3380         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3381         * that it is OK to send packets.
3382         * If there are no pending descriptors, clear the timeout. Otherwise,
3383         * if some descriptors have been freed, restart the timeout.
3384         */
3385        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3386                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3387		/* All clean, turn off the timer */
3388                if (num_avail == adapter->num_tx_desc) {
3389			txr->watchdog_timer = 0;
3390        		txr->tx_avail = num_avail;
3391			return FALSE;
3392		}
3393        }
3394
3395	/* Some cleaned, reset the timer */
3396	if (cleaned)
3397		txr->watchdog_timer = IGB_TX_TIMEOUT;
3398        txr->tx_avail = num_avail;
3399        return TRUE;
3400}
3401
3402
3403/*********************************************************************
3404 *
3405 *  Setup descriptor buffer(s) from system mbuf buffer pools.
3406 *  		i - designates the ring index
3407 *		clean - tells the function whether to update
3408 *		        the header, the packet buffer, or both.
3409 *
3410 **********************************************************************/
3411static int
3412igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3413{
3414	struct adapter		*adapter = rxr->adapter;
3415	struct mbuf		*mh, *mp;
3416	bus_dma_segment_t	seg[2];
3417	bus_dmamap_t		map;
3418	struct igb_rx_buffer	*rx_buffer;
3419	int			error, nsegs;
3420	int			merr = 0;
3421
3422
3423	rx_buffer = &rxr->rx_buffers[i];
3424
3425	/* First get our header and payload mbuf */
3426	if (clean & IGB_CLEAN_HEADER) {
3427		mh = m_gethdr(M_DONTWAIT, MT_DATA);
3428		if (mh == NULL)
3429			goto remap;
3430	} else  /* reuse */
3431		mh = rxr->rx_buffers[i].m_head;
3432
3433	mh->m_len = MHLEN;
3434	mh->m_flags |= M_PKTHDR;
3435
3436	if (clean & IGB_CLEAN_PAYLOAD) {
3437		mp = m_getjcl(M_DONTWAIT, MT_DATA,
3438		    M_PKTHDR, adapter->rx_mbuf_sz);
3439		if (mp == NULL)
3440			goto remap;
3441		mp->m_len = adapter->rx_mbuf_sz;
3442		mp->m_flags &= ~M_PKTHDR;
3443	} else {	/* reusing */
3444		mp = rxr->rx_buffers[i].m_pack;
3445		mp->m_len = adapter->rx_mbuf_sz;
3446		mp->m_flags &= ~M_PKTHDR;
3447	}
3448	/*
3449	** Need to create a chain for the following
3450	** dmamap call at this point.
3451	*/
3452	mh->m_next = mp;
3453	mh->m_pkthdr.len = mh->m_len + mp->m_len;
3454
3455	/* Get the memory mapping */
3456	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3457	    rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3458	if (error != 0) {
3459		printf("GET BUF: dmamap load failure - %d\n", error);
3460		m_free(mh);
3461		return (error);
3462	}
3463
3464	/* Unload old mapping and update buffer struct */
3465	if (rx_buffer->m_head != NULL)
3466			bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3467	map = rx_buffer->map;
3468	rx_buffer->map = rxr->rx_spare_map;
3469	rxr->rx_spare_map = map;
3470	rx_buffer->m_head = mh;
3471	rx_buffer->m_pack = mp;
3472	bus_dmamap_sync(rxr->rxtag,
3473	    rx_buffer->map, BUS_DMASYNC_PREREAD);
3474
3475	/* Update descriptor */
3476	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3477	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3478
3479	return (0);
3480
3481	/*
3482	** If we get here, we have an mbuf resource
3483	** issue, so we discard the incoming packet
3484	** and attempt to reuse existing mbufs next
3485	** pass thru the ring, but to do so we must
3486	** fix up the descriptor which had the address
3487	** clobbered with writeback info.
3488	*/
3489remap:
3490	adapter->mbuf_header_failed++;
3491	merr = ENOBUFS;
3492	/* Is there a reusable buffer? */
3493	mh = rxr->rx_buffers[i].m_head;
3494	if (mh == NULL) /* Nope, init error */
3495		return (merr);
3496	mp = rxr->rx_buffers[i].m_pack;
3497	if (mp == NULL) /* Nope, init error */
3498		return (merr);
3499	/* Get our old mapping */
3500	rx_buffer = &rxr->rx_buffers[i];
3501	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3502	    rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3503	if (error != 0) {
3504		/* We really have a problem */
3505		m_free(mh);
3506		return (error);
3507	}
3508	/* Now fix the descriptor as needed */
3509	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3510	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3511	return (merr);
3512}
3513
3514
3515/*********************************************************************
3516 *
3517 *  Allocate memory for rx_buffer structures. Since we use one
3518 *  rx_buffer per received packet, the maximum number of rx_buffer's
3519 *  that we'll need is equal to the number of receive descriptors
3520 *  that we've allocated.
3521 *
3522 **********************************************************************/
3523static int
3524igb_allocate_receive_buffers(struct rx_ring *rxr)
3525{
3526	struct	adapter 	*adapter = rxr->adapter;
3527	device_t 		dev = adapter->dev;
3528	struct igb_rx_buffer 	*rxbuf;
3529	int             	i, bsize, error;
3530
3531	bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3532	if (!(rxr->rx_buffers =
3533	    (struct igb_rx_buffer *) malloc(bsize,
3534	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3535		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3536		error = ENOMEM;
3537		goto fail;
3538	}
3539
3540	/*
3541	** The tag is made to accomodate the largest buffer size
3542	** with packet split (hence the two segments, even though
3543	** it may not always use this.
3544	*/
3545	if ((error = bus_dma_tag_create(NULL,		/* parent */
3546				   PAGE_SIZE, 0,	/* alignment, bounds */
3547				   BUS_SPACE_MAXADDR,	/* lowaddr */
3548				   BUS_SPACE_MAXADDR,	/* highaddr */
3549				   NULL, NULL,		/* filter, filterarg */
3550				   MJUM16BYTES,		/* maxsize */
3551				   2,			/* nsegments */
3552				   MJUMPAGESIZE,	/* maxsegsize */
3553				   0,			/* flags */
3554				   NULL,		/* lockfunc */
3555				   NULL,		/* lockfuncarg */
3556				   &rxr->rxtag))) {
3557		device_printf(dev, "Unable to create RX DMA tag\n");
3558		goto fail;
3559	}
3560
3561	/* Create the spare map (used by getbuf) */
3562        error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3563	     &rxr->rx_spare_map);
3564	if (error) {
3565		device_printf(dev,
3566		    "%s: bus_dmamap_create header spare failed: %d\n",
3567		    __func__, error);
3568		goto fail;
3569	}
3570
3571	for (i = 0; i < adapter->num_rx_desc; i++, rxbuf++) {
3572		rxbuf = &rxr->rx_buffers[i];
3573		error = bus_dmamap_create(rxr->rxtag,
3574		    BUS_DMA_NOWAIT, &rxbuf->map);
3575		if (error) {
3576			device_printf(dev, "Unable to create RX DMA maps\n");
3577			goto fail;
3578		}
3579	}
3580
3581	return (0);
3582
3583fail:
3584	/* Frees all, but can handle partial completion */
3585	igb_free_receive_structures(adapter);
3586	return (error);
3587}
3588
3589/*********************************************************************
3590 *
3591 *  Initialize a receive ring and its buffers.
3592 *
3593 **********************************************************************/
3594static int
3595igb_setup_receive_ring(struct rx_ring *rxr)
3596{
3597	struct	adapter		*adapter;
3598	struct  ifnet		*ifp;
3599	device_t		dev;
3600	struct igb_rx_buffer	*rxbuf;
3601	struct lro_ctrl		*lro = &rxr->lro;
3602	int			j, rsize;
3603
3604	adapter = rxr->adapter;
3605	dev = adapter->dev;
3606	ifp = adapter->ifp;
3607	rxr->lro_enabled = FALSE;
3608	rxr->hdr_split = FALSE;
3609
3610	/* Clear the ring contents */
3611	rsize = roundup2(adapter->num_rx_desc *
3612	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3613	bzero((void *)rxr->rx_base, rsize);
3614
3615	/*
3616	** Free current RX buffer structures and their mbufs
3617	*/
3618	for (int i = 0; i < adapter->num_rx_desc; i++) {
3619		rxbuf = &rxr->rx_buffers[i];
3620		bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3621		    BUS_DMASYNC_POSTREAD);
3622		bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3623		if (rxbuf->m_head) {
3624			rxbuf->m_head->m_next = rxbuf->m_pack;
3625			m_freem(rxbuf->m_head);
3626		}
3627		rxbuf->m_head = NULL;
3628		rxbuf->m_pack = NULL;
3629	}
3630
3631	/* Next replenish the ring */
3632	for (j = 0; j < adapter->num_rx_desc; j++) {
3633		if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
3634			rxr->rx_buffers[j].m_head = NULL;
3635			rxr->rx_buffers[j].m_pack = NULL;
3636			rxr->rx_base[j].read.hdr_addr = 0;
3637			rxr->rx_base[j].read.pkt_addr = 0;
3638			goto fail;
3639		}
3640	}
3641
3642	/* Setup our descriptor indices */
3643	rxr->next_to_check = 0;
3644	rxr->last_cleaned = 0;
3645
3646	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3647	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3648
3649	/*
3650	** Now set up the LRO interface, we
3651	** also only do head split when LRO
3652	** is enabled, since so often they
3653	** are undesireable in similar setups.
3654	*/
3655	if (ifp->if_capenable & IFCAP_LRO) {
3656		int err = tcp_lro_init(lro);
3657		if (err) {
3658			device_printf(dev,"LRO Initialization failed!\n");
3659			goto fail;
3660		}
3661		INIT_DEBUGOUT("RX LRO Initialized\n");
3662		rxr->lro_enabled = TRUE;
3663		rxr->hdr_split = TRUE;
3664		lro->ifp = adapter->ifp;
3665	}
3666
3667	return (0);
3668fail:
3669	/*
3670	 * We need to clean up any buffers allocated
3671	 * so far, 'j' is the failing index.
3672	 */
3673	for (int i = 0; i < j; i++) {
3674		rxbuf = &rxr->rx_buffers[i];
3675		if (rxbuf->m_head != NULL) {
3676			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3677			    BUS_DMASYNC_POSTREAD);
3678			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3679			m_freem(rxbuf->m_head);
3680			rxbuf->m_head = NULL;
3681		}
3682	}
3683	return (ENOBUFS);
3684}
3685
3686/*********************************************************************
3687 *
3688 *  Initialize all receive rings.
3689 *
3690 **********************************************************************/
3691static int
3692igb_setup_receive_structures(struct adapter *adapter)
3693{
3694	struct rx_ring *rxr = adapter->rx_rings;
3695	int i, j;
3696
3697	for (i = 0; i < adapter->num_queues; i++, rxr++)
3698		if (igb_setup_receive_ring(rxr))
3699			goto fail;
3700
3701	return (0);
3702fail:
3703	/*
3704	 * Free RX buffers allocated so far, we will only handle
3705	 * the rings that completed, the failing case will have
3706	 * cleaned up for itself. The value of 'i' will be the
3707	 * failed ring so we must pre-decrement it.
3708	 */
3709	rxr = adapter->rx_rings;
3710	for (--i; i > 0; i--, rxr++) {
3711		for (j = 0; j < adapter->num_rx_desc; j++) {
3712			struct igb_rx_buffer *rxbuf;
3713			rxbuf = &rxr->rx_buffers[j];
3714			if (rxbuf->m_head != NULL) {
3715				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3716			  	  BUS_DMASYNC_POSTREAD);
3717				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3718				m_freem(rxbuf->m_head);
3719				rxbuf->m_head = NULL;
3720			}
3721		}
3722	}
3723
3724	return (ENOBUFS);
3725}
3726
3727/*********************************************************************
3728 *
3729 *  Enable receive unit.
3730 *
3731 **********************************************************************/
3732static void
3733igb_initialize_receive_units(struct adapter *adapter)
3734{
3735	struct rx_ring	*rxr = adapter->rx_rings;
3736	struct ifnet	*ifp = adapter->ifp;
3737	u32		rctl, rxcsum, psize, srrctl = 0;
3738
3739	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
3740
3741	/*
3742	 * Make sure receives are disabled while setting
3743	 * up the descriptor ring
3744	 */
3745	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3746	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3747
3748	/*
3749	** Set up for header split
3750	*/
3751	if (rxr->hdr_split) {
3752		/* Use a standard mbuf for the header */
3753		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3754		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3755	} else
3756		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3757
3758	/*
3759	** Set up for jumbo frames
3760	*/
3761	if (ifp->if_mtu > ETHERMTU) {
3762		rctl |= E1000_RCTL_LPE;
3763		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3764		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3765
3766		/* Set maximum packet len */
3767		psize = adapter->max_frame_size;
3768		/* are we on a vlan? */
3769		if (adapter->ifp->if_vlantrunk != NULL)
3770			psize += VLAN_TAG_SIZE;
3771		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3772	} else {
3773		rctl &= ~E1000_RCTL_LPE;
3774		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3775		rctl |= E1000_RCTL_SZ_2048;
3776	}
3777
3778	/* Setup the Base and Length of the Rx Descriptor Rings */
3779	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3780		u64 bus_addr = rxr->rxdma.dma_paddr;
3781		u32 rxdctl;
3782
3783		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3784		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3785		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3786		    (uint32_t)(bus_addr >> 32));
3787		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3788		    (uint32_t)bus_addr);
3789		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3790		/* Enable this Queue */
3791		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3792		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3793		rxdctl &= 0xFFF00000;
3794		rxdctl |= IGB_RX_PTHRESH;
3795		rxdctl |= IGB_RX_HTHRESH << 8;
3796		rxdctl |= IGB_RX_WTHRESH << 16;
3797		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3798	}
3799
3800	/*
3801	** Setup for RX MultiQueue
3802	*/
3803	rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3804	if (adapter->num_queues >1) {
3805		u32 random[10], mrqc, shift = 0;
3806		union igb_reta {
3807			u32 dword;
3808			u8  bytes[4];
3809		} reta;
3810
3811		arc4rand(&random, sizeof(random), 0);
3812		if (adapter->hw.mac.type == e1000_82575)
3813			shift = 6;
3814		/* Warning FM follows */
3815		for (int i = 0; i < 128; i++) {
3816			reta.bytes[i & 3] =
3817			    (i % adapter->num_queues) << shift;
3818			if ((i & 3) == 3)
3819				E1000_WRITE_REG(&adapter->hw,
3820				    E1000_RETA(i >> 2), reta.dword);
3821		}
3822		/* Now fill in hash table */
3823		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3824		for (int i = 0; i < 10; i++)
3825			E1000_WRITE_REG_ARRAY(&adapter->hw,
3826			    E1000_RSSRK(0), i, random[i]);
3827
3828		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3829		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3830		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3831		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3832		mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP |
3833		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3834		mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3835		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3836
3837		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3838
3839		/*
3840		** NOTE: Receive Full-Packet Checksum Offload
3841		** is mutually exclusive with Multiqueue. However
3842		** this is not the same as TCP/IP checksums which
3843		** still work.
3844		*/
3845		rxcsum |= E1000_RXCSUM_PCSD;
3846#if __FreeBSD_version >= 800000
3847		/* For SCTP Offload */
3848		if ((adapter->hw.mac.type == e1000_82576)
3849		    && (ifp->if_capenable & IFCAP_RXCSUM))
3850			rxcsum |= E1000_RXCSUM_CRCOFL;
3851#endif
3852	} else {
3853		/* Non RSS setup */
3854		if (ifp->if_capenable & IFCAP_RXCSUM) {
3855			rxcsum |= E1000_RXCSUM_IPPCSE;
3856#if __FreeBSD_version >= 800000
3857			if (adapter->hw.mac.type == e1000_82576)
3858				rxcsum |= E1000_RXCSUM_CRCOFL;
3859#endif
3860		} else
3861			rxcsum &= ~E1000_RXCSUM_TUOFL;
3862	}
3863	E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3864
3865	/* Setup the Receive Control Register */
3866	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3867	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3868		   E1000_RCTL_RDMTS_HALF |
3869		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3870
3871	/* Make sure VLAN Filters are off */
3872	rctl &= ~E1000_RCTL_VFE;
3873	/* Don't store bad packets */
3874	rctl &= ~E1000_RCTL_SBP;
3875
3876	/* Enable Receives */
3877	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3878
3879	/*
3880	 * Setup the HW Rx Head and Tail Descriptor Pointers
3881	 *   - needs to be after enable
3882	 */
3883	for (int i = 0; i < adapter->num_queues; i++) {
3884		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3885		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3886		     adapter->num_rx_desc - 1);
3887	}
3888	return;
3889}
3890
3891/*********************************************************************
3892 *
3893 *  Free receive rings.
3894 *
3895 **********************************************************************/
3896static void
3897igb_free_receive_structures(struct adapter *adapter)
3898{
3899	struct rx_ring *rxr = adapter->rx_rings;
3900
3901	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3902		struct lro_ctrl	*lro = &rxr->lro;
3903		igb_free_receive_buffers(rxr);
3904		tcp_lro_free(lro);
3905		igb_dma_free(adapter, &rxr->rxdma);
3906	}
3907
3908	free(adapter->rx_rings, M_DEVBUF);
3909}
3910
3911/*********************************************************************
3912 *
3913 *  Free receive ring data structures.
3914 *
3915 **********************************************************************/
3916static void
3917igb_free_receive_buffers(struct rx_ring *rxr)
3918{
3919	struct adapter	*adapter = rxr->adapter;
3920	struct igb_rx_buffer *rx_buffer;
3921
3922	INIT_DEBUGOUT("free_receive_structures: begin");
3923
3924	if (rxr->rx_spare_map) {
3925		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3926		rxr->rx_spare_map = NULL;
3927	}
3928
3929	/* Cleanup any existing buffers */
3930	if (rxr->rx_buffers != NULL) {
3931		rx_buffer = &rxr->rx_buffers[0];
3932		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3933			if (rx_buffer->m_head != NULL) {
3934				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3935				    BUS_DMASYNC_POSTREAD);
3936				bus_dmamap_unload(rxr->rxtag,
3937				    rx_buffer->map);
3938				m_freem(rx_buffer->m_head);
3939				rx_buffer->m_head = NULL;
3940			} else if (rx_buffer->map != NULL)
3941				bus_dmamap_unload(rxr->rxtag,
3942				    rx_buffer->map);
3943			if (rx_buffer->map != NULL) {
3944				bus_dmamap_destroy(rxr->rxtag,
3945				    rx_buffer->map);
3946				rx_buffer->map = NULL;
3947			}
3948		}
3949	}
3950
3951	if (rxr->rx_buffers != NULL) {
3952		free(rxr->rx_buffers, M_DEVBUF);
3953		rxr->rx_buffers = NULL;
3954	}
3955
3956	if (rxr->rxtag != NULL) {
3957		bus_dma_tag_destroy(rxr->rxtag);
3958		rxr->rxtag = NULL;
3959	}
3960}
3961/*********************************************************************
3962 *
3963 *  This routine executes in interrupt context. It replenishes
3964 *  the mbufs in the descriptor and sends data which has been
3965 *  dma'ed into host memory to upper layer.
3966 *
3967 *  We loop at most count times if count is > 0, or until done if
3968 *  count < 0.
3969 *
3970 *  Return TRUE if more to clean, FALSE otherwise
3971 *********************************************************************/
3972static bool
3973igb_rxeof(struct rx_ring *rxr, int count)
3974{
3975	struct adapter		*adapter = rxr->adapter;
3976	struct ifnet		*ifp;
3977	struct lro_ctrl		*lro = &rxr->lro;
3978	struct lro_entry	*queued;
3979	int			i;
3980	u32			staterr;
3981	union e1000_adv_rx_desc	*cur;
3982
3983
3984	IGB_RX_LOCK(rxr);
3985	ifp = adapter->ifp;
3986	i = rxr->next_to_check;
3987	cur = &rxr->rx_base[i];
3988	staterr = cur->wb.upper.status_error;
3989
3990	if (!(staterr & E1000_RXD_STAT_DD)) {
3991		IGB_RX_UNLOCK(rxr);
3992		return FALSE;
3993	}
3994
3995	/* Sync the ring */
3996	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3997	    BUS_DMASYNC_POSTREAD);
3998
3999	/* Main clean loop */
4000	while ((staterr & E1000_RXD_STAT_DD) &&
4001	    (count != 0) &&
4002	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4003		struct mbuf *sendmp, *mh, *mp;
4004		u16 hlen, plen, hdr, ptype, len_adj, vtag;
4005		u8 dopayload, accept_frame, eop;
4006
4007		accept_frame = 1;
4008		hlen = plen = len_adj = vtag = 0;
4009		sendmp = mh = mp = NULL;
4010		ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4011
4012		/* Sync the buffers */
4013		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4014			    BUS_DMASYNC_POSTREAD);
4015
4016		/*
4017		** The way the hardware is configured to
4018		** split, it will ONLY use the header buffer
4019		** when header split is enabled, otherwise we
4020		** get normal behavior, ie, both header and
4021		** payload are DMA'd into the payload buffer.
4022		**
4023		** The fmp test is to catch the case where a
4024		** packet spans multiple descriptors, in that
4025		** case only the first header is valid.
4026		*/
4027		if ((rxr->hdr_split) && (rxr->fmp == NULL)){
4028			hdr = le16toh(cur->
4029			    wb.lower.lo_dword.hs_rss.hdr_info);
4030			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4031			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4032			if (hlen > IGB_HDR_BUF)
4033				hlen = IGB_HDR_BUF;
4034			plen = le16toh(cur->wb.upper.length);
4035			/* Handle the header mbuf */
4036			mh = rxr->rx_buffers[i].m_head;
4037			mh->m_len = hlen;
4038			dopayload = IGB_CLEAN_HEADER;
4039			/*
4040			** Get the payload length, this
4041			** could be zero if its a small
4042			** packet.
4043			*/
4044			if (plen) {
4045				mp = rxr->rx_buffers[i].m_pack;
4046				mp->m_len = plen;
4047				mp->m_next = NULL;
4048				mp->m_flags &= ~M_PKTHDR;
4049				mh->m_next = mp;
4050				mh->m_flags |= M_PKTHDR;
4051				dopayload = IGB_CLEAN_BOTH;
4052				rxr->rx_split_packets++;
4053			} else {  /* small packets */
4054				mh->m_flags &= ~M_PKTHDR;
4055				mh->m_next = NULL;
4056			}
4057		} else {
4058			/*
4059			** Either no header split, or a
4060			** secondary piece of a fragmented
4061			** split packet.
4062			*/
4063			mh = rxr->rx_buffers[i].m_pack;
4064			mh->m_flags |= M_PKTHDR;
4065			mh->m_len = le16toh(cur->wb.upper.length);
4066			dopayload = IGB_CLEAN_PAYLOAD;
4067		}
4068
4069		if (staterr & E1000_RXD_STAT_EOP) {
4070			count--;
4071			eop = 1;
4072			/*
4073			** Strip CRC and account for frag
4074			*/
4075			if (mp) {
4076				if (mp->m_len < ETHER_CRC_LEN) {
4077					/* a frag, how much is left? */
4078					len_adj = ETHER_CRC_LEN - mp->m_len;
4079					mp->m_len = 0;
4080				} else
4081					mp->m_len -= ETHER_CRC_LEN;
4082			} else { /* not split */
4083				if (mh->m_len < ETHER_CRC_LEN) {
4084					len_adj = ETHER_CRC_LEN - mh->m_len;
4085					mh->m_len = 0;
4086				} else
4087					mh->m_len -= ETHER_CRC_LEN;
4088			}
4089		} else
4090			eop = 0;
4091
4092		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4093			accept_frame = 0;
4094#ifdef IGB_IEEE1588
4095	This linux code needs to be converted to work here
4096	-----------------------------------------------------
4097               if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4098                       u64 regval;
4099                       u64 ns;
4100// Create an mtag and set it up
4101                       struct skb_shared_hwtstamps *shhwtstamps =
4102                               skb_hwtstamps(skb);
4103
4104                       rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4105                       "igb: no RX time stamp available for time stamped packet");
4106                       regval = rd32(E1000_RXSTMPL);
4107                       regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4108// Do time conversion from the register
4109                       ns = timecounter_cyc2time(&adapter->clock, regval);
4110                       clocksync_update(&adapter->sync, ns);
4111                       memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4112                       shhwtstamps->hwtstamp = ns_to_ktime(ns);
4113                       shhwtstamps->syststamp =
4114                               clocksync_hw2sys(&adapter->sync, ns);
4115               }
4116#endif
4117		if (accept_frame) {
4118			/*
4119			** get_buf will overwrite the writeback
4120			** descriptor so save the VLAN tag now.
4121			*/
4122			vtag = le16toh(cur->wb.upper.vlan);
4123			if (igb_get_buf(rxr, i, dopayload) != 0) {
4124				ifp->if_iqdrops++;
4125				goto discard;
4126			}
4127			/* Initial frame - setup */
4128			if (rxr->fmp == NULL) {
4129				mh->m_flags |= M_PKTHDR;
4130				mh->m_pkthdr.len = mh->m_len;
4131				rxr->fmp = mh; /* Store the first mbuf */
4132				rxr->lmp = mh;
4133				if (mp) { /* Add payload if split */
4134					mh->m_pkthdr.len += mp->m_len;
4135					rxr->lmp = mh->m_next;
4136				}
4137			} else {
4138				/* Chain mbuf's together */
4139				mh->m_flags &= ~M_PKTHDR;
4140				rxr->lmp->m_next = mh;
4141				rxr->lmp = rxr->lmp->m_next;
4142				rxr->fmp->m_pkthdr.len += mh->m_len;
4143				/* Adjust for CRC frag */
4144				if (len_adj) {
4145					rxr->lmp->m_len -= len_adj;
4146					rxr->fmp->m_pkthdr.len -= len_adj;
4147				}
4148			}
4149
4150			if (eop) {
4151				bool sctp = ((ptype & 0x40) != 0);
4152				rxr->fmp->m_pkthdr.rcvif = ifp;
4153				ifp->if_ipackets++;
4154				rxr->rx_packets++;
4155				/* capture data for AIM */
4156				rxr->bytes += rxr->fmp->m_pkthdr.len;
4157				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4158
4159				igb_rx_checksum(staterr, rxr->fmp, sctp);
4160				if (staterr & E1000_RXD_STAT_VP) {
4161					rxr->fmp->m_pkthdr.ether_vtag = vtag;
4162					rxr->fmp->m_flags |= M_VLANTAG;
4163				}
4164#if __FreeBSD_version >= 800000
4165                                rxr->fmp->m_pkthdr.flowid = curcpu;
4166                                rxr->fmp->m_flags |= M_FLOWID;
4167#endif
4168				sendmp = rxr->fmp;
4169				rxr->fmp = NULL;
4170				rxr->lmp = NULL;
4171			}
4172		} else {
4173			ifp->if_ierrors++;
4174discard:
4175			/* Reuse loaded DMA map and just update mbuf chain */
4176			if (hlen) {
4177				mh = rxr->rx_buffers[i].m_head;
4178				mh->m_len = MHLEN;
4179				mh->m_next = NULL;
4180			}
4181			mp = rxr->rx_buffers[i].m_pack;
4182			mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4183			mp->m_data = mp->m_ext.ext_buf;
4184			mp->m_next = NULL;
4185			if (adapter->max_frame_size <=
4186			    (MCLBYTES - ETHER_ALIGN))
4187				m_adj(mp, ETHER_ALIGN);
4188			if (rxr->fmp != NULL) {
4189				/* handles the whole chain */
4190				m_freem(rxr->fmp);
4191				rxr->fmp = NULL;
4192				rxr->lmp = NULL;
4193			}
4194			sendmp = NULL;
4195		}
4196
4197		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4198		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4199
4200		rxr->last_cleaned = i; /* For updating tail */
4201
4202		/* Advance our pointers to the next descriptor. */
4203		if (++i == adapter->num_rx_desc)
4204			i = 0;
4205
4206		/*
4207		** Note that we hold the RX lock thru
4208		** the following call so this ring's
4209		** next_to_check is not gonna change.
4210		*/
4211		if (sendmp != NULL) {
4212			/*
4213			** Send to the stack if:
4214			**  - LRO not enabled, or
4215			**  - no LRO resources, or
4216			**  - lro enqueue fails
4217			*/
4218			if ((!rxr->lro_enabled) ||
4219			    ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4220                                (*ifp->if_input)(ifp, sendmp);
4221                }
4222
4223		/* Get the next descriptor */
4224		cur = &rxr->rx_base[i];
4225		staterr = cur->wb.upper.status_error;
4226	}
4227	rxr->next_to_check = i;
4228
4229	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
4230	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4231
4232	/*
4233	 * Flush any outstanding LRO work
4234	 */
4235	while (!SLIST_EMPTY(&lro->lro_active)) {
4236		queued = SLIST_FIRST(&lro->lro_active);
4237		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4238		tcp_lro_flush(lro, queued);
4239	}
4240
4241	IGB_RX_UNLOCK(rxr);
4242
4243	/*
4244	** We still have cleaning to do?
4245	** Schedule another interrupt if so.
4246	*/
4247	if (staterr & E1000_RXD_STAT_DD) {
4248		E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4249		return TRUE;
4250	}
4251
4252	return FALSE;
4253}
4254
4255
4256/*********************************************************************
4257 *
4258 *  Verify that the hardware indicated that the checksum is valid.
4259 *  Inform the stack about the status of checksum so that stack
4260 *  doesn't spend time verifying the checksum.
4261 *
4262 *********************************************************************/
4263static void
4264igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4265{
4266	u16 status = (u16)staterr;
4267	u8  errors = (u8) (staterr >> 24);
4268
4269	/* Ignore Checksum bit is set */
4270	if (status & E1000_RXD_STAT_IXSM) {
4271		mp->m_pkthdr.csum_flags = 0;
4272		return;
4273	}
4274
4275	if (status & E1000_RXD_STAT_IPCS) {
4276		/* Did it pass? */
4277		if (!(errors & E1000_RXD_ERR_IPE)) {
4278			/* IP Checksum Good */
4279			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4280			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4281		} else
4282			mp->m_pkthdr.csum_flags = 0;
4283	}
4284
4285	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4286		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4287#if __FreeBSD_version >= 800000
4288		if (sctp) /* reassign */
4289			type = CSUM_SCTP_VALID;
4290#endif
4291		/* Did it pass? */
4292		if (!(errors & E1000_RXD_ERR_TCPE)) {
4293			mp->m_pkthdr.csum_flags |= type;
4294			if (!sctp)
4295				mp->m_pkthdr.csum_data = htons(0xffff);
4296		}
4297	}
4298	return;
4299}
4300
4301/*
4302 * This routine is run via an vlan
4303 * config EVENT
4304 */
4305static void
4306igb_register_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4307{
4308	struct adapter	*adapter = ifp->if_softc;
4309	u32		index, bit;
4310
4311	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4312                return;
4313
4314	index = (vtag >> 5) & 0x7F;
4315	bit = vtag & 0x1F;
4316	igb_shadow_vfta[index] |= (1 << bit);
4317	++adapter->num_vlans;
4318	/* Re-init to load the changes */
4319	igb_init(adapter);
4320}
4321
4322/*
4323 * This routine is run via an vlan
4324 * unconfig EVENT
4325 */
4326static void
4327igb_unregister_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4328{
4329	struct adapter	*adapter = ifp->if_softc;
4330	u32		index, bit;
4331
4332	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4333                return;
4334
4335	index = (vtag >> 5) & 0x7F;
4336	bit = vtag & 0x1F;
4337	igb_shadow_vfta[index] &= ~(1 << bit);
4338	--adapter->num_vlans;
4339	/* Re-init to load the changes */
4340	igb_init(adapter);
4341}
4342
4343static void
4344igb_setup_vlan_hw_support(struct adapter *adapter)
4345{
4346	struct e1000_hw *hw = &adapter->hw;
4347	u32             reg;
4348
4349	/*
4350	** We get here thru init_locked, meaning
4351	** a soft reset, this has already cleared
4352	** the VFTA and other state, so if there
4353	** have been no vlan's registered do nothing.
4354	*/
4355	if (adapter->num_vlans == 0)
4356                return;
4357
4358	/*
4359	** A soft reset zero's out the VFTA, so
4360	** we need to repopulate it now.
4361	*/
4362	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4363                if (igb_shadow_vfta[i] != 0)
4364			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4365                            i, igb_shadow_vfta[i]);
4366
4367	reg = E1000_READ_REG(hw, E1000_CTRL);
4368	reg |= E1000_CTRL_VME;
4369	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4370
4371	/* Enable the Filter Table */
4372	reg = E1000_READ_REG(hw, E1000_RCTL);
4373	reg &= ~E1000_RCTL_CFIEN;
4374	reg |= E1000_RCTL_VFE;
4375	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4376
4377	/* Update the frame size */
4378	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4379	    adapter->max_frame_size + VLAN_TAG_SIZE);
4380}
4381
4382static void
4383igb_enable_intr(struct adapter *adapter)
4384{
4385	/* With RSS set up what to auto clear */
4386	if (adapter->msix_mem) {
4387		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4388		    adapter->eims_mask);
4389		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4390		    adapter->eims_mask);
4391		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4392		    adapter->eims_mask);
4393		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4394		    E1000_IMS_LSC);
4395	} else {
4396		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4397		    IMS_ENABLE_MASK);
4398	}
4399	E1000_WRITE_FLUSH(&adapter->hw);
4400
4401	return;
4402}
4403
4404static void
4405igb_disable_intr(struct adapter *adapter)
4406{
4407	if (adapter->msix_mem) {
4408		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4409		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4410	}
4411	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4412	E1000_WRITE_FLUSH(&adapter->hw);
4413	return;
4414}
4415
4416/*
4417 * Bit of a misnomer, what this really means is
4418 * to enable OS management of the system... aka
4419 * to disable special hardware management features
4420 */
4421static void
4422igb_init_manageability(struct adapter *adapter)
4423{
4424	if (adapter->has_manage) {
4425		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4426		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4427
4428		/* disable hardware interception of ARP */
4429		manc &= ~(E1000_MANC_ARP_EN);
4430
4431                /* enable receiving management packets to the host */
4432		manc |= E1000_MANC_EN_MNG2HOST;
4433		manc2h |= 1 << 5;  /* Mng Port 623 */
4434		manc2h |= 1 << 6;  /* Mng Port 664 */
4435		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4436		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4437	}
4438}
4439
4440/*
4441 * Give control back to hardware management
4442 * controller if there is one.
4443 */
4444static void
4445igb_release_manageability(struct adapter *adapter)
4446{
4447	if (adapter->has_manage) {
4448		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4449
4450		/* re-enable hardware interception of ARP */
4451		manc |= E1000_MANC_ARP_EN;
4452		manc &= ~E1000_MANC_EN_MNG2HOST;
4453
4454		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4455	}
4456}
4457
4458/*
4459 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4460 * For ASF and Pass Through versions of f/w this means that
4461 * the driver is loaded.
4462 *
4463 */
4464static void
4465igb_get_hw_control(struct adapter *adapter)
4466{
4467	u32 ctrl_ext;
4468
4469	/* Let firmware know the driver has taken over */
4470	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4471	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4472	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4473}
4474
4475/*
4476 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4477 * For ASF and Pass Through versions of f/w this means that the
4478 * driver is no longer loaded.
4479 *
4480 */
4481static void
4482igb_release_hw_control(struct adapter *adapter)
4483{
4484	u32 ctrl_ext;
4485
4486	/* Let firmware taken over control of h/w */
4487	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4488	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4489	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4490}
4491
4492static int
4493igb_is_valid_ether_addr(uint8_t *addr)
4494{
4495	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4496
4497	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4498		return (FALSE);
4499	}
4500
4501	return (TRUE);
4502}
4503
4504
4505/*
4506 * Enable PCI Wake On Lan capability
4507 */
4508void
4509igb_enable_wakeup(device_t dev)
4510{
4511	u16     cap, status;
4512	u8      id;
4513
4514	/* First find the capabilities pointer*/
4515	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4516	/* Read the PM Capabilities */
4517	id = pci_read_config(dev, cap, 1);
4518	if (id != PCIY_PMG)     /* Something wrong */
4519		return;
4520	/* OK, we have the power capabilities, so
4521	   now get the status register */
4522	cap += PCIR_POWER_STATUS;
4523	status = pci_read_config(dev, cap, 2);
4524	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4525	pci_write_config(dev, cap, status, 2);
4526	return;
4527}
4528
4529
4530/**********************************************************************
4531 *
4532 *  Update the board statistics counters.
4533 *
4534 **********************************************************************/
4535static void
4536igb_update_stats_counters(struct adapter *adapter)
4537{
4538	struct ifnet   *ifp;
4539
4540	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
4541	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4542		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4543		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4544	}
4545	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4546	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4547	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4548	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4549
4550	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4551	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4552	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4553	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4554	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4555	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4556	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4557	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4558	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4559	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4560	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4561	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4562	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4563	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4564	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4565	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4566	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4567	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4568	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4569	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4570
4571	/* For the 64-bit byte counters the low dword must be read first. */
4572	/* Both registers clear on the read of the high dword */
4573
4574	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4575	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4576
4577	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4578	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4579	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4580	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4581	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4582
4583	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4584	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4585
4586	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4587	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4588	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4589	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4590	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4591	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4592	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4593	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4594	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4595	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4596
4597	adapter->stats.algnerrc +=
4598		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4599	adapter->stats.rxerrc +=
4600		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4601	adapter->stats.tncrs +=
4602		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4603	adapter->stats.cexterr +=
4604		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4605	adapter->stats.tsctc +=
4606		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4607	adapter->stats.tsctfc +=
4608		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4609	ifp = adapter->ifp;
4610
4611	ifp->if_collisions = adapter->stats.colc;
4612
4613	/* Rx Errors */
4614	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4615	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4616	    adapter->stats.ruc + adapter->stats.roc +
4617	    adapter->stats.mpc + adapter->stats.cexterr;
4618
4619	/* Tx Errors */
4620	ifp->if_oerrors = adapter->stats.ecol +
4621	    adapter->stats.latecol + adapter->watchdog_events;
4622}
4623
4624
4625/**********************************************************************
4626 *
4627 *  This routine is called only when igb_display_debug_stats is enabled.
4628 *  This routine provides a way to take a look at important statistics
4629 *  maintained by the driver and hardware.
4630 *
4631 **********************************************************************/
4632static void
4633igb_print_debug_info(struct adapter *adapter)
4634{
4635	device_t dev = adapter->dev;
4636	struct rx_ring *rxr = adapter->rx_rings;
4637	struct tx_ring *txr = adapter->tx_rings;
4638	uint8_t *hw_addr = adapter->hw.hw_addr;
4639
4640	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4641	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4642	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4643	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4644
4645#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4646	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4647	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4648	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4649#endif
4650
4651	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4652	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),\
4653	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff) );
4654	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4655	    adapter->hw.fc.high_water,
4656	    adapter->hw.fc.low_water);
4657
4658	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4659		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4660		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4661		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4662		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4663		    txr->me, (long long)txr->no_desc_avail);
4664		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4665		    (long long)txr->tx_irq);
4666		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4667		    (long long)txr->tx_packets);
4668	}
4669
4670	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4671		struct lro_ctrl *lro = &rxr->lro;
4672		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4673		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4674		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4675		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4676		    (long long)rxr->rx_packets);
4677		device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4678		    (long long)rxr->rx_split_packets);
4679		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4680		    (long long)rxr->rx_bytes);
4681		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4682		    (long long)rxr->rx_irq);
4683		device_printf(dev,"RX(%d) LRO Queued= %d\n",
4684		    rxr->me, lro->lro_queued);
4685		device_printf(dev,"RX(%d) LRO Flushed= %d\n",
4686		    rxr->me, lro->lro_flushed);
4687	}
4688
4689	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4690
4691	device_printf(dev, "Mbuf defrag failed = %ld\n",
4692	    adapter->mbuf_defrag_failed);
4693	device_printf(dev, "Std mbuf header failed = %ld\n",
4694	    adapter->mbuf_header_failed);
4695	device_printf(dev, "Std mbuf packet failed = %ld\n",
4696	    adapter->mbuf_packet_failed);
4697	device_printf(dev, "Driver dropped packets = %ld\n",
4698	    adapter->dropped_pkts);
4699	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4700		adapter->no_tx_dma_setup);
4701}
4702
4703static void
4704igb_print_hw_stats(struct adapter *adapter)
4705{
4706	device_t dev = adapter->dev;
4707
4708	device_printf(dev, "Excessive collisions = %lld\n",
4709	    (long long)adapter->stats.ecol);
4710#if	(DEBUG_HW > 0)  /* Dont output these errors normally */
4711	device_printf(dev, "Symbol errors = %lld\n",
4712	    (long long)adapter->stats.symerrs);
4713#endif
4714	device_printf(dev, "Sequence errors = %lld\n",
4715	    (long long)adapter->stats.sec);
4716	device_printf(dev, "Defer count = %lld\n",
4717	    (long long)adapter->stats.dc);
4718	device_printf(dev, "Missed Packets = %lld\n",
4719	    (long long)adapter->stats.mpc);
4720	device_printf(dev, "Receive No Buffers = %lld\n",
4721	    (long long)adapter->stats.rnbc);
4722	/* RLEC is inaccurate on some hardware, calculate our own. */
4723	device_printf(dev, "Receive Length Errors = %lld\n",
4724	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4725	device_printf(dev, "Receive errors = %lld\n",
4726	    (long long)adapter->stats.rxerrc);
4727	device_printf(dev, "Crc errors = %lld\n",
4728	    (long long)adapter->stats.crcerrs);
4729	device_printf(dev, "Alignment errors = %lld\n",
4730	    (long long)adapter->stats.algnerrc);
4731	/* On 82575 these are collision counts */
4732	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4733	    (long long)adapter->stats.cexterr);
4734	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4735	device_printf(dev, "watchdog timeouts = %ld\n",
4736	    adapter->watchdog_events);
4737	device_printf(dev, "XON Rcvd = %lld\n",
4738	    (long long)adapter->stats.xonrxc);
4739	device_printf(dev, "XON Xmtd = %lld\n",
4740	    (long long)adapter->stats.xontxc);
4741	device_printf(dev, "XOFF Rcvd = %lld\n",
4742	    (long long)adapter->stats.xoffrxc);
4743	device_printf(dev, "XOFF Xmtd = %lld\n",
4744	    (long long)adapter->stats.xofftxc);
4745	device_printf(dev, "Good Packets Rcvd = %lld\n",
4746	    (long long)adapter->stats.gprc);
4747	device_printf(dev, "Good Packets Xmtd = %lld\n",
4748	    (long long)adapter->stats.gptc);
4749	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4750	    (long long)adapter->stats.tsctc);
4751	device_printf(dev, "TSO Contexts Failed = %lld\n",
4752	    (long long)adapter->stats.tsctfc);
4753}
4754
4755/**********************************************************************
4756 *
4757 *  This routine provides a way to dump out the adapter eeprom,
4758 *  often a useful debug/service tool. This only dumps the first
4759 *  32 words, stuff that matters is in that extent.
4760 *
4761 **********************************************************************/
4762static void
4763igb_print_nvm_info(struct adapter *adapter)
4764{
4765	u16	eeprom_data;
4766	int	i, j, row = 0;
4767
4768	/* Its a bit crude, but it gets the job done */
4769	printf("\nInterface EEPROM Dump:\n");
4770	printf("Offset\n0x0000  ");
4771	for (i = 0, j = 0; i < 32; i++, j++) {
4772		if (j == 8) { /* Make the offset block */
4773			j = 0; ++row;
4774			printf("\n0x00%x0  ",row);
4775		}
4776		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4777		printf("%04x ", eeprom_data);
4778	}
4779	printf("\n");
4780}
4781
4782static int
4783igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4784{
4785	struct adapter *adapter;
4786	int error;
4787	int result;
4788
4789	result = -1;
4790	error = sysctl_handle_int(oidp, &result, 0, req);
4791
4792	if (error || !req->newptr)
4793		return (error);
4794
4795	if (result == 1) {
4796		adapter = (struct adapter *)arg1;
4797		igb_print_debug_info(adapter);
4798	}
4799	/*
4800	 * This value will cause a hex dump of the
4801	 * first 32 16-bit words of the EEPROM to
4802	 * the screen.
4803	 */
4804	if (result == 2) {
4805		adapter = (struct adapter *)arg1;
4806		igb_print_nvm_info(adapter);
4807        }
4808
4809	return (error);
4810}
4811
4812
4813static int
4814igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4815{
4816	struct adapter *adapter;
4817	int error;
4818	int result;
4819
4820	result = -1;
4821	error = sysctl_handle_int(oidp, &result, 0, req);
4822
4823	if (error || !req->newptr)
4824		return (error);
4825
4826	if (result == 1) {
4827		adapter = (struct adapter *)arg1;
4828		igb_print_hw_stats(adapter);
4829	}
4830
4831	return (error);
4832}
4833
4834static void
4835igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4836	const char *description, int *limit, int value)
4837{
4838	*limit = value;
4839	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4840	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4841	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4842}
4843
4844#ifdef IGB_IEEE1588
4845/*
4846** igb_hwtstamp_ioctl - control hardware time stamping
4847**
4848** Outgoing time stamping can be enabled and disabled. Play nice and
4849** disable it when requested, although it shouldn't case any overhead
4850** when no packet needs it. At most one packet in the queue may be
4851** marked for time stamping, otherwise it would be impossible to tell
4852** for sure to which packet the hardware time stamp belongs.
4853**
4854** Incoming time stamping has to be configured via the hardware
4855** filters. Not all combinations are supported, in particular event
4856** type has to be specified. Matching the kind of event packet is
4857** not supported, with the exception of "all V2 events regardless of
4858** level 2 or 4".
4859**
4860*/
4861static int
4862igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4863{
4864	struct e1000_hw *hw = &adapter->hw;
4865	struct hwtstamp_ctrl *config;
4866	u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4867	u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4868	u32 tsync_rx_ctl_type = 0;
4869	u32 tsync_rx_cfg = 0;
4870	int is_l4 = 0;
4871	int is_l2 = 0;
4872	u16 port = 319; /* PTP */
4873	u32 regval;
4874
4875	config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4876
4877	/* reserved for future extensions */
4878	if (config->flags)
4879		return (EINVAL);
4880
4881	switch (config->tx_type) {
4882	case HWTSTAMP_TX_OFF:
4883		tsync_tx_ctl_bit = 0;
4884		break;
4885	case HWTSTAMP_TX_ON:
4886		tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4887		break;
4888	default:
4889		return (ERANGE);
4890	}
4891
4892	switch (config->rx_filter) {
4893	case HWTSTAMP_FILTER_NONE:
4894		tsync_rx_ctl_bit = 0;
4895		break;
4896	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4897	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4898	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4899	case HWTSTAMP_FILTER_ALL:
4900		/*
4901		 * register TSYNCRXCFG must be set, therefore it is not
4902		 * possible to time stamp both Sync and Delay_Req messages
4903		 * => fall back to time stamping all packets
4904		 */
4905		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4906		config->rx_filter = HWTSTAMP_FILTER_ALL;
4907		break;
4908	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4909		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4910		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4911		is_l4 = 1;
4912		break;
4913	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4914		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4915		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4916		is_l4 = 1;
4917		break;
4918	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4919	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4920		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4921		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
4922		is_l2 = 1;
4923		is_l4 = 1;
4924		config->rx_filter = HWTSTAMP_FILTER_SOME;
4925		break;
4926	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4927	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4928		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4929		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
4930		is_l2 = 1;
4931		is_l4 = 1;
4932		config->rx_filter = HWTSTAMP_FILTER_SOME;
4933		break;
4934	case HWTSTAMP_FILTER_PTP_V2_EVENT:
4935	case HWTSTAMP_FILTER_PTP_V2_SYNC:
4936	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4937		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
4938		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
4939		is_l2 = 1;
4940		break;
4941	default:
4942		return -ERANGE;
4943	}
4944
4945	/* enable/disable TX */
4946	regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
4947	regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
4948	E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
4949
4950	/* enable/disable RX, define which PTP packets are time stamped */
4951	regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
4952	regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
4953	regval = (regval & ~0xE) | tsync_rx_ctl_type;
4954	E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
4955	E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
4956
4957	/*
4958	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
4959	 *                                          (Ethertype to filter on)
4960	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4961	 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4962	 */
4963	E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
4964
4965	/* L4 Queue Filter[0]: only filter by source and destination port */
4966	E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
4967	E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
4968	     ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
4969	E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
4970	     (htons(port)
4971	      | (0<<16) /* immediate interrupt disabled */
4972	      | 0 /* (1<<17) bit cleared: do not bypass
4973		     destination port check */)
4974		: 0);
4975	E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
4976	     (0x11 /* UDP */
4977	      | (1<<15) /* VF not compared */
4978	      | (1<<27) /* Enable Timestamping */
4979	      | (7<<28) /* only source port filter enabled,
4980			   source/target address and protocol
4981			   masked */)
4982	     : ((1<<15) | (15<<28) /* all mask bits set = filter not
4983				      enabled */));
4984
4985	wrfl();
4986
4987	adapter->hwtstamp_ctrl = config;
4988
4989	/* clear TX/RX time stamp registers, just to be sure */
4990	regval = E1000_READ_REG(hw, E1000_TXSTMPH);
4991	regval = E1000_READ_REG(hw, E1000_RXSTMPH);
4992
4993	return (error);
4994}
4995
4996/*
4997** igb_read_clock - read raw cycle counter (to be used by time counter)
4998*/
4999static cycle_t igb_read_clock(const struct cyclecounter *tc)
5000{
5001       struct igb_adapter *adapter =
5002               container_of(tc, struct igb_adapter, cycles);
5003       struct e1000_hw *hw = &adapter->hw;
5004       u64 stamp;
5005
5006       stamp =  E1000_READ_REG(hw, E1000_SYSTIML);
5007       stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
5008
5009       return (stamp);
5010}
5011
5012#endif /* IGB_IEEE1588 */
5013