if_igb.c revision 197079
1/******************************************************************************
2
3  Copyright (c) 2001-2009, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 197079 2009-09-10 21:16:26Z jfv $*/
34
35
36#ifdef HAVE_KERNEL_OPTION_HEADERS
37#include "opt_device_polling.h"
38#include "opt_inet.h"
39#endif
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#if __FreeBSD_version >= 800000
44#include <sys/buf_ring.h>
45#endif
46#include <sys/bus.h>
47#include <sys/endian.h>
48#include <sys/kernel.h>
49#include <sys/kthread.h>
50#include <sys/malloc.h>
51#include <sys/mbuf.h>
52#include <sys/module.h>
53#include <sys/rman.h>
54#include <sys/socket.h>
55#include <sys/sockio.h>
56#include <sys/sysctl.h>
57#include <sys/taskqueue.h>
58#include <sys/eventhandler.h>
59#include <sys/pcpu.h>
60#include <sys/smp.h>
61#include <machine/smp.h>
62#include <machine/bus.h>
63#include <machine/resource.h>
64
65#ifdef IGB_IEEE1588
66#include <sys/ieee1588.h>
67#endif
68
69#include <net/bpf.h>
70#include <net/ethernet.h>
71#include <net/if.h>
72#include <net/if_arp.h>
73#include <net/if_dl.h>
74#include <net/if_media.h>
75
76#include <net/if_types.h>
77#include <net/if_vlan_var.h>
78
79#include <netinet/in_systm.h>
80#include <netinet/in.h>
81#include <netinet/if_ether.h>
82#include <netinet/ip.h>
83#include <netinet/ip6.h>
84#include <netinet/tcp.h>
85#include <netinet/tcp_lro.h>
86#include <netinet/udp.h>
87
88#include <machine/in_cksum.h>
89#include <dev/pci/pcivar.h>
90#include <dev/pci/pcireg.h>
91
92#include "e1000_api.h"
93#include "e1000_82575.h"
94#include "if_igb.h"
95
96/*********************************************************************
97 *  Set this to one to display debug statistics
98 *********************************************************************/
99int	igb_display_debug_stats = 0;
100
101/*********************************************************************
102 *  Driver version:
103 *********************************************************************/
104char igb_driver_version[] = "version - 1.7.3";
105
106
107/*********************************************************************
108 *  PCI Device ID Table
109 *
110 *  Used by probe to select the devices to attach the driver to
111 *  Last field stores an index into igb_strings
112 *  Last entry must be all 0s
113 *
114 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
115 *********************************************************************/
116
117static igb_vendor_info_t igb_vendor_info_array[] =
118{
119	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
120	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
121						PCI_ANY_ID, PCI_ANY_ID, 0},
122	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
123						PCI_ANY_ID, PCI_ANY_ID, 0},
124	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
125	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
126	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
127	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
128	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
129						PCI_ANY_ID, PCI_ANY_ID, 0},
130	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
131						PCI_ANY_ID, PCI_ANY_ID, 0},
132	/* required last entry */
133	{ 0, 0, 0, 0, 0}
134};
135
136/*********************************************************************
137 *  Table of branding strings for all supported NICs.
138 *********************************************************************/
139
140static char *igb_strings[] = {
141	"Intel(R) PRO/1000 Network Connection"
142};
143
144/*********************************************************************
145 *  Function prototypes
146 *********************************************************************/
147static int	igb_probe(device_t);
148static int	igb_attach(device_t);
149static int	igb_detach(device_t);
150static int	igb_shutdown(device_t);
151static int	igb_suspend(device_t);
152static int	igb_resume(device_t);
153static void	igb_start(struct ifnet *);
154static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
155#if __FreeBSD_version >= 800000
156static int	igb_mq_start(struct ifnet *, struct mbuf *);
157static int	igb_mq_start_locked(struct ifnet *,
158		    struct tx_ring *, struct mbuf *);
159static void	igb_qflush(struct ifnet *);
160#endif
161static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
162static void	igb_watchdog(struct adapter *);
163static void	igb_init(void *);
164static void	igb_init_locked(struct adapter *);
165static void	igb_stop(void *);
166static void	igb_media_status(struct ifnet *, struct ifmediareq *);
167static int	igb_media_change(struct ifnet *);
168static void	igb_identify_hardware(struct adapter *);
169static int	igb_allocate_pci_resources(struct adapter *);
170static int	igb_allocate_msix(struct adapter *);
171static int	igb_allocate_legacy(struct adapter *);
172static int	igb_setup_msix(struct adapter *);
173static void	igb_free_pci_resources(struct adapter *);
174static void	igb_local_timer(void *);
175static int	igb_hardware_init(struct adapter *);
176static void	igb_setup_interface(device_t, struct adapter *);
177static int	igb_allocate_queues(struct adapter *);
178static void	igb_configure_queues(struct adapter *);
179
180static int	igb_allocate_transmit_buffers(struct tx_ring *);
181static void	igb_setup_transmit_structures(struct adapter *);
182static void	igb_setup_transmit_ring(struct tx_ring *);
183static void	igb_initialize_transmit_units(struct adapter *);
184static void	igb_free_transmit_structures(struct adapter *);
185static void	igb_free_transmit_buffers(struct tx_ring *);
186
187static int	igb_allocate_receive_buffers(struct rx_ring *);
188static int	igb_setup_receive_structures(struct adapter *);
189static int	igb_setup_receive_ring(struct rx_ring *);
190static void	igb_initialize_receive_units(struct adapter *);
191static void	igb_free_receive_structures(struct adapter *);
192static void	igb_free_receive_buffers(struct rx_ring *);
193
194static void	igb_enable_intr(struct adapter *);
195static void	igb_disable_intr(struct adapter *);
196static void	igb_update_stats_counters(struct adapter *);
197static bool	igb_txeof(struct tx_ring *);
198static bool	igb_rxeof(struct rx_ring *, int);
199static void	igb_rx_checksum(u32, struct mbuf *, bool);
200static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
201static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
202static void	igb_set_promisc(struct adapter *);
203static void	igb_disable_promisc(struct adapter *);
204static void	igb_set_multi(struct adapter *);
205static void	igb_print_hw_stats(struct adapter *);
206static void	igb_update_link_status(struct adapter *);
207static int	igb_get_buf(struct rx_ring *, int, u8);
208
209static void	igb_register_vlan(void *, struct ifnet *, u16);
210static void	igb_unregister_vlan(void *, struct ifnet *, u16);
211static void	igb_setup_vlan_hw_support(struct adapter *);
212
213static int	igb_xmit(struct tx_ring *, struct mbuf **);
214static int	igb_dma_malloc(struct adapter *, bus_size_t,
215		    struct igb_dma_alloc *, int);
216static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
217static void	igb_print_debug_info(struct adapter *);
218static void	igb_print_nvm_info(struct adapter *);
219static int 	igb_is_valid_ether_addr(u8 *);
220static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
221static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
222/* Management and WOL Support */
223static void	igb_init_manageability(struct adapter *);
224static void	igb_release_manageability(struct adapter *);
225static void     igb_get_hw_control(struct adapter *);
226static void     igb_release_hw_control(struct adapter *);
227static void     igb_enable_wakeup(device_t);
228
229static int	igb_irq_fast(void *);
230static void	igb_add_rx_process_limit(struct adapter *, const char *,
231		    const char *, int *, int);
232static void	igb_handle_rxtx(void *context, int pending);
233static void	igb_handle_tx(void *context, int pending);
234static void	igb_handle_rx(void *context, int pending);
235
236/* These are MSIX only irq handlers */
237static void	igb_msix_rx(void *);
238static void	igb_msix_tx(void *);
239static void	igb_msix_link(void *);
240
241/* Adaptive Interrupt Moderation */
242static void	igb_update_aim(struct rx_ring *);
243
244/*********************************************************************
245 *  FreeBSD Device Interface Entry Points
246 *********************************************************************/
247
248static device_method_t igb_methods[] = {
249	/* Device interface */
250	DEVMETHOD(device_probe, igb_probe),
251	DEVMETHOD(device_attach, igb_attach),
252	DEVMETHOD(device_detach, igb_detach),
253	DEVMETHOD(device_shutdown, igb_shutdown),
254	DEVMETHOD(device_suspend, igb_suspend),
255	DEVMETHOD(device_resume, igb_resume),
256	{0, 0}
257};
258
259static driver_t igb_driver = {
260	"igb", igb_methods, sizeof(struct adapter),
261};
262
263static devclass_t igb_devclass;
264DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
265MODULE_DEPEND(igb, pci, 1, 1, 1);
266MODULE_DEPEND(igb, ether, 1, 1, 1);
267
268/*********************************************************************
269 *  Tunable default values.
270 *********************************************************************/
271
272/* Descriptor defaults */
273static int igb_rxd = IGB_DEFAULT_RXD;
274static int igb_txd = IGB_DEFAULT_TXD;
275TUNABLE_INT("hw.igb.rxd", &igb_rxd);
276TUNABLE_INT("hw.igb.txd", &igb_txd);
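/*
 * Like the other hw.igb.* knobs below, these are read from the kernel
 * environment at boot, e.g. from /boot/loader.conf (values shown here are
 * hypothetical and must still pass the size/alignment checks in igb_attach()):
 *
 *	hw.igb.rxd="1024"
 *	hw.igb.txd="1024"
 */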
277
278/*
279** These parameters are used in Adaptive
280** Interrupt Moderation. The value is set
281** into EITR and controls the interrupt
282** frequency. A static scheme with a
283** user-chosen value can be created by
284** changing igb_ave_latency to the desired
285** value and then setting igb_enable_aim to FALSE.
286** This will result in all EITR registers
287** getting set to that value statically.
288*/
289static int igb_enable_aim = TRUE;
290TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
291static int igb_low_latency = IGB_LOW_LATENCY;
292TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
293static int igb_ave_latency = IGB_AVE_LATENCY;
294TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
295static int igb_bulk_latency = IGB_BULK_LATENCY;
296TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
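/*
 * A sketch of the static scheme described above: with AIM disabled,
 * igb_init_locked() writes igb_ave_latency into every EITR register and
 * igb_update_aim() never changes it, so (hypothetical value)
 *
 *	hw.igb.enable_aim="0"
 *	hw.igb.ave_latency="128"
 *
 * would pin all queues to a single interrupt moderation setting.
 */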
297
298/*
299** This will autoconfigure based on the number
300** of CPUs if set to 0. Only matched pairs of
301** TX and RX rings are allowed.
302*/
303static int igb_num_queues = 1;
304TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
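/*
 * For example, hw.igb.num_queues="0" in /boot/loader.conf lets the driver
 * pick the queue count from the number of CPUs when MSIX is set up
 * (see igb_setup_msix()).
 */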
305
306/* How many packets rxeof tries to clean at a time */
307static int igb_rx_process_limit = 100;
308TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
309
310/* Flow control setting - default to FULL */
311static int igb_fc_setting = e1000_fc_full;
312TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
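/*
 * The value corresponds to the shared-code e1000_fc_mode settings
 * (e1000_fc_none, e1000_fc_rx_pause, e1000_fc_tx_pause, e1000_fc_full);
 * the default above requests full (RX and TX) flow control.
 */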
313
314/*
315** Shadow VFTA table; this is needed because
316** the real filter table gets cleared during
317** a soft reset and the driver needs to be able
318** to repopulate it.
319*/
320static u32 igb_shadow_vfta[IGB_VFTA_SIZE];
321
322
323/*********************************************************************
324 *  Device identification routine
325 *
326 *  igb_probe determines if the driver should be loaded on an
327 *  adapter based on the PCI vendor/device ID of the adapter.
328 *
329 *  return BUS_PROBE_DEFAULT on success, positive on failure
330 *********************************************************************/
331
332static int
333igb_probe(device_t dev)
334{
335	char		adapter_name[60];
336	uint16_t	pci_vendor_id = 0;
337	uint16_t	pci_device_id = 0;
338	uint16_t	pci_subvendor_id = 0;
339	uint16_t	pci_subdevice_id = 0;
340	igb_vendor_info_t *ent;
341
342	INIT_DEBUGOUT("igb_probe: begin");
343
344	pci_vendor_id = pci_get_vendor(dev);
345	if (pci_vendor_id != IGB_VENDOR_ID)
346		return (ENXIO);
347
348	pci_device_id = pci_get_device(dev);
349	pci_subvendor_id = pci_get_subvendor(dev);
350	pci_subdevice_id = pci_get_subdevice(dev);
351
352	ent = igb_vendor_info_array;
353	while (ent->vendor_id != 0) {
354		if ((pci_vendor_id == ent->vendor_id) &&
355		    (pci_device_id == ent->device_id) &&
356
357		    ((pci_subvendor_id == ent->subvendor_id) ||
358		    (ent->subvendor_id == PCI_ANY_ID)) &&
359
360		    ((pci_subdevice_id == ent->subdevice_id) ||
361		    (ent->subdevice_id == PCI_ANY_ID))) {
362			sprintf(adapter_name, "%s %s",
363				igb_strings[ent->index],
364				igb_driver_version);
365			device_set_desc_copy(dev, adapter_name);
366			return (BUS_PROBE_DEFAULT);
367		}
368		ent++;
369	}
370
371	return (ENXIO);
372}
373
374/*********************************************************************
375 *  Device initialization routine
376 *
377 *  The attach entry point is called when the driver is being loaded.
378 *  This routine identifies the type of hardware, allocates all resources
379 *  and initializes the hardware.
380 *
381 *  return 0 on success, positive on failure
382 *********************************************************************/
383
384static int
385igb_attach(device_t dev)
386{
387	struct adapter	*adapter;
388	int		error = 0;
389	u16		eeprom_data;
390
391	INIT_DEBUGOUT("igb_attach: begin");
392
393	adapter = device_get_softc(dev);
394	adapter->dev = adapter->osdep.dev = dev;
395	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
396
397	/* SYSCTL stuff */
398	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
399	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
400	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
401	    igb_sysctl_debug_info, "I", "Debug Information");
402
403	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
404	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
405	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
406	    igb_sysctl_stats, "I", "Statistics");
407
408	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
409	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
410	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
411	    &igb_fc_setting, 0, "Flow Control");
412
413	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
414	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
415	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
416	    &igb_enable_aim, 1, "Interrupt Moderation");
417
418	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
419	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
420	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
421	    &igb_low_latency, 1, "Low Latency");
422
423	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
424	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
425	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
426	    &igb_ave_latency, 1, "Average Latency");
427
428	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
429	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
430	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
431	    &igb_bulk_latency, 1, "Bulk Latency");
432
433	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
434
435	/* Determine hardware and mac info */
436	igb_identify_hardware(adapter);
437
438	/* Setup PCI resources */
439	if (igb_allocate_pci_resources(adapter)) {
440		device_printf(dev, "Allocation of PCI resources failed\n");
441		error = ENXIO;
442		goto err_pci;
443	}
444
445	/* Do Shared Code initialization */
446	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
447		device_printf(dev, "Setup of Shared code failed\n");
448		error = ENXIO;
449		goto err_pci;
450	}
451
452	e1000_get_bus_info(&adapter->hw);
453
454	/* Sysctls for limiting the amount of work done in the taskqueue */
455	igb_add_rx_process_limit(adapter, "rx_processing_limit",
456	    "max number of rx packets to process", &adapter->rx_process_limit,
457	    igb_rx_process_limit);
458
459	/*
460	 * Validate number of transmit and receive descriptors. It
461	 * must not exceed hardware maximum, and must be multiple
462	 * of IGB_DBA_ALIGN.
463	 */
464	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
465	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
466		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
467		    IGB_DEFAULT_TXD, igb_txd);
468		adapter->num_tx_desc = IGB_DEFAULT_TXD;
469	} else
470		adapter->num_tx_desc = igb_txd;
471	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
472	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
473		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
474		    IGB_DEFAULT_RXD, igb_rxd);
475		adapter->num_rx_desc = IGB_DEFAULT_RXD;
476	} else
477		adapter->num_rx_desc = igb_rxd;
478
479	adapter->hw.mac.autoneg = DO_AUTO_NEG;
480	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
481	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
482
483	/* Copper options */
484	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
485		adapter->hw.phy.mdix = AUTO_ALL_MODES;
486		adapter->hw.phy.disable_polarity_correction = FALSE;
487		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
488	}
489
490	/*
491	 * Set the frame limits assuming
492	 * standard ethernet sized frames.
493	 */
494	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
495	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
496
497	/*
498	** Allocate and Setup Queues
499	*/
500	if (igb_allocate_queues(adapter)) {
501		error = ENOMEM;
502		goto err_pci;
503	}
504
505	/*
506	** Start from a known state; this is
507	** important when reading the NVM and
508	** MAC address from the hardware.
509	*/
510	e1000_reset_hw(&adapter->hw);
511
512	/* Make sure we have a good EEPROM before we read from it */
513	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
514		/*
515		** Some PCI-E parts fail the first check due to
516		** the link being in a sleep state; call it again.
517		** If it fails a second time, it's a real issue.
518		*/
519		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
520			device_printf(dev,
521			    "The EEPROM Checksum Is Not Valid\n");
522			error = EIO;
523			goto err_late;
524		}
525	}
526
527	/*
528	** Copy the permanent MAC address out of the EEPROM
529	*/
530	if (e1000_read_mac_addr(&adapter->hw) < 0) {
531		device_printf(dev, "EEPROM read error while reading MAC"
532		    " address\n");
533		error = EIO;
534		goto err_late;
535	}
536	/* Check its sanity */
537	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
538		device_printf(dev, "Invalid MAC address\n");
539		error = EIO;
540		goto err_late;
541	}
542
543	/* Now Initialize the hardware */
544	if (igb_hardware_init(adapter)) {
545		device_printf(dev, "Unable to initialize the hardware\n");
546		error = EIO;
547		goto err_late;
548	}
549
550	/*
551	** Configure Interrupts
552	*/
553	if (adapter->msix > 1) /* MSIX */
554		error = igb_allocate_msix(adapter);
555	else /* MSI or Legacy */
556		error = igb_allocate_legacy(adapter);
557	if (error)
558		goto err_late;
559
560	/* Setup OS specific network interface */
561	igb_setup_interface(dev, adapter);
562
563#ifdef IGB_IEEE1588
564        /*
565        ** Setup the timer: IEEE 1588 support
566        */
567        adapter->cycles.read = igb_read_clock;
568        adapter->cycles.mask = (u64)-1;
569        adapter->cycles.mult = 1;
570        adapter->cycles.shift = IGB_TSYNC_SHIFT;
571        E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
572            IGB_TSYNC_CYCLE_TIME * IGB_TSYNC_SHIFT);
573        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIML, 0x00000000);
574        E1000_WRITE_REG(&adapter->hw, E1000_SYSTIMH, 0xFF800000);
575
576	// JFV - this is not complete yet
577#endif
578
579	/* Initialize statistics */
580	igb_update_stats_counters(adapter);
581
582	adapter->hw.mac.get_link_status = 1;
583	igb_update_link_status(adapter);
584
585	/* Indicate SOL/IDER usage */
586	if (e1000_check_reset_block(&adapter->hw))
587		device_printf(dev,
588		    "PHY reset is blocked due to SOL/IDER session.\n");
589
590	/* Determine if we have to control management hardware */
591	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
592
593	/*
594	 * Setup Wake-on-Lan
595	 */
596	/* APME bit in EEPROM is mapped to WUC.APME */
597	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
598	if (eeprom_data)
599		adapter->wol = E1000_WUFC_MAG;
600
601	/* Register for VLAN events */
602	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
603	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
604	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
605	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
606
607	/* Tell the stack that the interface is not active */
608	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
609
610	INIT_DEBUGOUT("igb_attach: end");
611
612	return (0);
613
614err_late:
615	igb_free_transmit_structures(adapter);
616	igb_free_receive_structures(adapter);
617	igb_release_hw_control(adapter);
618err_pci:
619	igb_free_pci_resources(adapter);
620	IGB_CORE_LOCK_DESTROY(adapter);
621
622	return (error);
623}
624
625/*********************************************************************
626 *  Device removal routine
627 *
628 *  The detach entry point is called when the driver is being removed.
629 *  This routine stops the adapter and deallocates all the resources
630 *  that were allocated for driver operation.
631 *
632 *  return 0 on success, positive on failure
633 *********************************************************************/
634
635static int
636igb_detach(device_t dev)
637{
638	struct adapter	*adapter = device_get_softc(dev);
639	struct ifnet	*ifp = adapter->ifp;
640
641	INIT_DEBUGOUT("igb_detach: begin");
642
643	/* Make sure VLANS are not using driver */
644	if (adapter->ifp->if_vlantrunk != NULL) {
645		device_printf(dev,"Vlan in use, detach first\n");
646		return (EBUSY);
647	}
648
649	IGB_CORE_LOCK(adapter);
650	adapter->in_detach = 1;
651	igb_stop(adapter);
652	IGB_CORE_UNLOCK(adapter);
653
654	e1000_phy_hw_reset(&adapter->hw);
655
656	/* Give control back to firmware */
657	igb_release_manageability(adapter);
658	igb_release_hw_control(adapter);
659
660	if (adapter->wol) {
661		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
662		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
663		igb_enable_wakeup(dev);
664	}
665
666	/* Unregister VLAN events */
667	if (adapter->vlan_attach != NULL)
668		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
669	if (adapter->vlan_detach != NULL)
670		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
671
672	ether_ifdetach(adapter->ifp);
673
674	callout_drain(&adapter->timer);
675
676	igb_free_pci_resources(adapter);
677	bus_generic_detach(dev);
678	if_free(ifp);
679
680	igb_free_transmit_structures(adapter);
681	igb_free_receive_structures(adapter);
682
683	IGB_CORE_LOCK_DESTROY(adapter);
684
685	return (0);
686}
687
688/*********************************************************************
689 *
690 *  Shutdown entry point
691 *
692 **********************************************************************/
693
694static int
695igb_shutdown(device_t dev)
696{
697	return igb_suspend(dev);
698}
699
700/*
701 * Suspend/resume device methods.
702 */
703static int
704igb_suspend(device_t dev)
705{
706	struct adapter *adapter = device_get_softc(dev);
707
708	IGB_CORE_LOCK(adapter);
709
710	igb_stop(adapter);
711
712	igb_release_manageability(adapter);
713	igb_release_hw_control(adapter);
714
715	if (adapter->wol) {
716		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
717		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
718		igb_enable_wakeup(dev);
719	}
720
721	IGB_CORE_UNLOCK(adapter);
722
723	return bus_generic_suspend(dev);
724}
725
726static int
727igb_resume(device_t dev)
728{
729	struct adapter *adapter = device_get_softc(dev);
730	struct ifnet *ifp = adapter->ifp;
731
732	IGB_CORE_LOCK(adapter);
733	igb_init_locked(adapter);
734	igb_init_manageability(adapter);
735
736	if ((ifp->if_flags & IFF_UP) &&
737	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
738		igb_start(ifp);
739
740	IGB_CORE_UNLOCK(adapter);
741
742	return bus_generic_resume(dev);
743}
744
745
746/*********************************************************************
747 *  Transmit entry point
748 *
749 *  igb_start is called by the stack to initiate a transmit.
750 *  The driver will remain in this routine as long as there are
751 *  packets to transmit and transmit resources are available.
752 *  In case resources are not available, the stack is notified and
753 *  the packet is requeued.
754 **********************************************************************/
755
756static void
757igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
758{
759	struct adapter	*adapter = ifp->if_softc;
760	struct mbuf	*m_head;
761
762	IGB_TX_LOCK_ASSERT(txr);
763
764	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
765	    IFF_DRV_RUNNING)
766		return;
767	if (!adapter->link_active)
768		return;
769
770	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
771
772		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
773		if (m_head == NULL)
774			break;
775		/*
776		 *  Encapsulation can modify our pointer, and/or make it
777		 *  NULL on failure.  In that event, we can't requeue.
778		 */
779		if (igb_xmit(txr, &m_head)) {
780			if (m_head == NULL)
781				break;
782			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
783			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
784			break;
785		}
786
787		/* Send a copy of the frame to the BPF listener */
788		ETHER_BPF_MTAP(ifp, m_head);
789
790		/* Set timeout in case hardware has problems transmitting. */
791		txr->watchdog_timer = IGB_TX_TIMEOUT;
792	}
793}
794
795/*
796 * Legacy TX driver routine, called from the
797 * stack, always uses tx[0], and spins for it.
798 * Should not be used with multiqueue tx
799 */
800static void
801igb_start(struct ifnet *ifp)
802{
803	struct adapter	*adapter = ifp->if_softc;
804	struct tx_ring	*txr = adapter->tx_rings;
805
806	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
807		IGB_TX_LOCK(txr);
808		igb_start_locked(txr, ifp);
809		IGB_TX_UNLOCK(txr);
810	}
811	return;
812}
813
814#if __FreeBSD_version >= 800000
815/*
816** Multiqueue Transmit driver
817**
818*/
819static int
820igb_mq_start(struct ifnet *ifp, struct mbuf *m)
821{
822	struct adapter	*adapter = ifp->if_softc;
823	struct tx_ring	*txr;
824	int 		i = 0, err = 0;
825
826	/* Which queue to use */
827	if ((m->m_flags & M_FLOWID) != 0)
828		i = m->m_pkthdr.flowid % adapter->num_queues;
829	txr = &adapter->tx_rings[i];
830
831	if (IGB_TX_TRYLOCK(txr)) {
832		err = igb_mq_start_locked(ifp, txr, m);
833		IGB_TX_UNLOCK(txr);
834	} else
835		err = drbr_enqueue(ifp, txr->br, m);
836
837	return (err);
838}
839
840static int
841igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
842{
843	struct adapter  *adapter = txr->adapter;
844        struct mbuf     *next;
845        int             err = 0;
846
847	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
848		err = drbr_enqueue(ifp, txr->br, m);
849		return (err);
850	}
851
852	if (m == NULL) /* Called by tasklet */
853		goto process;
854
855	/* If nothing queued go right to xmit */
856	if (drbr_empty(ifp, txr->br)) {
857		if ((err = igb_xmit(txr, &m)) != 0) {
858			if (m != NULL)
859				err = drbr_enqueue(ifp, txr->br, m);
860			return (err);
861		} else {
862			/* Success, update stats */
863			drbr_stats_update(ifp, m->m_pkthdr.len, m->m_flags);
864			/* Send a copy of the frame to the BPF listener */
865			ETHER_BPF_MTAP(ifp, m);
866			/* Set the watchdog */
867			txr->watchdog_timer = IGB_TX_TIMEOUT;
868                }
869
870        } else if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
871		return (err);
872
873process:
874	if (drbr_empty(ifp, txr->br))
875		return (err);
876
877	/* Process the queue */
878	while (TRUE) {
879		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
880			break;
881		next = drbr_dequeue(ifp, txr->br);
882		if (next == NULL)
883			break;
884		if ((err = igb_xmit(txr, &next)) != 0) {
885			if (next != NULL)
886				err = drbr_enqueue(ifp, txr->br, next);
887			break;
888		}
889		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
890		ETHER_BPF_MTAP(ifp, next);
891		/* Set the watchdog */
892		txr->watchdog_timer = IGB_TX_TIMEOUT;
893	}
894
895	if (txr->tx_avail <= IGB_TX_OP_THRESHOLD)
896		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
897
898	return (err);
899}
900
901/*
902** Flush all ring buffers
903*/
904static void
905igb_qflush(struct ifnet *ifp)
906{
907	struct adapter	*adapter = ifp->if_softc;
908	struct tx_ring	*txr = adapter->tx_rings;
909	struct mbuf	*m;
910
911	for (int i = 0; i < adapter->num_queues; i++, txr++) {
912		IGB_TX_LOCK(txr);
913		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
914			m_freem(m);
915		IGB_TX_UNLOCK(txr);
916	}
917	if_qflush(ifp);
918}
919#endif /* __FreeBSD_version >= 800000 */
920
921/*********************************************************************
922 *  Ioctl entry point
923 *
924 *  igb_ioctl is called when the user wants to configure the
925 *  interface.
926 *
927 *  return 0 on success, positive on failure
928 **********************************************************************/
929
930static int
931igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
932{
933	struct adapter	*adapter = ifp->if_softc;
934	struct ifreq *ifr = (struct ifreq *)data;
935#ifdef INET
936	struct ifaddr *ifa = (struct ifaddr *)data;
937#endif
938	int error = 0;
939
940	if (adapter->in_detach)
941		return (error);
942
943	switch (command) {
944	case SIOCSIFADDR:
945#ifdef INET
946		if (ifa->ifa_addr->sa_family == AF_INET) {
947			/*
948			 * XXX
949			 * Since resetting hardware takes a very long time
950			 * and results in link renegotiation, we
951			 * initialize the hardware only when it is absolutely
952			 * required.
953			 */
954			ifp->if_flags |= IFF_UP;
955			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
956				IGB_CORE_LOCK(adapter);
957				igb_init_locked(adapter);
958				IGB_CORE_UNLOCK(adapter);
959			}
960			if (!(ifp->if_flags & IFF_NOARP))
961				arp_ifinit(ifp, ifa);
962		} else
963#endif
964			error = ether_ioctl(ifp, command, data);
965		break;
966	case SIOCSIFMTU:
967	    {
968		int max_frame_size;
969
970		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
971
972		IGB_CORE_LOCK(adapter);
973		max_frame_size = 9234;
974		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
975		    ETHER_CRC_LEN) {
976			IGB_CORE_UNLOCK(adapter);
977			error = EINVAL;
978			break;
979		}
980
981		ifp->if_mtu = ifr->ifr_mtu;
982		adapter->max_frame_size =
983		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
984		igb_init_locked(adapter);
985		IGB_CORE_UNLOCK(adapter);
986		break;
987	    }
988	case SIOCSIFFLAGS:
989		IOCTL_DEBUGOUT("ioctl rcv'd:\
990		    SIOCSIFFLAGS (Set Interface Flags)");
991		IGB_CORE_LOCK(adapter);
992		if (ifp->if_flags & IFF_UP) {
993			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
994				if ((ifp->if_flags ^ adapter->if_flags) &
995				    (IFF_PROMISC | IFF_ALLMULTI)) {
996					igb_disable_promisc(adapter);
997					igb_set_promisc(adapter);
998				}
999			} else
1000				igb_init_locked(adapter);
1001		} else
1002			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1003				igb_stop(adapter);
1004		adapter->if_flags = ifp->if_flags;
1005		IGB_CORE_UNLOCK(adapter);
1006		break;
1007	case SIOCADDMULTI:
1008	case SIOCDELMULTI:
1009		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1010		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1011			IGB_CORE_LOCK(adapter);
1012			igb_disable_intr(adapter);
1013			igb_set_multi(adapter);
1014			igb_enable_intr(adapter);
1015			IGB_CORE_UNLOCK(adapter);
1016		}
1017		break;
1018	case SIOCSIFMEDIA:
1019		/* Check SOL/IDER usage */
1020		IGB_CORE_LOCK(adapter);
1021		if (e1000_check_reset_block(&adapter->hw)) {
1022			IGB_CORE_UNLOCK(adapter);
1023			device_printf(adapter->dev, "Media change is"
1024			    " blocked due to SOL/IDER session.\n");
1025			break;
1026		}
1027		IGB_CORE_UNLOCK(adapter);
1028	case SIOCGIFMEDIA:
1029		IOCTL_DEBUGOUT("ioctl rcv'd: \
1030		    SIOCxIFMEDIA (Get/Set Interface Media)");
1031		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1032		break;
1033	case SIOCSIFCAP:
1034	    {
1035		int mask, reinit;
1036
1037		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1038		reinit = 0;
1039		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1040		if (mask & IFCAP_HWCSUM) {
1041			ifp->if_capenable ^= IFCAP_HWCSUM;
1042			reinit = 1;
1043		}
1044		if (mask & IFCAP_TSO4) {
1045			ifp->if_capenable ^= IFCAP_TSO4;
1046			reinit = 1;
1047		}
1048		if (mask & IFCAP_VLAN_HWTAGGING) {
1049			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1050			reinit = 1;
1051		}
1052		if (mask & IFCAP_LRO) {
1053			ifp->if_capenable ^= IFCAP_LRO;
1054			reinit = 1;
1055		}
1056		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1057			igb_init(adapter);
1058		VLAN_CAPABILITIES(ifp);
1059		break;
1060	    }
1061
1062#ifdef IGB_IEEE1588
1063	/*
1064	** IOCTL support for Precision Time Protocol (IEEE 1588)
1065	*/
1066	case SIOCSHWTSTAMP:
1067		error = igb_hwtstamp_ioctl(adapter, ifp);
1068		break;
1069#endif
1070
1071	default:
1072		error = ether_ioctl(ifp, command, data);
1073		break;
1074	}
1075
1076	return (error);
1077}
1078
1079/*********************************************************************
1080 *  Watchdog timer:
1081 *
1082 *  This routine is called from the local timer every second.
1083 *  As long as transmit descriptors are being cleaned the value
1084 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
1085 *  and we then reset the device.
1086 *
1087 **********************************************************************/
1088
1089static void
1090igb_watchdog(struct adapter *adapter)
1091{
1092	struct tx_ring	*txr = adapter->tx_rings;
1093	bool		tx_hang = FALSE;
1094
1095	IGB_CORE_LOCK_ASSERT(adapter);
1096
1097	/*
1098	** The timer is set to 5 every time start() queues a packet.
1099	** Then txeof keeps resetting it as long as it cleans at
1100	** least one descriptor.
1101	** Finally, anytime all descriptors are clean the timer is
1102	** set to 0.
1103	**
1104	** With TX Multiqueue we need to check every queue's timer;
1105	** if any times out, we do the reset.
1106	*/
1107	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1108		IGB_TX_LOCK(txr);
1109		if (txr->watchdog_timer == 0 ||
1110		    (--txr->watchdog_timer)) {
1111			IGB_TX_UNLOCK(txr);
1112			continue;
1113		} else {
1114			tx_hang = TRUE;
1115			IGB_TX_UNLOCK(txr);
1116			break;
1117		}
1118	}
1119	if (tx_hang == FALSE)
1120		return;
1121
1122	/* If we are in this routine because of pause frames, then
1123	 * don't reset the hardware.
1124	 */
1125	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1126	    E1000_STATUS_TXOFF) {
1127		txr = adapter->tx_rings; /* reset pointer */
1128		for (int i = 0; i < adapter->num_queues; i++, txr++) {
1129			IGB_TX_LOCK(txr);
1130			txr->watchdog_timer = IGB_TX_TIMEOUT;
1131			IGB_TX_UNLOCK(txr);
1132		}
1133		return;
1134	}
1135
1136	if (e1000_check_for_link(&adapter->hw) == 0)
1137		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
1138	txr = adapter->tx_rings;	/* reset; txr may point at the hung queue */
1139	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1140		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
1141		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
1142		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
1143		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
1144		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
1145		    txr->next_to_clean);
1146	}
1147
1148	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1149	adapter->watchdog_events++;
1150
1151	igb_init_locked(adapter);
1152}
1153
1154/*********************************************************************
1155 *  Init entry point
1156 *
1157 *  This routine is used in two ways. It is used by the stack as
1158 *  init entry point in network interface structure. It is also used
1159 *  by the driver as a hw/sw initialization routine to get to a
1160 *  consistent state.
1161 *
1162 *  return 0 on success, positive on failure
1163 **********************************************************************/
1164
1165static void
1166igb_init_locked(struct adapter *adapter)
1167{
1168	struct rx_ring *rxr = adapter->rx_rings;
1169	struct tx_ring *txr = adapter->tx_rings;
1170	struct ifnet	*ifp = adapter->ifp;
1171	device_t	dev = adapter->dev;
1172	u32		pba = 0;
1173
1174	INIT_DEBUGOUT("igb_init: begin");
1175
1176	IGB_CORE_LOCK_ASSERT(adapter);
1177
1178	igb_stop(adapter);
1179
1180	/*
1181	 * Packet Buffer Allocation (PBA)
1182	 * Writing PBA sets the receive portion of the buffer;
1183	 * the remainder is used for the transmit buffer.
1184	 */
1185	if (adapter->hw.mac.type == e1000_82575) {
1186		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
1187		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
1188		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
1189	}
1190
1191	/* Get the latest mac address, User can use a LAA */
1192        bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1193              ETHER_ADDR_LEN);
1194
1195	/* Put the address into the Receive Address Array */
1196	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1197
1198	/* Initialize the hardware */
1199	if (igb_hardware_init(adapter)) {
1200		device_printf(dev, "Unable to initialize the hardware\n");
1201		return;
1202	}
1203	igb_update_link_status(adapter);
1204
1205	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1206
1207	/* Set hardware offload abilities */
1208	ifp->if_hwassist = 0;
1209	if (ifp->if_capenable & IFCAP_TXCSUM) {
1210		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1211#if __FreeBSD_version >= 800000
1212		if (adapter->hw.mac.type == e1000_82576)
1213			ifp->if_hwassist |= CSUM_SCTP;
1214#endif
1215	}
1216
1217	if (ifp->if_capenable & IFCAP_TSO4)
1218		ifp->if_hwassist |= CSUM_TSO;
1219
1220	/* Configure for OS presence */
1221	igb_init_manageability(adapter);
1222
1223	/* Prepare transmit descriptors and buffers */
1224	igb_setup_transmit_structures(adapter);
1225	igb_initialize_transmit_units(adapter);
1226
1227	/* Setup Multicast table */
1228	igb_set_multi(adapter);
1229
1230	/*
1231	** Figure out the desired mbuf pool
1232	** for doing jumbo/packetsplit
1233	*/
1234	if (ifp->if_mtu > ETHERMTU)
1235		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1236	else
1237		adapter->rx_mbuf_sz = MCLBYTES;
1238
1239	/* Prepare receive descriptors and buffers */
1240	if (igb_setup_receive_structures(adapter)) {
1241		device_printf(dev, "Could not setup receive structures\n");
1242		igb_stop(adapter);
1243		return;
1244	}
1245	igb_initialize_receive_units(adapter);
1246
1247	/* Don't lose promiscuous settings */
1248	igb_set_promisc(adapter);
1249
1250	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1251	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1252
1253	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1254	e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1255
1256	if (adapter->msix > 1) /* Set up queue routing */
1257		igb_configure_queues(adapter);
1258
1259	/* Set up VLAN tag offload and filter */
1260	igb_setup_vlan_hw_support(adapter);
1261
1262        /* Set default RX interrupt moderation */
1263	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1264		E1000_WRITE_REG(&adapter->hw,
1265		    E1000_EITR(rxr->msix), igb_ave_latency);
1266		rxr->eitr_setting = igb_ave_latency;
1267	}
1268
1269	/* Set TX interrupt rate & reset TX watchdog */
1270	for (int i = 0; i < adapter->num_queues; i++, txr++) {
1271		E1000_WRITE_REG(&adapter->hw,
1272		    E1000_EITR(txr->msix), igb_ave_latency);
1273		txr->watchdog_timer = FALSE;
1274	}
1275
1276	{
1277		/* this clears any pending interrupts */
1278		E1000_READ_REG(&adapter->hw, E1000_ICR);
1279		igb_enable_intr(adapter);
1280		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1281	}
1282
1283	/* Don't reset the phy next time init gets called */
1284	adapter->hw.phy.reset_disable = TRUE;
1285}
1286
1287static void
1288igb_init(void *arg)
1289{
1290	struct adapter *adapter = arg;
1291
1292	IGB_CORE_LOCK(adapter);
1293	igb_init_locked(adapter);
1294	IGB_CORE_UNLOCK(adapter);
1295}
1296
1297
1298static void
1299igb_handle_rxtx(void *context, int pending)
1300{
1301	struct adapter	*adapter = context;
1302	struct tx_ring	*txr = adapter->tx_rings;
1303	struct rx_ring	*rxr = adapter->rx_rings;
1304	struct ifnet	*ifp;
1305
1306	ifp = adapter->ifp;
1307
1308	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1309		if (igb_rxeof(rxr, adapter->rx_process_limit))
1310			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1311		IGB_TX_LOCK(txr);
1312		igb_txeof(txr);
1313
1314#if __FreeBSD_version >= 800000
1315		if (!drbr_empty(ifp, txr->br))
1316			igb_mq_start_locked(ifp, txr, NULL);
1317#else
1318		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1319			igb_start_locked(txr, ifp);
1320#endif
1321		IGB_TX_UNLOCK(txr);
1322	}
1323
1324	igb_enable_intr(adapter);
1325}
1326
1327static void
1328igb_handle_rx(void *context, int pending)
1329{
1330	struct rx_ring  *rxr = context;
1331	struct adapter  *adapter = rxr->adapter;
1332	struct ifnet    *ifp = adapter->ifp;
1333
1334	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1335		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
1336			/* More to clean, schedule another task */
1337			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1338
1339}
1340
1341static void
1342igb_handle_tx(void *context, int pending)
1343{
1344	struct tx_ring  *txr = context;
1345	struct adapter  *adapter = txr->adapter;
1346	struct ifnet    *ifp = adapter->ifp;
1347
1348	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1349		IGB_TX_LOCK(txr);
1350		igb_txeof(txr);
1351#if __FreeBSD_version >= 800000
1352		if (!drbr_empty(ifp, txr->br))
1353			igb_mq_start_locked(ifp, txr, NULL);
1354#else
1355		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1356			igb_start_locked(txr, ifp);
1357#endif
1358		IGB_TX_UNLOCK(txr);
1359	}
1360}
1361
1362
1363/*********************************************************************
1364 *
1365 *  MSI/Legacy Deferred
1366 *  Interrupt Service routine
1367 *
1368 *********************************************************************/
1369static int
1370igb_irq_fast(void *arg)
1371{
1372	struct adapter	*adapter = arg;
1373	uint32_t	reg_icr;
1374
1375
1376	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1377
1378	/* Hot eject?  */
1379	if (reg_icr == 0xffffffff)
1380		return FILTER_STRAY;
1381
1382	/* Definitely not our interrupt.  */
1383	if (reg_icr == 0x0)
1384		return FILTER_STRAY;
1385
1386	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1387		return FILTER_STRAY;
1388
1389	/*
1390	 * Mask interrupts until the taskqueue is finished running.  This is
1391	 * cheap, just assume that it is needed.  This also works around the
1392	 * MSI message reordering errata on certain systems.
1393	 */
1394	igb_disable_intr(adapter);
1395	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1396
1397	/* Link status change */
1398	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1399		adapter->hw.mac.get_link_status = 1;
1400		igb_update_link_status(adapter);
1401	}
1402
1403	if (reg_icr & E1000_ICR_RXO)
1404		adapter->rx_overruns++;
1405	return FILTER_HANDLED;
1406}
1407
1408
1409/*********************************************************************
1410 *
1411 *  MSIX TX Interrupt Service routine
1412 *
1413 **********************************************************************/
1414static void
1415igb_msix_tx(void *arg)
1416{
1417	struct tx_ring *txr = arg;
1418	struct adapter *adapter = txr->adapter;
1419	u32		loop = IGB_MAX_LOOP;
1420	bool		more;
1421
1422	++txr->tx_irq;
1423	IGB_TX_LOCK(txr);
1424
1425	do {
1426		more = igb_txeof(txr);
1427	} while (loop-- && more);
1428
1429	IGB_TX_UNLOCK(txr);
1430
1431	/* Schedule a clean task */
1432	taskqueue_enqueue(adapter->tq, &txr->tx_task);
1433
1434	/* Reenable this interrupt */
1435	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
1436	return;
1437}
1438
1439/*********************************************************************
1440 *
1441 *  MSIX RX Interrupt Service routine
1442 *
1443 **********************************************************************/
1444
1445static void
1446igb_msix_rx(void *arg)
1447{
1448	struct rx_ring *rxr = arg;
1449	struct adapter *adapter = rxr->adapter;
1450	u32		loop = IGB_MAX_LOOP;
1451	bool		more;
1452
1453	++rxr->rx_irq;
1454	do {
1455		more = igb_rxeof(rxr, adapter->rx_process_limit);
1456	} while (loop-- && more);
1457
1458	/* Update interrupt rate */
1459	if (igb_enable_aim == TRUE)
1460		igb_update_aim(rxr);
1461
1462	/* Schedule another clean */
1463	taskqueue_enqueue(adapter->tq, &rxr->rx_task);
1464
1465	/* Reenable this interrupt */
1466	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
1467	return;
1468}
1469
1470
1471/*********************************************************************
1472 *
1473 *  MSIX Link Interrupt Service routine
1474 *
1475 **********************************************************************/
1476
1477static void
1478igb_msix_link(void *arg)
1479{
1480	struct adapter	*adapter = arg;
1481	u32       	icr;
1482
1483	++adapter->link_irq;
1484	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1485	if (!(icr & E1000_ICR_LSC))
1486		goto spurious;
1487	adapter->hw.mac.get_link_status = 1;
1488	igb_update_link_status(adapter);
1489
1490spurious:
1491	/* Rearm */
1492	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1493	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1494	return;
1495}
1496
1497
1498/*
1499** Routine to adjust the RX EITR value based on traffic;
1500** it's a simple three-state model, but it seems to help.
1501**
1502** Note that the three EITR values are tuneable using
1503** sysctl in real time. The feature can be effectively
1504** nullified by setting them equal.
1505*/
1506#define BULK_THRESHOLD	10000
1507#define AVE_THRESHOLD	1600
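/*
 * These thresholds are compared against rxr->bytes, the receive byte count
 * a queue has accumulated since the last adjustment; the counter is zeroed
 * at the end of igb_update_aim() below.
 */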
1508
1509static void
1510igb_update_aim(struct rx_ring *rxr)
1511{
1512	struct adapter	*adapter = rxr->adapter;
1513	u32		olditr, newitr;
1514
1515	/* Update interrupt moderation based on traffic */
1516	olditr = rxr->eitr_setting;
1517	newitr = olditr;
1518
1519	/* Idle, don't change setting */
1520	if (rxr->bytes == 0)
1521		return;
1522
1523	if (olditr == igb_low_latency) {
1524		if (rxr->bytes > AVE_THRESHOLD)
1525			newitr = igb_ave_latency;
1526	} else if (olditr == igb_ave_latency) {
1527		if (rxr->bytes < AVE_THRESHOLD)
1528			newitr = igb_low_latency;
1529		else if (rxr->bytes > BULK_THRESHOLD)
1530			newitr = igb_bulk_latency;
1531	} else if (olditr == igb_bulk_latency) {
1532		if (rxr->bytes < BULK_THRESHOLD)
1533			newitr = igb_ave_latency;
1534	}
1535
1536	if (olditr != newitr) {
1537		/* Change interrupt rate */
1538		rxr->eitr_setting = newitr;
1539		if (adapter->hw.mac.type == e1000_82575)
1540			newitr |= newitr << 16;
1541		else
1542			newitr |= 0x8000000;
1543		E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me), newitr);
1544	}
1545
1546	rxr->bytes = 0;
1547        return;
1548}
1549
1550
1551/*********************************************************************
1552 *
1553 *  Media Ioctl callback
1554 *
1555 *  This routine is called whenever the user queries the status of
1556 *  the interface using ifconfig.
1557 *
1558 **********************************************************************/
1559static void
1560igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1561{
1562	struct adapter *adapter = ifp->if_softc;
1563	u_char fiber_type = IFM_1000_SX;
1564
1565	INIT_DEBUGOUT("igb_media_status: begin");
1566
1567	IGB_CORE_LOCK(adapter);
1568	igb_update_link_status(adapter);
1569
1570	ifmr->ifm_status = IFM_AVALID;
1571	ifmr->ifm_active = IFM_ETHER;
1572
1573	if (!adapter->link_active) {
1574		IGB_CORE_UNLOCK(adapter);
1575		return;
1576	}
1577
1578	ifmr->ifm_status |= IFM_ACTIVE;
1579
1580	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1581	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
1582		ifmr->ifm_active |= fiber_type | IFM_FDX;
1583	else {
1584		switch (adapter->link_speed) {
1585		case 10:
1586			ifmr->ifm_active |= IFM_10_T;
1587			break;
1588		case 100:
1589			ifmr->ifm_active |= IFM_100_TX;
1590			break;
1591		case 1000:
1592			ifmr->ifm_active |= IFM_1000_T;
1593			break;
1594		}
1595		if (adapter->link_duplex == FULL_DUPLEX)
1596			ifmr->ifm_active |= IFM_FDX;
1597		else
1598			ifmr->ifm_active |= IFM_HDX;
1599	}
1600	IGB_CORE_UNLOCK(adapter);
1601}
1602
1603/*********************************************************************
1604 *
1605 *  Media Ioctl callback
1606 *
1607 *  This routine is called when the user changes speed/duplex using
1608 *  the media/mediaopt options with ifconfig.
1609 *
1610 **********************************************************************/
1611static int
1612igb_media_change(struct ifnet *ifp)
1613{
1614	struct adapter *adapter = ifp->if_softc;
1615	struct ifmedia  *ifm = &adapter->media;
1616
1617	INIT_DEBUGOUT("igb_media_change: begin");
1618
1619	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1620		return (EINVAL);
1621
1622	IGB_CORE_LOCK(adapter);
1623	switch (IFM_SUBTYPE(ifm->ifm_media)) {
1624	case IFM_AUTO:
1625		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1626		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1627		break;
1628	case IFM_1000_LX:
1629	case IFM_1000_SX:
1630	case IFM_1000_T:
1631		adapter->hw.mac.autoneg = DO_AUTO_NEG;
1632		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1633		break;
1634	case IFM_100_TX:
1635		adapter->hw.mac.autoneg = FALSE;
1636		adapter->hw.phy.autoneg_advertised = 0;
1637		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1638			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1639		else
1640			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1641		break;
1642	case IFM_10_T:
1643		adapter->hw.mac.autoneg = FALSE;
1644		adapter->hw.phy.autoneg_advertised = 0;
1645		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1646			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1647		else
1648			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1649		break;
1650	default:
1651		device_printf(adapter->dev, "Unsupported media type\n");
1652	}
1653
1654	/* As the speed/duplex settings may have changed, we need to
1655	 * reset the PHY.
1656	 */
1657	adapter->hw.phy.reset_disable = FALSE;
1658
1659	igb_init_locked(adapter);
1660	IGB_CORE_UNLOCK(adapter);
1661
1662	return (0);
1663}
1664
1665
1666/*********************************************************************
1667 *
1668 *  This routine maps the mbufs to the Advanced TX descriptors
1669 *  used by the 82575 adapter.
1670 *
1671 **********************************************************************/
1672
1673static int
1674igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1675{
1676	struct adapter		*adapter = txr->adapter;
1677	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1678	bus_dmamap_t		map;
1679	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1680	union e1000_adv_tx_desc	*txd = NULL;
1681	struct mbuf		*m_head;
1682	u32			olinfo_status = 0, cmd_type_len = 0;
1683	int			nsegs, i, j, error, first, last = 0;
1684	u32			hdrlen = 0;
1685
1686	m_head = *m_headp;
1687
1688
1689	/* Set basic descriptor constants */
1690	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1691	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1692	if (m_head->m_flags & M_VLANTAG)
1693		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1694
1695        /*
1696         * Force a cleanup if number of TX descriptors
1697         * available hits the threshold
1698         */
1699	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1700		igb_txeof(txr);
1701		/* Now do we at least have a minimal number available? */
1702		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1703			txr->no_desc_avail++;
1704			return (ENOBUFS);
1705		}
1706	}
1707
1708	/*
1709         * Map the packet for DMA.
1710	 *
1711	 * Capture the first descriptor index,
1712	 * this descriptor will have the index
1713	 * of the EOP which is the only one that
1714	 * now gets a DONE bit writeback.
1715	 */
1716	first = txr->next_avail_desc;
1717	tx_buffer = &txr->tx_buffers[first];
1718	tx_buffer_mapped = tx_buffer;
1719	map = tx_buffer->map;
1720
1721	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1722	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1723
1724	if (error == EFBIG) {
1725		struct mbuf *m;
1726
1727		m = m_defrag(*m_headp, M_DONTWAIT);
1728		if (m == NULL) {
1729			adapter->mbuf_defrag_failed++;
1730			m_freem(*m_headp);
1731			*m_headp = NULL;
1732			return (ENOBUFS);
1733		}
1734		*m_headp = m;
1735
1736		/* Try it again */
1737		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1738		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1739
1740		if (error == ENOMEM) {
1741			adapter->no_tx_dma_setup++;
1742			return (error);
1743		} else if (error != 0) {
1744			adapter->no_tx_dma_setup++;
1745			m_freem(*m_headp);
1746			*m_headp = NULL;
1747			return (error);
1748		}
1749	} else if (error == ENOMEM) {
1750		adapter->no_tx_dma_setup++;
1751		return (error);
1752	} else if (error != 0) {
1753		adapter->no_tx_dma_setup++;
1754		m_freem(*m_headp);
1755		*m_headp = NULL;
1756		return (error);
1757	}
1758
1759	/* Check again to be sure we have enough descriptors */
1760        if (nsegs > (txr->tx_avail - 2)) {
1761                txr->no_desc_avail++;
1762		bus_dmamap_unload(txr->txtag, map);
1763		return (ENOBUFS);
1764        }
1765	m_head = *m_headp;
1766
1767        /*
1768         * Set up the context descriptor:
1769         * used when any hardware offload is done.
1770	 * This includes CSUM, VLAN, and TSO. It
1771	 * will use the first descriptor.
1772         */
1773        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1774		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1775			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1776			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1777			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1778		} else
1779			return (ENXIO);
1780	} else if (igb_tx_ctx_setup(txr, m_head))
1781		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1782
1783#ifdef IGB_IEEE1588
1784	/* This is changing soon to an mtag detection */
1785	if (0) /* placeholder: we detect this mbuf has a TSTAMP mtag */
1786		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
1787#endif
1788	/* Calculate payload length */
1789	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1790	    << E1000_ADVTXD_PAYLEN_SHIFT);
1791
1792	/* Set up our transmit descriptors */
1793	i = txr->next_avail_desc;
1794	for (j = 0; j < nsegs; j++) {
1795		bus_size_t seg_len;
1796		bus_addr_t seg_addr;
1797
1798		tx_buffer = &txr->tx_buffers[i];
1799		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1800		seg_addr = segs[j].ds_addr;
1801		seg_len  = segs[j].ds_len;
1802
1803		txd->read.buffer_addr = htole64(seg_addr);
1804		txd->read.cmd_type_len = htole32(
1805		    adapter->txd_cmd | cmd_type_len | seg_len);
1806		txd->read.olinfo_status = htole32(olinfo_status);
1807		last = i;
1808		if (++i == adapter->num_tx_desc)
1809			i = 0;
1810		tx_buffer->m_head = NULL;
1811		tx_buffer->next_eop = -1;
1812	}
1813
1814	txr->next_avail_desc = i;
1815	txr->tx_avail -= nsegs;
1816
1817	tx_buffer->m_head = m_head;
1818	tx_buffer_mapped->map = tx_buffer->map;
1819	tx_buffer->map = map;
1820	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1821
1822	/*
1823	 * The last descriptor of the packet
1824	 * needs End Of Packet (EOP)
1825	 * and Report Status (RS).
1826	 */
1827	txd->read.cmd_type_len |=
1828	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1829	/*
1830	 * Keep track in the first buffer which
1831	 * descriptor will be written back
1832	 */
1833	tx_buffer = &txr->tx_buffers[first];
1834	tx_buffer->next_eop = last;
1835
1836	/*
1837	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
1838	 * that this frame is available to transmit.
1839	 */
1840	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1841	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1842	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1843	++txr->tx_packets;
1844
1845	return (0);
1846
1847}
1848
1849static void
1850igb_set_promisc(struct adapter *adapter)
1851{
1852	struct ifnet	*ifp = adapter->ifp;
1853	uint32_t	reg_rctl;
1854
1855	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1856
1857	if (ifp->if_flags & IFF_PROMISC) {
1858		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1859		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1860	} else if (ifp->if_flags & IFF_ALLMULTI) {
1861		reg_rctl |= E1000_RCTL_MPE;
1862		reg_rctl &= ~E1000_RCTL_UPE;
1863		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1864	}
1865}
1866
1867static void
1868igb_disable_promisc(struct adapter *adapter)
1869{
1870	uint32_t	reg_rctl;
1871
1872	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1873
1874	reg_rctl &=  (~E1000_RCTL_UPE);
1875	reg_rctl &=  (~E1000_RCTL_MPE);
1876	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1877}
1878
1879
1880/*********************************************************************
1881 *  Multicast Update
1882 *
1883 *  This routine is called whenever the multicast address list is updated.
1884 *
1885 **********************************************************************/
1886
1887static void
1888igb_set_multi(struct adapter *adapter)
1889{
1890	struct ifnet	*ifp = adapter->ifp;
1891	struct ifmultiaddr *ifma;
1892	u32 reg_rctl = 0;
1893	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1894
1895	int mcnt = 0;
1896
1897	IOCTL_DEBUGOUT("igb_set_multi: begin");
1898
1899	if_maddr_rlock(ifp);
1900	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1901		if (ifma->ifma_addr->sa_family != AF_LINK)
1902			continue;
1903
1904		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1905			break;
1906
1907		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1908		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1909		mcnt++;
1910	}
1911	if_maddr_runlock(ifp);
1912
1913	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
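	/*
	 * If there are more addresses than the hardware filter can hold,
	 * fall back to multicast promiscuous mode.
	 */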
1914		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1915		reg_rctl |= E1000_RCTL_MPE;
1916		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1917	} else
1918		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1919}
1920
1921
1922/*********************************************************************
1923 *  Timer routine
1924 *
1925 *  This routine checks for link status and updates statistics.
1926 *
1927 **********************************************************************/
1928
1929static void
1930igb_local_timer(void *arg)
1931{
1932	struct adapter	*adapter = arg;
1933	struct ifnet	*ifp = adapter->ifp;
1934
1935	IGB_CORE_LOCK_ASSERT(adapter);
1936
1937	igb_update_link_status(adapter);
1938	igb_update_stats_counters(adapter);
1939
1940	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1941		igb_print_hw_stats(adapter);
1942
1943	/*
1944	 * Each second we check the watchdog to
1945	 * protect against hardware hangs.
1946	 */
1947	igb_watchdog(adapter);
1948
1949	/* Trigger an RX interrupt on all queues */
1950	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1951
1952	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1953
1954}
1955
1956static void
1957igb_update_link_status(struct adapter *adapter)
1958{
1959	struct e1000_hw *hw = &adapter->hw;
1960	struct ifnet *ifp = adapter->ifp;
1961	device_t dev = adapter->dev;
1962	struct tx_ring *txr = adapter->tx_rings;
1963	u32 link_check = 0;
1964
1965	/* Get the cached link value or read for real */
1966	switch (hw->phy.media_type) {
1967	case e1000_media_type_copper:
1968		if (hw->mac.get_link_status) {
1969			/* Do the work to read the phy */
1970			e1000_check_for_link(hw);
1971			link_check = !hw->mac.get_link_status;
1972		} else
1973			link_check = TRUE;
1974		break;
1975	case e1000_media_type_fiber:
1976		e1000_check_for_link(hw);
1977		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1978		    E1000_STATUS_LU);
1979		break;
1980	case e1000_media_type_internal_serdes:
1981		e1000_check_for_link(hw);
1982		link_check = adapter->hw.mac.serdes_has_link;
1983		break;
1984	case e1000_media_type_unknown:
1985	default:
1986		break;
1987	}
1988
1989	/* Now we check if a transition has happened */
1990	if (link_check && (adapter->link_active == 0)) {
1991		e1000_get_speed_and_duplex(&adapter->hw,
1992		    &adapter->link_speed, &adapter->link_duplex);
1993		if (bootverbose)
1994			device_printf(dev, "Link is up %d Mbps %s\n",
1995			    adapter->link_speed,
1996			    ((adapter->link_duplex == FULL_DUPLEX) ?
1997			    "Full Duplex" : "Half Duplex"));
1998		adapter->link_active = 1;
1999		ifp->if_baudrate = adapter->link_speed * 1000000;
2000		if_link_state_change(ifp, LINK_STATE_UP);
2001	} else if (!link_check && (adapter->link_active == 1)) {
2002		ifp->if_baudrate = adapter->link_speed = 0;
2003		adapter->link_duplex = 0;
2004		if (bootverbose)
2005			device_printf(dev, "Link is Down\n");
2006		adapter->link_active = 0;
2007		if_link_state_change(ifp, LINK_STATE_DOWN);
2008		/* Turn off watchdogs */
2009		for (int i = 0; i < adapter->num_queues; i++, txr++)
2010			txr->watchdog_timer = FALSE;
2011	}
2012}
2013
2014/*********************************************************************
2015 *
2016 *  This routine disables all traffic on the adapter by issuing a
2017 *  global reset on the MAC and deallocates TX/RX buffers.
2018 *
2019 **********************************************************************/
2020
2021static void
2022igb_stop(void *arg)
2023{
2024	struct adapter	*adapter = arg;
2025	struct ifnet	*ifp = adapter->ifp;
2026
2027	IGB_CORE_LOCK_ASSERT(adapter);
2028
2029	INIT_DEBUGOUT("igb_stop: begin");
2030
2031	igb_disable_intr(adapter);
2032
2033	callout_stop(&adapter->timer);
2034
2035	/* Tell the stack that the interface is no longer active */
2036	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
2037
2038	e1000_reset_hw(&adapter->hw);
2039	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2040}
2041
2042
2043/*********************************************************************
2044 *
2045 *  Determine hardware revision.
2046 *
2047 **********************************************************************/
2048static void
2049igb_identify_hardware(struct adapter *adapter)
2050{
2051	device_t dev = adapter->dev;
2052
2053	/* Make sure our PCI config space has the necessary stuff set */
2054	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2055	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2056	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2057		device_printf(dev, "Memory Access and/or Bus Master bits "
2058		    "were not set!\n");
2059		adapter->hw.bus.pci_cmd_word |=
2060		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2061		pci_write_config(dev, PCIR_COMMAND,
2062		    adapter->hw.bus.pci_cmd_word, 2);
2063	}
2064
2065	/* Save off the information about this board */
2066	adapter->hw.vendor_id = pci_get_vendor(dev);
2067	adapter->hw.device_id = pci_get_device(dev);
2068	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2069	adapter->hw.subsystem_vendor_id =
2070	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2071	adapter->hw.subsystem_device_id =
2072	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2073
2074	/* Do Shared Code Init and Setup */
2075	if (e1000_set_mac_type(&adapter->hw)) {
2076		device_printf(dev, "Setup init failure\n");
2077		return;
2078	}
2079}
2080
2081static int
2082igb_allocate_pci_resources(struct adapter *adapter)
2083{
2084	device_t	dev = adapter->dev;
2085	int		rid;
2086
2087	rid = PCIR_BAR(0);
2088	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2089	    &rid, RF_ACTIVE);
2090	if (adapter->pci_mem == NULL) {
2091		device_printf(dev, "Unable to allocate bus resource: memory\n");
2092		return (ENXIO);
2093	}
2094	adapter->osdep.mem_bus_space_tag =
2095	    rman_get_bustag(adapter->pci_mem);
2096	adapter->osdep.mem_bus_space_handle =
2097	    rman_get_bushandle(adapter->pci_mem);
2098	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2099
2100	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2101
2102	/* This will setup either MSI/X or MSI */
2103	adapter->msix = igb_setup_msix(adapter);
2104	adapter->hw.back = &adapter->osdep;
2105
2106	return (0);
2107}
2108
2109/*********************************************************************
2110 *
2111 *  Setup the Legacy or MSI Interrupt handler
2112 *
2113 **********************************************************************/
2114static int
2115igb_allocate_legacy(struct adapter *adapter)
2116{
2117	device_t dev = adapter->dev;
2118	int error, rid = 0;
2119
2120	/* Turn off all interrupts */
2121	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2122
2123	/* MSI RID is 1 */
2124	if (adapter->msix == 1)
2125		rid = 1;
2126
2127	/* We allocate a single interrupt resource */
2128	adapter->res = bus_alloc_resource_any(dev,
2129	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2130	if (adapter->res == NULL) {
2131		device_printf(dev, "Unable to allocate bus resource: "
2132		    "interrupt\n");
2133		return (ENXIO);
2134	}
2135
2136	/*
2137	 * Try allocating a fast interrupt and the associated deferred
2138	 * processing contexts.
2139	 */
2140	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2141	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2142	    taskqueue_thread_enqueue, &adapter->tq);
2143	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2144	    device_get_nameunit(adapter->dev));
2145	if ((error = bus_setup_intr(dev, adapter->res,
2146	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2147	    adapter, &adapter->tag)) != 0) {
2148		device_printf(dev, "Failed to register fast interrupt "
2149			    "handler: %d\n", error);
2150		taskqueue_free(adapter->tq);
2151		adapter->tq = NULL;
2152		return (error);
2153	}
2154
2155	return (0);
2156}
2157
2158
2159/*********************************************************************
2160 *
2161 *  Setup the MSIX Interrupt handlers:
2162 *
2163 **********************************************************************/
2164static int
2165igb_allocate_msix(struct adapter *adapter)
2166{
2167	device_t dev = adapter->dev;
2168	struct tx_ring *txr = adapter->tx_rings;
2169	struct rx_ring *rxr = adapter->rx_rings;
2170	int error, rid, vector = 0;
2171
2172	/*
2173	 * Setup the interrupt handlers
2174	 */
2175
2176	/* TX Setup */
2177	for (int i = 0; i < adapter->num_queues; i++, vector++, txr++) {
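		/* SYS_RES_IRQ rids for MSI/MSI-X vectors are 1-based */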
2178		rid = vector + 1;
2179		txr->res = bus_alloc_resource_any(dev,
2180		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2181		if (txr->res == NULL) {
2182			device_printf(dev,
2183			    "Unable to allocate bus resource: "
2184			    "MSIX TX Interrupt\n");
2185			return (ENXIO);
2186		}
2187		error = bus_setup_intr(dev, txr->res,
2188	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2189		    igb_msix_tx, txr, &txr->tag);
2190		if (error) {
2191			txr->res = NULL;
2192			device_printf(dev, "Failed to register TX handler");
2193			return (error);
2194		}
2195		/* Make tasklet for deferred handling - one per queue */
2196		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2197		txr->msix = vector;
2198		if (adapter->hw.mac.type == e1000_82575)
2199			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2200		else
2201			txr->eims = 1 << vector;
2202		/*
2203		** Bind the msix vector, and thus the
2204		** ring to the corresponding cpu.
2205		*/
2206		if (adapter->num_queues > 1)
2207			bus_bind_intr(dev, txr->res, i);
2208	}
2209
2210	/* RX Setup */
2211	for (int i = 0; i < adapter->num_queues; i++, vector++, rxr++) {
2212		rid = vector + 1;
2213		rxr->res = bus_alloc_resource_any(dev,
2214		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2215		if (rxr->res == NULL) {
2216			device_printf(dev,
2217			    "Unable to allocate bus resource: "
2218			    "MSIX RX Interrupt\n");
2219			return (ENXIO);
2220		}
2221		error = bus_setup_intr(dev, rxr->res,
2222	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2223		    igb_msix_rx, rxr, &rxr->tag);
2224		if (error) {
2225			rxr->res = NULL;
2226			device_printf(dev, "Failed to register RX handler");
2227			return (error);
2228		}
2229		/* Make tasklet for deferred handling - one per queue */
2230		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2231		rxr->msix = vector;
2232		if (adapter->hw.mac.type == e1000_82575)
2233			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2234		else
2235			rxr->eims = 1 << vector;
2236		/* Get a mask for local timer */
2237		adapter->rx_mask |= rxr->eims;
2238		/*
2239		** Bind the msix vector, and thus the
2240		** ring to the corresponding cpu.
2241		** Notice that this makes an RX/TX pair
2242		** bound to each CPU, limited by the MSIX
2243		** vectors.
2244		*/
2245		if (adapter->num_queues > 1)
2246			bus_bind_intr(dev, rxr->res, i);
2247	}
2248
2249	/* And Link */
2250	rid = vector + 1;
2251	adapter->res = bus_alloc_resource_any(dev,
2252	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2253	if (adapter->res == NULL) {
2254		device_printf(dev,
2255		    "Unable to allocate bus resource: "
2256		    "MSIX Link Interrupt\n");
2257		return (ENXIO);
2258	}
2259	if ((error = bus_setup_intr(dev, adapter->res,
2260	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2261	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2262		device_printf(dev, "Failed to register Link handler");
2263		return (error);
2264	}
2265	adapter->linkvec = vector;
2266	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2267	    taskqueue_thread_enqueue, &adapter->tq);
2268	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2269	    device_get_nameunit(adapter->dev));
2270
2271	return (0);
2272}
2273
2274
2275static void
2276igb_configure_queues(struct adapter *adapter)
2277{
2278	struct	e1000_hw *hw = &adapter->hw;
2279	struct	tx_ring	*txr;
2280	struct	rx_ring	*rxr;
2281
2282	/* Turn on MSIX */
2283	/*
2284	** 82576 uses IVARs to route MSI/X
2285	** interrupts; it's not very intuitive,
2286	** so study the code carefully :)
2287	*/
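	/*
	 * As used below: each 32-bit IVAR register carries entries for two
	 * queues. Queues 0-7 use the low 16 bits (RX vector in byte 0, TX
	 * vector in byte 1); queues 8-15 use the high 16 bits (RX in byte 2,
	 * TX in byte 3).
	 */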
2288	if (adapter->hw.mac.type == e1000_82576) {
2289		u32	ivar = 0;
2290		/* First turn on the capability */
2291		E1000_WRITE_REG(hw, E1000_GPIE,
2292		    E1000_GPIE_MSIX_MODE |
2293		    E1000_GPIE_EIAME |
2294		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2295		/* RX */
2296		for (int i = 0; i < adapter->num_queues; i++) {
2297			u32 index = i & 0x7; /* Each IVAR has two entries */
2298			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2299			rxr = &adapter->rx_rings[i];
2300			if (i < 8) {
2301				ivar &= 0xFFFFFF00;
2302				ivar |= rxr->msix | E1000_IVAR_VALID;
2303			} else {
2304				ivar &= 0xFF00FFFF;
2305				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2306			}
2307			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2308			adapter->eims_mask |= rxr->eims;
2309		}
2310		/* TX */
2311		for (int i = 0; i < adapter->num_queues; i++) {
2312			u32 index = i & 0x7; /* Each IVAR has two entries */
2313			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2314			txr = &adapter->tx_rings[i];
2315			if (i < 8) {
2316				ivar &= 0xFFFF00FF;
2317				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2318			} else {
2319				ivar &= 0x00FFFFFF;
2320				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2321			}
2322			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2323			adapter->eims_mask |= txr->eims;
2324		}
2325
2326		/* And for the link interrupt */
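		/* The link/other-cause vector lives in bits 15:8 of IVAR_MISC */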
2327		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2328		adapter->link_mask = 1 << adapter->linkvec;
2329		adapter->eims_mask |= adapter->link_mask;
2330		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2331	} else
2332	{ /* 82575 */
2333		int tmp;
2334
2335		/* Enable MSI-X PBA support */
2336		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2337		tmp |= E1000_CTRL_EXT_PBA_CLR;
2338		/* Auto-Mask interrupts upon ICR read. */
2339		tmp |= E1000_CTRL_EXT_EIAME;
2340		tmp |= E1000_CTRL_EXT_IRCA;
2341		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2342
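		/*
		 * On 82575 each vector is routed with a per-vector MSIXBM
		 * bitmask register rather than the IVAR scheme used above.
		 */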
2343		/* TX */
2344		for (int i = 0; i < adapter->num_queues; i++) {
2345			txr = &adapter->tx_rings[i];
2346			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2347			    txr->eims);
2348			adapter->eims_mask |= txr->eims;
2349		}
2350
2351		/* RX */
2352		for (int i = 0; i < adapter->num_queues; i++) {
2353			rxr = &adapter->rx_rings[i];
2354			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2355			    rxr->eims);
2356			adapter->eims_mask |= rxr->eims;
2357		}
2358
2359		/* Link */
2360		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2361		    E1000_EIMS_OTHER);
2362		adapter->link_mask |= E1000_EIMS_OTHER;
2363		adapter->eims_mask |= adapter->link_mask;
2364	}
2365	return;
2366}
2367
2368
2369static void
2370igb_free_pci_resources(struct adapter *adapter)
2371{
2372	struct          tx_ring *txr = adapter->tx_rings;
2373	struct          rx_ring *rxr = adapter->rx_rings;
2374	device_t	dev = adapter->dev;
2375	int		rid;
2376
2377	/*
2378	** There is a slight possibility of a failure mode
2379	** in attach that will result in entering this function
2380	** before interrupt resources have been initialized;
2381	** in that case we do not want to execute the loops below.
2382	** We can detect this reliably from the state of the
2383	** adapter's res pointer.
2384	*/
2385	if (adapter->res == NULL)
2386		goto mem;
2387
2388	/*
2389	 * First release all the TX/RX interrupt resources:
2390	 */
2391	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2392		rid = txr->msix + 1;
2393		if (txr->tag != NULL) {
2394			bus_teardown_intr(dev, txr->res, txr->tag);
2395			txr->tag = NULL;
2396		}
2397		if (txr->res != NULL)
2398			bus_release_resource(dev, SYS_RES_IRQ, rid, txr->res);
2399	}
2400
2401	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
2402		rid = rxr->msix + 1;
2403		if (rxr->tag != NULL) {
2404			bus_teardown_intr(dev, rxr->res, rxr->tag);
2405			rxr->tag = NULL;
2406		}
2407		if (rxr->res != NULL)
2408			bus_release_resource(dev, SYS_RES_IRQ, rid, rxr->res);
2409	}
2410
2411	/* Clean the Legacy or Link interrupt last */
2412	if (adapter->linkvec) /* we are doing MSIX */
2413		rid = adapter->linkvec + 1;
2414	else
2415		rid = (adapter->msix != 0) ? 1 : 0;
2416
2417	if (adapter->tag != NULL) {
2418		bus_teardown_intr(dev, adapter->res, adapter->tag);
2419		adapter->tag = NULL;
2420	}
2421	if (adapter->res != NULL)
2422		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2423
2424mem:
2425	if (adapter->msix)
2426		pci_release_msi(dev);
2427
2428	if (adapter->msix_mem != NULL)
2429		bus_release_resource(dev, SYS_RES_MEMORY,
2430		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2431
2432	if (adapter->pci_mem != NULL)
2433		bus_release_resource(dev, SYS_RES_MEMORY,
2434		    PCIR_BAR(0), adapter->pci_mem);
2435
2436}
2437
2438/*
2439 * Setup Either MSI/X or MSI
2440 */
2441static int
2442igb_setup_msix(struct adapter *adapter)
2443{
2444	device_t dev = adapter->dev;
2445	int rid, want, queues, msgs;
2446
2447	/* First try MSI/X */
2448	rid = PCIR_BAR(IGB_MSIX_BAR);
2449	adapter->msix_mem = bus_alloc_resource_any(dev,
2450	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451	if (!adapter->msix_mem) {
2452		/* May not be enabled */
2453		device_printf(adapter->dev,
2454		    "Unable to map MSIX table\n");
2455		goto msi;
2456	}
2457
2458	msgs = pci_msix_count(dev);
2459	if (msgs == 0) { /* system has msix disabled */
2460		bus_release_resource(dev, SYS_RES_MEMORY,
2461		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2462		adapter->msix_mem = NULL;
2463		goto msi;
2464	}
2465
2466	/* Auto config: an RX/TX vector pair per queue, plus one vector for link */
2467	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2468
2469	if (igb_num_queues == 0)
2470		igb_num_queues = queues;
2471	/*
2472	** Two vectors (RX/TX pair) per queue
2473	** plus an additional for Link interrupt
2474	*/
2475	want = (igb_num_queues * 2) + 1;
2476	if (msgs >= want)
2477		msgs = want;
2478	else {
2479		device_printf(adapter->dev,
2480		    "MSIX Configuration Problem, "
2481		    "%d vectors available, but %d wanted!\n",
2482		    msgs, want);
2483		return (ENXIO);
2484	}
2485	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
2486		device_printf(adapter->dev,
2487		    "Using MSIX interrupts with %d vectors\n", msgs);
2488		adapter->num_queues = igb_num_queues;
2489		return (msgs);
2490	}
2491msi:
2492	msgs = pci_msi_count(dev);
2493	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2494		device_printf(adapter->dev, "Using MSI interrupt\n");
2495	return (msgs);
2496}
2497
2498/*********************************************************************
2499 *
2500 *  Initialize the hardware to a configuration
2501 *  as specified by the adapter structure.
2502 *
2503 **********************************************************************/
2504static int
2505igb_hardware_init(struct adapter *adapter)
2506{
2507	device_t	dev = adapter->dev;
2508	u32		rx_buffer_size;
2509
2510	INIT_DEBUGOUT("igb_hardware_init: begin");
2511
2512	/* Issue a global reset */
2513	e1000_reset_hw(&adapter->hw);
2514
2515	/* Let the firmware know the OS is in control */
2516	igb_get_hw_control(adapter);
2517
2518	/*
2519	 * These parameters control the automatic generation (Tx) and
2520	 * response (Rx) to Ethernet PAUSE frames.
2521	 * - High water mark should allow for at least two frames to be
2522	 *   received after sending an XOFF.
2523	 * - Low water mark works best when it is very near the high water mark.
2524	 *   This allows the receiver to restart by sending XON when it has
2525	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2526	 *   restart after one full frame is pulled from the buffer. There
2527	 *   could be several smaller frames in the buffer and if so they will
2528	 *   not trigger the XON until their total number reduces the buffer
2529	 *   by 1500.
2530	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2531	 */
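	/* RXPBS/PBA report the RX packet buffer size in KB; shift to bytes */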
2532	if (adapter->hw.mac.type == e1000_82576)
2533		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2534		    E1000_RXPBS) & 0xffff) << 10 );
2535	else
2536		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2537		    E1000_PBA) & 0xffff) << 10 );
2538
2539	adapter->hw.fc.high_water = rx_buffer_size -
2540	    roundup2(adapter->max_frame_size, 1024);
2541	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2542
2543	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2544	adapter->hw.fc.send_xon = TRUE;
2545
2546	/* Set flow control, use the tunable value if sane */
2547	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2548		adapter->hw.fc.requested_mode = igb_fc_setting;
2549	else
2550		adapter->hw.fc.requested_mode = e1000_fc_none;
2551
2552	if (e1000_init_hw(&adapter->hw) < 0) {
2553		device_printf(dev, "Hardware Initialization Failed\n");
2554		return (EIO);
2555	}
2556
2557	e1000_check_for_link(&adapter->hw);
2558
2559	return (0);
2560}
2561
2562/*********************************************************************
2563 *
2564 *  Setup networking device structure and register an interface.
2565 *
2566 **********************************************************************/
2567static void
2568igb_setup_interface(device_t dev, struct adapter *adapter)
2569{
2570	struct ifnet   *ifp;
2571
2572	INIT_DEBUGOUT("igb_setup_interface: begin");
2573
2574	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2575	if (ifp == NULL)
2576		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2577	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2578	ifp->if_mtu = ETHERMTU;
2579	ifp->if_init =  igb_init;
2580	ifp->if_softc = adapter;
2581	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2582	ifp->if_ioctl = igb_ioctl;
2583	ifp->if_start = igb_start;
2584#if __FreeBSD_version >= 800000
2585	ifp->if_transmit = igb_mq_start;
2586	ifp->if_qflush = igb_qflush;
2587#endif
2588	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2589	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2590	IFQ_SET_READY(&ifp->if_snd);
2591
2592	ether_ifattach(ifp, adapter->hw.mac.addr);
2593
2594	ifp->if_capabilities = ifp->if_capenable = 0;
2595
2596	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_MTU;
2597	ifp->if_capabilities |= IFCAP_TSO4;
2598	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2599	ifp->if_capenable = ifp->if_capabilities;
2600
2601	/*
2602	 * Tell the upper layer(s) we support long frames.
2603	 */
2604	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2605	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2606	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
2607
2608	/*
2609	 * Specify the media types supported by this adapter and register
2610	 * callbacks to update media and link information
2611	 */
2612	ifmedia_init(&adapter->media, IFM_IMASK,
2613	    igb_media_change, igb_media_status);
2614	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2615	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2616		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2617			    0, NULL);
2618		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2619	} else {
2620		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2621		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2622			    0, NULL);
2623		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2624			    0, NULL);
2625		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2626			    0, NULL);
2627		if (adapter->hw.phy.type != e1000_phy_ife) {
2628			ifmedia_add(&adapter->media,
2629				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2630			ifmedia_add(&adapter->media,
2631				IFM_ETHER | IFM_1000_T, 0, NULL);
2632		}
2633	}
2634	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2635	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2636}
2637
2638
2639/*
2640 * Manage DMA'able memory.
2641 */
2642static void
2643igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2644{
2645	if (error)
2646		return;
2647	*(bus_addr_t *) arg = segs[0].ds_addr;
2648}
2649
2650static int
2651igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2652        struct igb_dma_alloc *dma, int mapflags)
2653{
2654	int error;
2655
2656	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2657				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2658				BUS_SPACE_MAXADDR,	/* lowaddr */
2659				BUS_SPACE_MAXADDR,	/* highaddr */
2660				NULL, NULL,		/* filter, filterarg */
2661				size,			/* maxsize */
2662				1,			/* nsegments */
2663				size,			/* maxsegsize */
2664				0,			/* flags */
2665				NULL,			/* lockfunc */
2666				NULL,			/* lockarg */
2667				&dma->dma_tag);
2668	if (error) {
2669		device_printf(adapter->dev,
2670		    "%s: bus_dma_tag_create failed: %d\n",
2671		    __func__, error);
2672		goto fail_0;
2673	}
2674
2675	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2676	    BUS_DMA_NOWAIT, &dma->dma_map);
2677	if (error) {
2678		device_printf(adapter->dev,
2679		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2680		    __func__, (uintmax_t)size, error);
2681		goto fail_2;
2682	}
2683
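	/*
	 * Load the map; igb_dmamap_cb stores the single segment's bus
	 * address in dma_paddr for us.
	 */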
2684	dma->dma_paddr = 0;
2685	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2686	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2687	if (error || dma->dma_paddr == 0) {
2688		device_printf(adapter->dev,
2689		    "%s: bus_dmamap_load failed: %d\n",
2690		    __func__, error);
2691		goto fail_3;
2692	}
2693
2694	return (0);
2695
2696fail_3:
2697	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2698fail_2:
2699	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2700	bus_dma_tag_destroy(dma->dma_tag);
2701fail_0:
2702	dma->dma_map = NULL;
2703	dma->dma_tag = NULL;
2704
2705	return (error);
2706}
2707
2708static void
2709igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2710{
2711	if (dma->dma_tag == NULL)
2712		return;
2713	if (dma->dma_map != NULL) {
2714		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2715		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2716		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2717		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2718		dma->dma_map = NULL;
2719	}
2720	bus_dma_tag_destroy(dma->dma_tag);
2721	dma->dma_tag = NULL;
2722}
2723
2724
2725/*********************************************************************
2726 *
2727 *  Allocate memory for the transmit and receive rings, and then
2728 *  the descriptors associated with each, called only once at attach.
2729 *
2730 **********************************************************************/
2731static int
2732igb_allocate_queues(struct adapter *adapter)
2733{
2734	device_t dev = adapter->dev;
2735	struct tx_ring *txr;
2736	struct rx_ring *rxr;
2737	int rsize, tsize, error = E1000_SUCCESS;
2738	int txconf = 0, rxconf = 0;
2739
2740	/* First allocate the TX ring struct memory */
2741	if (!(adapter->tx_rings =
2742	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2743	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2744		device_printf(dev, "Unable to allocate TX ring memory\n");
2745		error = ENOMEM;
2746		goto fail;
2747	}
2748	txr = adapter->tx_rings;
2749
2750	/* Next allocate the RX */
2751	if (!(adapter->rx_rings =
2752	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2753	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2754		device_printf(dev, "Unable to allocate RX ring memory\n");
2755		error = ENOMEM;
2756		goto rx_fail;
2757	}
2758	rxr = adapter->rx_rings;
2759
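	/* Descriptor areas are rounded up to IGB_DBA_ALIGN for the hardware */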
2760	tsize = roundup2(adapter->num_tx_desc *
2761	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2762	/*
2763	 * Now set up the TX queues, txconf is needed to handle the
2764	 * possibility that things fail midcourse and we need to
2765	 * undo memory gracefully
2766	 */
2767	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2768		/* Set up some basics */
2769		txr = &adapter->tx_rings[i];
2770		txr->adapter = adapter;
2771		txr->me = i;
2772
2773		/* Initialize the TX lock */
2774		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2775		    device_get_nameunit(dev), txr->me);
2776		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2777
2778		if (igb_dma_malloc(adapter, tsize,
2779			&txr->txdma, BUS_DMA_NOWAIT)) {
2780			device_printf(dev,
2781			    "Unable to allocate TX Descriptor memory\n");
2782			error = ENOMEM;
2783			goto err_tx_desc;
2784		}
2785		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2786		bzero((void *)txr->tx_base, tsize);
2787
2788        	/* Now allocate transmit buffers for the ring */
2789        	if (igb_allocate_transmit_buffers(txr)) {
2790			device_printf(dev,
2791			    "Critical Failure setting up transmit buffers\n");
2792			error = ENOMEM;
2793			goto err_tx_desc;
2794        	}
2795#if __FreeBSD_version >= 800000
2796		/* Allocate a buf ring */
2797		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
2798		    M_WAITOK, &txr->tx_mtx);
2799#endif
2800	}
2801
2802	/*
2803	 * Next the RX queues...
2804	 */
2805	rsize = roundup2(adapter->num_rx_desc *
2806	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2807	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2808		rxr = &adapter->rx_rings[i];
2809		rxr->adapter = adapter;
2810		rxr->me = i;
2811
2812		/* Initialize the RX lock */
2813		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2814		    device_get_nameunit(dev), rxr->me);
2815		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2816
2817		if (igb_dma_malloc(adapter, rsize,
2818			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2819			device_printf(dev,
2820			    "Unable to allocate RxDescriptor memory\n");
2821			error = ENOMEM;
2822			goto err_rx_desc;
2823		}
2824		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2825		bzero((void *)rxr->rx_base, rsize);
2826
2827        	/* Allocate receive buffers for the ring*/
2828		if (igb_allocate_receive_buffers(rxr)) {
2829			device_printf(dev,
2830			    "Critical Failure setting up receive buffers\n");
2831			error = ENOMEM;
2832			goto err_rx_desc;
2833		}
2834	}
2835
2836	return (0);
2837
2838err_rx_desc:
2839	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2840		igb_dma_free(adapter, &rxr->rxdma);
2841err_tx_desc:
2842	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2843		igb_dma_free(adapter, &txr->txdma);
2844	free(adapter->rx_rings, M_DEVBUF);
2845rx_fail:
2846	free(adapter->tx_rings, M_DEVBUF);
2847fail:
2848	return (error);
2849}
2850
2851/*********************************************************************
2852 *
2853 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2854 *  the information needed to transmit a packet on the wire. This is
2855 *  called only once at attach, setup is done every reset.
2856 *
2857 **********************************************************************/
2858static int
2859igb_allocate_transmit_buffers(struct tx_ring *txr)
2860{
2861	struct adapter *adapter = txr->adapter;
2862	device_t dev = adapter->dev;
2863	struct igb_tx_buffer *txbuf;
2864	int error, i;
2865
2866	/*
2867	 * Setup DMA descriptor areas.
2868	 */
2869	if ((error = bus_dma_tag_create(NULL,		/* parent */
2870			       PAGE_SIZE, 0,		/* alignment, bounds */
2871			       BUS_SPACE_MAXADDR,	/* lowaddr */
2872			       BUS_SPACE_MAXADDR,	/* highaddr */
2873			       NULL, NULL,		/* filter, filterarg */
2874			       IGB_TSO_SIZE,		/* maxsize */
2875			       IGB_MAX_SCATTER,		/* nsegments */
2876			       PAGE_SIZE,		/* maxsegsize */
2877			       0,			/* flags */
2878			       NULL,			/* lockfunc */
2879			       NULL,			/* lockfuncarg */
2880			       &txr->txtag))) {
2881		device_printf(dev,"Unable to allocate TX DMA tag\n");
2882		goto fail;
2883	}
2884
2885	if (!(txr->tx_buffers =
2886	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2887	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2888		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2889		error = ENOMEM;
2890		goto fail;
2891	}
2892
2893        /* Create the descriptor buffer dma maps */
2894	txbuf = txr->tx_buffers;
2895	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2896		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2897		if (error != 0) {
2898			device_printf(dev, "Unable to create TX DMA map\n");
2899			goto fail;
2900		}
2901	}
2902
2903	return 0;
2904fail:
2905	/* We free all, it handles case where we are in the middle */
2906	igb_free_transmit_structures(adapter);
2907	return (error);
2908}
2909
2910/*********************************************************************
2911 *
2912 *  Initialize a transmit ring.
2913 *
2914 **********************************************************************/
2915static void
2916igb_setup_transmit_ring(struct tx_ring *txr)
2917{
2918	struct adapter *adapter = txr->adapter;
2919	struct igb_tx_buffer *txbuf;
2920	int i;
2921
2922	/* Clear the old descriptor contents */
2923	bzero((void *)txr->tx_base,
2924	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2925	/* Reset indices */
2926	txr->next_avail_desc = 0;
2927	txr->next_to_clean = 0;
2928
2929	/* Free any existing tx buffers. */
2930        txbuf = txr->tx_buffers;
2931	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2932		if (txbuf->m_head != NULL) {
2933			bus_dmamap_sync(txr->txtag, txbuf->map,
2934			    BUS_DMASYNC_POSTWRITE);
2935			bus_dmamap_unload(txr->txtag, txbuf->map);
2936			m_freem(txbuf->m_head);
2937			txbuf->m_head = NULL;
2938		}
2939		/* clear the watch index */
2940		txbuf->next_eop = -1;
2941        }
2942
2943	/* Set number of descriptors available */
2944	txr->tx_avail = adapter->num_tx_desc;
2945
2946	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2947	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2948
2949}
2950
2951/*********************************************************************
2952 *
2953 *  Initialize all transmit rings.
2954 *
2955 **********************************************************************/
2956static void
2957igb_setup_transmit_structures(struct adapter *adapter)
2958{
2959	struct tx_ring *txr = adapter->tx_rings;
2960
2961	for (int i = 0; i < adapter->num_queues; i++, txr++)
2962		igb_setup_transmit_ring(txr);
2963
2964	return;
2965}
2966
2967/*********************************************************************
2968 *
2969 *  Enable transmit unit.
2970 *
2971 **********************************************************************/
2972static void
2973igb_initialize_transmit_units(struct adapter *adapter)
2974{
2975	struct tx_ring	*txr = adapter->tx_rings;
2976	u32		tctl, txdctl;
2977
2978	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2979
2980	/* Setup the Base and Length of the Tx Descriptor Rings */
2981	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2982		u64 bus_addr = txr->txdma.dma_paddr;
2983
2984		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2985		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2986		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2987		    (uint32_t)(bus_addr >> 32));
2988		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2989		    (uint32_t)bus_addr);
2990
2991		/* Setup the HW Tx Head and Tail descriptor pointers */
2992		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2993		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2994
2995		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2996		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2997		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2998
2999		/* Setup Transmit Descriptor Base Settings */
3000		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
3001
3002		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
3003		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3004		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
3005	}
3006
3007	/* Program the Transmit Control Register */
3008	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
3009	tctl &= ~E1000_TCTL_CT;
3010	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3011		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3012
3013	e1000_config_collision_dist(&adapter->hw);
3014
3015	/* This write will effectively turn on the transmit unit. */
3016	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
3017
3018}
3019
3020/*********************************************************************
3021 *
3022 *  Free all transmit rings.
3023 *
3024 **********************************************************************/
3025static void
3026igb_free_transmit_structures(struct adapter *adapter)
3027{
3028	struct tx_ring *txr = adapter->tx_rings;
3029
3030	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3031		IGB_TX_LOCK(txr);
3032		igb_free_transmit_buffers(txr);
3033		igb_dma_free(adapter, &txr->txdma);
3034		IGB_TX_UNLOCK(txr);
3035		IGB_TX_LOCK_DESTROY(txr);
3036	}
3037	free(adapter->tx_rings, M_DEVBUF);
3038}
3039
3040/*********************************************************************
3041 *
3042 *  Free transmit ring related data structures.
3043 *
3044 **********************************************************************/
3045static void
3046igb_free_transmit_buffers(struct tx_ring *txr)
3047{
3048	struct adapter *adapter = txr->adapter;
3049	struct igb_tx_buffer *tx_buffer;
3050	int             i;
3051
3052	INIT_DEBUGOUT("free_transmit_ring: begin");
3053
3054	if (txr->tx_buffers == NULL)
3055		return;
3056
3057	tx_buffer = txr->tx_buffers;
3058	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3059		if (tx_buffer->m_head != NULL) {
3060			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3061			    BUS_DMASYNC_POSTWRITE);
3062			bus_dmamap_unload(txr->txtag,
3063			    tx_buffer->map);
3064			m_freem(tx_buffer->m_head);
3065			tx_buffer->m_head = NULL;
3066			if (tx_buffer->map != NULL) {
3067				bus_dmamap_destroy(txr->txtag,
3068				    tx_buffer->map);
3069				tx_buffer->map = NULL;
3070			}
3071		} else if (tx_buffer->map != NULL) {
3072			bus_dmamap_unload(txr->txtag,
3073			    tx_buffer->map);
3074			bus_dmamap_destroy(txr->txtag,
3075			    tx_buffer->map);
3076			tx_buffer->map = NULL;
3077		}
3078	}
3079#if __FreeBSD_version >= 800000
3080	if (txr->br != NULL)
3081		buf_ring_free(txr->br, M_DEVBUF);
3082#endif
3083	if (txr->tx_buffers != NULL) {
3084		free(txr->tx_buffers, M_DEVBUF);
3085		txr->tx_buffers = NULL;
3086	}
3087	if (txr->txtag != NULL) {
3088		bus_dma_tag_destroy(txr->txtag);
3089		txr->txtag = NULL;
3090	}
3091	return;
3092}
3093
3094/**********************************************************************
3095 *
3096 *  Setup work for hardware segmentation offload (TSO) on
3097 *  adapters using advanced tx descriptors (82575)
3098 *
3099 **********************************************************************/
3100static boolean_t
3101igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3102{
3103	struct adapter *adapter = txr->adapter;
3104	struct e1000_adv_tx_context_desc *TXD;
3105	struct igb_tx_buffer        *tx_buffer;
3106	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3107	u32 mss_l4len_idx = 0;
3108	u16 vtag = 0;
3109	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3110	struct ether_vlan_header *eh;
3111	struct ip *ip;
3112	struct tcphdr *th;
3113
3114
3115	/*
3116	 * Determine where frame payload starts.
3117	 * Jump over vlan headers if already present
3118	 */
3119	eh = mtod(mp, struct ether_vlan_header *);
3120	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3121		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3122	else
3123		ehdrlen = ETHER_HDR_LEN;
3124
3125	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3126	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3127		return FALSE;
3128
3129	/* Only supports IPV4 for now */
3130	ctxd = txr->next_avail_desc;
3131	tx_buffer = &txr->tx_buffers[ctxd];
3132	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3133
3134	ip = (struct ip *)(mp->m_data + ehdrlen);
3135	if (ip->ip_p != IPPROTO_TCP)
3136		return FALSE;
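	/*
	 * For TSO the hardware rewrites the IP and TCP checksums on each
	 * segment, so zero the IP checksum and seed the TCP checksum with
	 * the pseudo-header (excluding length).
	 */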
3137	ip->ip_sum = 0;
3138	ip_hlen = ip->ip_hl << 2;
3139	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3140	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3141	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3142	tcp_hlen = th->th_off << 2;
3143	/*
3144	 * Calculate header length, this is used
3145	 * in the transmit desc in igb_xmit
3146	 */
3147	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3148
3149	/* VLAN MACLEN IPLEN */
3150	if (mp->m_flags & M_VLANTAG) {
3151		vtag = htole16(mp->m_pkthdr.ether_vtag);
3152		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3153	}
3154
3155	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3156	vlan_macip_lens |= ip_hlen;
3157	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3158
3159	/* ADV DTYPE TUCMD */
3160	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3161	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3162	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3163	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3164
3165	/* MSS L4LEN IDX */
3166	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3167	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3168	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3169
3170	TXD->seqnum_seed = htole32(0);
3171	tx_buffer->m_head = NULL;
3172	tx_buffer->next_eop = -1;
3173
3174	if (++ctxd == adapter->num_tx_desc)
3175		ctxd = 0;
3176
3177	txr->tx_avail--;
3178	txr->next_avail_desc = ctxd;
3179	return TRUE;
3180}
3181
3182
3183/*********************************************************************
3184 *
3185 *  Context Descriptor setup for VLAN or CSUM
3186 *
3187 **********************************************************************/
3188
3189static bool
3190igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3191{
3192	struct adapter *adapter = txr->adapter;
3193	struct e1000_adv_tx_context_desc *TXD;
3194	struct igb_tx_buffer        *tx_buffer;
3195	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3196	struct ether_vlan_header *eh;
3197	struct ip *ip = NULL;
3198	struct ip6_hdr *ip6;
3199	int  ehdrlen, ctxd, ip_hlen = 0;
3200	u16	etype, vtag = 0;
3201	u8	ipproto = 0;
3202	bool	offload = TRUE;
3203
3204	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3205		offload = FALSE;
3206
3207	ctxd = txr->next_avail_desc;
3208	tx_buffer = &txr->tx_buffers[ctxd];
3209	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3210
3211	/*
3212	** In advanced descriptors the vlan tag must
3213	** be placed into the context descriptor, thus
3214	** we need to be here just for that setup.
3215	*/
3216	if (mp->m_flags & M_VLANTAG) {
3217		vtag = htole16(mp->m_pkthdr.ether_vtag);
3218		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3219	} else if (offload == FALSE)
3220		return FALSE;
3221
3222	/*
3223	 * Determine where frame payload starts.
3224	 * Jump over vlan headers if already present,
3225	 * helpful for QinQ too.
3226	 */
3227	eh = mtod(mp, struct ether_vlan_header *);
3228	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3229		etype = ntohs(eh->evl_proto);
3230		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3231	} else {
3232		etype = ntohs(eh->evl_encap_proto);
3233		ehdrlen = ETHER_HDR_LEN;
3234	}
3235
3236	/* Set the ether header length */
3237	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3238
3239	switch (etype) {
3240		case ETHERTYPE_IP:
3241			ip = (struct ip *)(mp->m_data + ehdrlen);
3242			ip_hlen = ip->ip_hl << 2;
3243			if (mp->m_len < ehdrlen + ip_hlen) {
3244				offload = FALSE;
3245				break;
3246			}
3247			ipproto = ip->ip_p;
3248			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3249			break;
3250		case ETHERTYPE_IPV6:
3251			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3252			ip_hlen = sizeof(struct ip6_hdr);
3253			if (mp->m_len < ehdrlen + ip_hlen)
3254				return (FALSE);
3255			ipproto = ip6->ip6_nxt;
3256			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3257			break;
3258		default:
3259			offload = FALSE;
3260			break;
3261	}
3262
3263	vlan_macip_lens |= ip_hlen;
3264	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3265
3266	switch (ipproto) {
3267		case IPPROTO_TCP:
3268			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3269				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3270			break;
3271		case IPPROTO_UDP:
3272			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3273				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3274			break;
3275#if __FreeBSD_version >= 800000
3276		case IPPROTO_SCTP:
3277			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3278				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3279			break;
3280#endif
3281		default:
3282			offload = FALSE;
3283			break;
3284	}
3285
3286	/* Now copy bits into descriptor */
3287	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3288	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3289	TXD->seqnum_seed = htole32(0);
3290	TXD->mss_l4len_idx = htole32(0);
3291
3292	tx_buffer->m_head = NULL;
3293	tx_buffer->next_eop = -1;
3294
3295	/* We've consumed the first desc, adjust counters */
3296	if (++ctxd == adapter->num_tx_desc)
3297		ctxd = 0;
3298	txr->next_avail_desc = ctxd;
3299	--txr->tx_avail;
3300
3301        return (offload);
3302}
3303
3304
3305/**********************************************************************
3306 *
3307 *  Examine each tx_buffer in the used queue. If the hardware is done
3308 *  processing the packet then free associated resources. The
3309 *  tx_buffer is put back on the free queue.
3310 *
3311 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3312 **********************************************************************/
3313static bool
3314igb_txeof(struct tx_ring *txr)
3315{
3316	struct adapter	*adapter = txr->adapter;
3317        int first, last, done, num_avail;
3318	u32	cleaned = 0;
3319        struct igb_tx_buffer *tx_buffer;
3320        struct e1000_tx_desc   *tx_desc, *eop_desc;
3321	struct ifnet   *ifp = adapter->ifp;
3322
3323	IGB_TX_LOCK_ASSERT(txr);
3324
3325        if (txr->tx_avail == adapter->num_tx_desc)
3326                return FALSE;
3327
3328        num_avail = txr->tx_avail;
3329        first = txr->next_to_clean;
3330        tx_desc = &txr->tx_base[first];
3331        tx_buffer = &txr->tx_buffers[first];
3332	last = tx_buffer->next_eop;
3333        eop_desc = &txr->tx_base[last];
3334
3335	/*
3336	 * What this does is get the index of the
3337	 * first descriptor AFTER the EOP of the
3338	 * first packet, that way we can do the
3339	 * simple comparison on the inner while loop.
3340	 */
3341	if (++last == adapter->num_tx_desc)
3342 		last = 0;
3343	done = last;
3344
3345        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3346            BUS_DMASYNC_POSTREAD);
3347
3348        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3349		/* We clean the range of the packet */
3350		while (first != done) {
3351                	tx_desc->upper.data = 0;
3352                	tx_desc->lower.data = 0;
3353                	tx_desc->buffer_addr = 0;
3354                	++num_avail; ++cleaned;
3355
3356			if (tx_buffer->m_head) {
3357				ifp->if_opackets++;
3358				bus_dmamap_sync(txr->txtag,
3359				    tx_buffer->map,
3360				    BUS_DMASYNC_POSTWRITE);
3361				bus_dmamap_unload(txr->txtag,
3362				    tx_buffer->map);
3363
3364                        	m_freem(tx_buffer->m_head);
3365                        	tx_buffer->m_head = NULL;
3366                	}
3367			tx_buffer->next_eop = -1;
3368
3369	                if (++first == adapter->num_tx_desc)
3370				first = 0;
3371
3372	                tx_buffer = &txr->tx_buffers[first];
3373			tx_desc = &txr->tx_base[first];
3374		}
3375		/* See if we can continue to the next packet */
3376		last = tx_buffer->next_eop;
3377		if (last != -1) {
3378        		eop_desc = &txr->tx_base[last];
3379			/* Get new done point */
3380			if (++last == adapter->num_tx_desc) last = 0;
3381			done = last;
3382		} else
3383			break;
3384        }
3385        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3386            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3387
3388        txr->next_to_clean = first;
3389
3390        /*
3391         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3392         * that it is OK to send packets.
3393         * If there are no pending descriptors, clear the timeout. Otherwise,
3394         * if some descriptors have been freed, restart the timeout.
3395         */
3396        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3397                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3398		/* All clean, turn off the timer */
3399                if (num_avail == adapter->num_tx_desc) {
3400			txr->watchdog_timer = 0;
3401        		txr->tx_avail = num_avail;
3402			return FALSE;
3403		}
3404        }
3405
3406	/* Some cleaned, reset the timer */
3407	if (cleaned)
3408		txr->watchdog_timer = IGB_TX_TIMEOUT;
3409        txr->tx_avail = num_avail;
3410        return TRUE;
3411}
3412
3413
3414/*********************************************************************
3415 *
3416 *  Setup descriptor buffer(s) from system mbuf buffer pools.
3417 *  		i - designates the ring index
3418 *		clean - tells the function whether to update
3419 *		        the header, the packet buffer, or both.
3420 *
3421 **********************************************************************/
3422static int
3423igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3424{
3425	struct adapter		*adapter = rxr->adapter;
3426	struct mbuf		*mh, *mp;
3427	bus_dma_segment_t	seg[2];
3428	bus_dmamap_t		map;
3429	struct igb_rx_buffer	*rx_buffer;
3430	int			error, nsegs;
3431	int			merr = 0;
3432
3433
3434	rx_buffer = &rxr->rx_buffers[i];
3435
3436	/* First get our header and payload mbuf */
3437	if (clean & IGB_CLEAN_HEADER) {
3438		mh = m_gethdr(M_DONTWAIT, MT_DATA);
3439		if (mh == NULL)
3440			goto remap;
3441	} else  /* reuse */
3442		mh = rxr->rx_buffers[i].m_head;
3443
3444	mh->m_len = MHLEN;
3445	mh->m_flags |= M_PKTHDR;
3446
3447	if (clean & IGB_CLEAN_PAYLOAD) {
3448		mp = m_getjcl(M_DONTWAIT, MT_DATA,
3449		    M_PKTHDR, adapter->rx_mbuf_sz);
3450		if (mp == NULL)
3451			goto remap;
3452		mp->m_len = adapter->rx_mbuf_sz;
3453		mp->m_flags &= ~M_PKTHDR;
3454	} else {	/* reusing */
3455		mp = rxr->rx_buffers[i].m_pack;
3456		mp->m_len = adapter->rx_mbuf_sz;
3457		mp->m_flags &= ~M_PKTHDR;
3458	}
3459	/*
3460	** Need to create a chain for the following
3461	** dmamap call at this point.
3462	*/
3463	mh->m_next = mp;
3464	mh->m_pkthdr.len = mh->m_len + mp->m_len;
3465
3466	/* Get the memory mapping */
3467	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3468	    rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3469	if (error != 0) {
3470		printf("GET BUF: dmamap load failure - %d\n", error);
3471		m_free(mh);
3472		return (error);
3473	}
3474
3475	/* Unload old mapping and update buffer struct */
3476	if (rx_buffer->m_head != NULL)
3477			bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3478	map = rx_buffer->map;
3479	rx_buffer->map = rxr->rx_spare_map;
3480	rxr->rx_spare_map = map;
3481	rx_buffer->m_head = mh;
3482	rx_buffer->m_pack = mp;
3483	bus_dmamap_sync(rxr->rxtag,
3484	    rx_buffer->map, BUS_DMASYNC_PREREAD);
3485
3486	/* Update descriptor: seg[0] backs the header buffer, seg[1] the packet buffer */
3487	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3488	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3489
3490	return (0);
3491
3492	/*
3493	** If we get here, we have an mbuf resource
3494	** issue, so we discard the incoming packet
3495	** and attempt to reuse the existing mbufs on the
3496	** next pass through the ring; to do so we must
3497	** fix up the descriptor whose address was
3498	** clobbered with writeback info.
3499	*/
3500remap:
3501	adapter->mbuf_header_failed++;
3502	merr = ENOBUFS;
3503	/* Is there a reusable buffer? */
3504	mh = rxr->rx_buffers[i].m_head;
3505	if (mh == NULL) /* Nope, init error */
3506		return (merr);
3507	mp = rxr->rx_buffers[i].m_pack;
3508	if (mp == NULL) /* Nope, init error */
3509		return (merr);
3510	/* Get our old mapping */
3511	rx_buffer = &rxr->rx_buffers[i];
3512	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3513	    rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3514	if (error != 0) {
3515		/* We really have a problem */
3516		m_free(mh);
3517		return (error);
3518	}
3519	/* Now fix the descriptor as needed */
3520	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3521	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3522	return (merr);
3523}
3524
3525
3526/*********************************************************************
3527 *
3528 *  Allocate memory for rx_buffer structures. Since we use one
3529 *  rx_buffer per received packet, the maximum number of rx_buffer's
3530 *  that we'll need is equal to the number of receive descriptors
3531 *  that we've allocated.
3532 *
3533 **********************************************************************/
3534static int
3535igb_allocate_receive_buffers(struct rx_ring *rxr)
3536{
3537	struct	adapter 	*adapter = rxr->adapter;
3538	device_t 		dev = adapter->dev;
3539	struct igb_rx_buffer 	*rxbuf;
3540	int             	i, bsize, error;
3541
3542	bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3543	if (!(rxr->rx_buffers =
3544	    (struct igb_rx_buffer *) malloc(bsize,
3545	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3546		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3547		error = ENOMEM;
3548		goto fail;
3549	}
3550
3551	/*
3552	** The tag is made to accommodate the largest buffer size
3553	** with packet split (hence the two segments), even though
3554	** it may not always be used this way.
3555	*/
3556	if ((error = bus_dma_tag_create(NULL,		/* parent */
3557				   PAGE_SIZE, 0,	/* alignment, bounds */
3558				   BUS_SPACE_MAXADDR,	/* lowaddr */
3559				   BUS_SPACE_MAXADDR,	/* highaddr */
3560				   NULL, NULL,		/* filter, filterarg */
3561				   MJUM16BYTES,		/* maxsize */
3562				   2,			/* nsegments */
3563				   MJUMPAGESIZE,	/* maxsegsize */
3564				   0,			/* flags */
3565				   NULL,		/* lockfunc */
3566				   NULL,		/* lockfuncarg */
3567				   &rxr->rxtag))) {
3568		device_printf(dev, "Unable to create RX DMA tag\n");
3569		goto fail;
3570	}
3571
3572	/* Create the spare map (used by getbuf) */
3573	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3574	     &rxr->rx_spare_map);
3575	if (error) {
3576		device_printf(dev,
3577		    "%s: bus_dmamap_create header spare failed: %d\n",
3578		    __func__, error);
3579		goto fail;
3580	}
3581
3582	for (i = 0; i < adapter->num_rx_desc; i++) {
3583		rxbuf = &rxr->rx_buffers[i];
3584		error = bus_dmamap_create(rxr->rxtag,
3585		    BUS_DMA_NOWAIT, &rxbuf->map);
3586		if (error) {
3587			device_printf(dev, "Unable to create RX DMA maps\n");
3588			goto fail;
3589		}
3590	}
3591
3592	return (0);
3593
3594fail:
3595	/* Frees all, but can handle partial completion */
3596	igb_free_receive_structures(adapter);
3597	return (error);
3598}
3599
3600/*********************************************************************
3601 *
3602 *  Initialize a receive ring and its buffers.
3603 *
3604 **********************************************************************/
3605static int
3606igb_setup_receive_ring(struct rx_ring *rxr)
3607{
3608	struct	adapter		*adapter;
3609	struct  ifnet		*ifp;
3610	device_t		dev;
3611	struct igb_rx_buffer	*rxbuf;
3612	struct lro_ctrl		*lro = &rxr->lro;
3613	int			j, rsize;
3614
3615	adapter = rxr->adapter;
3616	dev = adapter->dev;
3617	ifp = adapter->ifp;
3618	rxr->lro_enabled = FALSE;
3619	rxr->hdr_split = FALSE;
3620
3621	/* Clear the ring contents */
3622	rsize = roundup2(adapter->num_rx_desc *
3623	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3624	bzero((void *)rxr->rx_base, rsize);
3625
3626	/*
3627	** Free current RX buffer structures and their mbufs
3628	*/
3629	for (int i = 0; i < adapter->num_rx_desc; i++) {
3630		rxbuf = &rxr->rx_buffers[i];
3631		bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3632		    BUS_DMASYNC_POSTREAD);
3633		bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3634		if (rxbuf->m_head) {
3635			rxbuf->m_head->m_next = rxbuf->m_pack;
3636			m_freem(rxbuf->m_head);
3637		}
3638		rxbuf->m_head = NULL;
3639		rxbuf->m_pack = NULL;
3640	}
3641
3642	/* Next replenish the ring */
3643	for (j = 0; j < adapter->num_rx_desc; j++) {
3644		if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
3645			rxr->rx_buffers[j].m_head = NULL;
3646			rxr->rx_buffers[j].m_pack = NULL;
3647			rxr->rx_base[j].read.hdr_addr = 0;
3648			rxr->rx_base[j].read.pkt_addr = 0;
3649			goto fail;
3650		}
3651	}
3652
3653	/* Setup our descriptor indices */
3654	rxr->next_to_check = 0;
3655	rxr->last_cleaned = 0;
3656
3657	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3658	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3659
3660	/*
3661	** Now set up the LRO interface; we also
3662	** only do header split when LRO is
3663	** enabled, since setups that do not want
3664	** LRO rarely want header split either.
3665	*/
3666	if (ifp->if_capenable & IFCAP_LRO) {
3667		int err = tcp_lro_init(lro);
3668		if (err) {
3669			device_printf(dev,"LRO Initialization failed!\n");
3670			goto fail;
3671		}
3672		INIT_DEBUGOUT("RX LRO Initialized\n");
3673		rxr->lro_enabled = TRUE;
3674		rxr->hdr_split = TRUE;
3675		lro->ifp = adapter->ifp;
3676	}
3677
3678	return (0);
3679fail:
3680	/*
3681	 * We need to clean up any buffers allocated
3682	 * so far, 'j' is the failing index.
3683	 */
3684	for (int i = 0; i < j; i++) {
3685		rxbuf = &rxr->rx_buffers[i];
3686		if (rxbuf->m_head != NULL) {
3687			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3688			    BUS_DMASYNC_POSTREAD);
3689			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3690			m_freem(rxbuf->m_head);
3691			rxbuf->m_head = NULL;
3692		}
3693	}
3694	return (ENOBUFS);
3695}
3696
3697/*********************************************************************
3698 *
3699 *  Initialize all receive rings.
3700 *
3701 **********************************************************************/
3702static int
3703igb_setup_receive_structures(struct adapter *adapter)
3704{
3705	struct rx_ring *rxr = adapter->rx_rings;
3706	int i, j;
3707
3708	for (i = 0; i < adapter->num_queues; i++, rxr++)
3709		if (igb_setup_receive_ring(rxr))
3710			goto fail;
3711
3712	return (0);
3713fail:
3714	/*
3715	 * Free RX buffers allocated so far, we will only handle
3716	 * the rings that completed, the failing case will have
3717	 * cleaned up for itself. The value of 'i' will be the
3718	 * failed ring so we must pre-decrement it.
3719	 */
3720	rxr = adapter->rx_rings;
3721	for (--i; i >= 0; i--, rxr++) {
3722		for (j = 0; j < adapter->num_rx_desc; j++) {
3723			struct igb_rx_buffer *rxbuf;
3724			rxbuf = &rxr->rx_buffers[j];
3725			if (rxbuf->m_head != NULL) {
3726				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3727			  	  BUS_DMASYNC_POSTREAD);
3728				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3729				m_freem(rxbuf->m_head);
3730				rxbuf->m_head = NULL;
3731			}
3732		}
3733	}
3734
3735	return (ENOBUFS);
3736}
3737
3738/*********************************************************************
3739 *
3740 *  Enable receive unit.
3741 *
3742 **********************************************************************/
3743static void
3744igb_initialize_receive_units(struct adapter *adapter)
3745{
3746	struct rx_ring	*rxr = adapter->rx_rings;
3747	struct ifnet	*ifp = adapter->ifp;
3748	u32		rctl, rxcsum, psize, srrctl = 0;
3749
3750	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3751
3752	/*
3753	 * Make sure receives are disabled while setting
3754	 * up the descriptor ring
3755	 */
3756	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3757	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3758
3759	/*
3760	** Set up for header split
3761	*/
3762	if (rxr->hdr_split) {
3763		/* Use a standard mbuf for the header */
3764		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3765		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3766	} else
3767		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3768
3769	/*
3770	** Set up for jumbo frames
3771	*/
3772	if (ifp->if_mtu > ETHERMTU) {
3773		rctl |= E1000_RCTL_LPE;
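		/* BSIZEPKT is in 1KB units, so this selects a 4KB receive buffer */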
3774		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3775		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3776
3777		/* Set maximum packet len */
3778		psize = adapter->max_frame_size;
3779		/* are we on a vlan? */
3780		if (adapter->ifp->if_vlantrunk != NULL)
3781			psize += VLAN_TAG_SIZE;
3782		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3783	} else {
3784		rctl &= ~E1000_RCTL_LPE;
3785		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3786		rctl |= E1000_RCTL_SZ_2048;
3787	}
3788
3789	/* Setup the Base and Length of the Rx Descriptor Rings */
3790	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3791		u64 bus_addr = rxr->rxdma.dma_paddr;
3792		u32 rxdctl;
3793
3794		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3795		    adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
3796		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3797		    (uint32_t)(bus_addr >> 32));
3798		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3799		    (uint32_t)bus_addr);
3800		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3801		/* Enable this Queue */
3802		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3803		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3804		rxdctl &= 0xFFF00000;
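		/* Program the prefetch, host, and write-back thresholds */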
3805		rxdctl |= IGB_RX_PTHRESH;
3806		rxdctl |= IGB_RX_HTHRESH << 8;
3807		rxdctl |= IGB_RX_WTHRESH << 16;
3808		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3809	}
3810
3811	/*
3812	** Setup for RX MultiQueue
3813	*/
3814	rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3815	if (adapter->num_queues > 1) {
3816		u32 random[10], mrqc, shift = 0;
3817		union igb_reta {
3818			u32 dword;
3819			u8  bytes[4];
3820		} reta;
3821
3822		arc4rand(&random, sizeof(random), 0);
3823		if (adapter->hw.mac.type == e1000_82575)
3824			shift = 6;
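		/* The 82575 expects the queue index shifted into the upper bits of each RETA entry */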
3825		/* Populate the 128-entry RSS redirection table, four entries per RETA register */
3826		for (int i = 0; i < 128; i++) {
3827			reta.bytes[i & 3] =
3828			    (i % adapter->num_queues) << shift;
3829			if ((i & 3) == 3)
3830				E1000_WRITE_REG(&adapter->hw,
3831				    E1000_RETA(i >> 2), reta.dword);
3832		}
3833		/* Now program the RSS random key and select the hash fields */
3834		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3835		for (int i = 0; i < 10; i++)
3836			E1000_WRITE_REG_ARRAY(&adapter->hw,
3837			    E1000_RSSRK(0), i, random[i]);
3838
3839		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3840		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3841		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3842		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3843		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3844		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3845		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3846		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3847
3848		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3849
3850		/*
3851		** NOTE: Receive Full-Packet Checksum Offload
3852		** is mutually exclusive with Multiqueue. However
3853		** this is not the same as TCP/IP checksums which
3854		** still work.
3855		*/
3856		rxcsum |= E1000_RXCSUM_PCSD;
3857#if __FreeBSD_version >= 800000
3858		/* For SCTP Offload */
3859		if ((adapter->hw.mac.type == e1000_82576)
3860		    && (ifp->if_capenable & IFCAP_RXCSUM))
3861			rxcsum |= E1000_RXCSUM_CRCOFL;
3862#endif
3863	} else {
3864		/* Non RSS setup */
3865		if (ifp->if_capenable & IFCAP_RXCSUM) {
3866			rxcsum |= E1000_RXCSUM_IPPCSE;
3867#if __FreeBSD_version >= 800000
3868			if (adapter->hw.mac.type == e1000_82576)
3869				rxcsum |= E1000_RXCSUM_CRCOFL;
3870#endif
3871		} else
3872			rxcsum &= ~E1000_RXCSUM_TUOFL;
3873	}
3874	E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3875
3876	/* Setup the Receive Control Register */
3877	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3878	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3879		   E1000_RCTL_RDMTS_HALF |
3880		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3881
3882	/* Make sure VLAN Filters are off */
3883	rctl &= ~E1000_RCTL_VFE;
3884	/* Don't store bad packets */
3885	rctl &= ~E1000_RCTL_SBP;
3886
3887	/* Enable Receives */
3888	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3889
3890	/*
3891	 * Setup the HW Rx Head and Tail Descriptor Pointers
3892	 *   - needs to be after enable
3893	 */
3894	for (int i = 0; i < adapter->num_queues; i++) {
3895		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3896		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3897		     adapter->num_rx_desc - 1);
3898	}
3899	return;
3900}
3901
3902/*********************************************************************
3903 *
3904 *  Free receive rings.
3905 *
3906 **********************************************************************/
3907static void
3908igb_free_receive_structures(struct adapter *adapter)
3909{
3910	struct rx_ring *rxr = adapter->rx_rings;
3911
3912	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
3913		struct lro_ctrl	*lro = &rxr->lro;
3914		igb_free_receive_buffers(rxr);
3915		tcp_lro_free(lro);
3916		igb_dma_free(adapter, &rxr->rxdma);
3917	}
3918
3919	free(adapter->rx_rings, M_DEVBUF);
3920}
3921
3922/*********************************************************************
3923 *
3924 *  Free receive ring data structures.
3925 *
3926 **********************************************************************/
3927static void
3928igb_free_receive_buffers(struct rx_ring *rxr)
3929{
3930	struct adapter	*adapter = rxr->adapter;
3931	struct igb_rx_buffer *rx_buffer;
3932
3933	INIT_DEBUGOUT("free_receive_buffers: begin");
3934
3935	if (rxr->rx_spare_map) {
3936		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3937		rxr->rx_spare_map = NULL;
3938	}
3939
3940	/* Cleanup any existing buffers */
3941	if (rxr->rx_buffers != NULL) {
3942		rx_buffer = &rxr->rx_buffers[0];
3943		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3944			if (rx_buffer->m_head != NULL) {
3945				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3946				    BUS_DMASYNC_POSTREAD);
3947				bus_dmamap_unload(rxr->rxtag,
3948				    rx_buffer->map);
3949				m_freem(rx_buffer->m_head);
3950				rx_buffer->m_head = NULL;
3951			} else if (rx_buffer->map != NULL)
3952				bus_dmamap_unload(rxr->rxtag,
3953				    rx_buffer->map);
3954			if (rx_buffer->map != NULL) {
3955				bus_dmamap_destroy(rxr->rxtag,
3956				    rx_buffer->map);
3957				rx_buffer->map = NULL;
3958			}
3959		}
3960	}
3961
3962	if (rxr->rx_buffers != NULL) {
3963		free(rxr->rx_buffers, M_DEVBUF);
3964		rxr->rx_buffers = NULL;
3965	}
3966
3967	if (rxr->rxtag != NULL) {
3968		bus_dma_tag_destroy(rxr->rxtag);
3969		rxr->rxtag = NULL;
3970	}
3971}
3972/*********************************************************************
3973 *
3974 *  This routine executes in interrupt context. It replenishes
3975 *  the mbufs in the descriptor and sends data which has been
3976 *  dma'ed into host memory to upper layer.
3977 *
3978 *  We loop at most count times if count is > 0, or until done if
3979 *  count < 0.
3980 *
3981 *  Return TRUE if more to clean, FALSE otherwise
3982 *********************************************************************/
3983static bool
3984igb_rxeof(struct rx_ring *rxr, int count)
3985{
3986	struct adapter		*adapter = rxr->adapter;
3987	struct ifnet		*ifp;
3988	struct lro_ctrl		*lro = &rxr->lro;
3989	struct lro_entry	*queued;
3990	int			i;
3991	u32			staterr;
3992	union e1000_adv_rx_desc	*cur;
3993
3994
3995	IGB_RX_LOCK(rxr);
3996	ifp = adapter->ifp;
3997	i = rxr->next_to_check;
3998	cur = &rxr->rx_base[i];
3999	staterr = cur->wb.upper.status_error;
4000
4001	if (!(staterr & E1000_RXD_STAT_DD)) {
4002		IGB_RX_UNLOCK(rxr);
4003		return FALSE;
4004	}
4005
4006	/* Sync the ring */
4007	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4008	    BUS_DMASYNC_POSTREAD);
4009
4010	/* Main clean loop */
4011	while ((staterr & E1000_RXD_STAT_DD) &&
4012	    (count != 0) &&
4013	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
4014		struct mbuf *sendmp, *mh, *mp;
4015		u16 hlen, plen, hdr, ptype, len_adj, vtag;
4016		u8 dopayload, accept_frame, eop;
4017
4018		accept_frame = 1;
4019		hlen = plen = len_adj = vtag = 0;
4020		sendmp = mh = mp = NULL;
4021		ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4022
4023		/* Sync the buffers */
4024		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4025			    BUS_DMASYNC_POSTREAD);
4026
4027		/*
4028		** The way the hardware is configured to
4029		** split, it will ONLY use the header buffer
4030		** when header split is enabled, otherwise we
4031		** get normal behavior, ie, both header and
4032		** payload are DMA'd into the payload buffer.
4033		**
4034		** The fmp test is to catch the case where a
4035		** packet spans multiple descriptors, in that
4036		** case only the first header is valid.
4037		*/
4038		if ((rxr->hdr_split) && (rxr->fmp == NULL)) {
4039			hdr = le16toh(cur->
4040			    wb.lower.lo_dword.hs_rss.hdr_info);
4041			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4042			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4043			if (hlen > IGB_HDR_BUF)
4044				hlen = IGB_HDR_BUF;
4045			plen = le16toh(cur->wb.upper.length);
4046			/* Handle the header mbuf */
4047			mh = rxr->rx_buffers[i].m_head;
4048			mh->m_len = hlen;
4049			dopayload = IGB_CLEAN_HEADER;
4050			/*
4051			** Get the payload length, this
4052			** could be zero if its a small
4053			** packet.
4054			*/
4055			if (plen) {
4056				mp = rxr->rx_buffers[i].m_pack;
4057				mp->m_len = plen;
4058				mp->m_next = NULL;
4059				mp->m_flags &= ~M_PKTHDR;
4060				mh->m_next = mp;
4061				mh->m_flags |= M_PKTHDR;
4062				dopayload = IGB_CLEAN_BOTH;
4063				rxr->rx_split_packets++;
4064			} else {  /* small packets */
4065				mh->m_flags &= ~M_PKTHDR;
4066				mh->m_next = NULL;
4067			}
4068		} else {
4069			/*
4070			** Either no header split, or a
4071			** secondary piece of a fragmented
4072			** split packet.
4073			*/
4074			mh = rxr->rx_buffers[i].m_pack;
4075			mh->m_flags |= M_PKTHDR;
4076			mh->m_len = le16toh(cur->wb.upper.length);
4077			dopayload = IGB_CLEAN_PAYLOAD;
4078		}
4079
4080		if (staterr & E1000_RXD_STAT_EOP) {
4081			count--;
4082			eop = 1;
4083			/*
4084			** Strip CRC and account for frag
4085			*/
4086			if (mp) {
4087				if (mp->m_len < ETHER_CRC_LEN) {
4088					/* a frag, how much is left? */
4089					len_adj = ETHER_CRC_LEN - mp->m_len;
4090					mp->m_len = 0;
4091				} else
4092					mp->m_len -= ETHER_CRC_LEN;
4093			} else { /* not split */
4094				if (mh->m_len < ETHER_CRC_LEN) {
4095					len_adj = ETHER_CRC_LEN - mh->m_len;
4096					mh->m_len = 0;
4097				} else
4098					mh->m_len -= ETHER_CRC_LEN;
4099			}
4100		} else
4101			eop = 0;
4102
4103		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4104			accept_frame = 0;
4105#ifdef IGB_IEEE1588
4106	/* This Linux code needs to be converted to work here;
4107	** it is kept below, commented out, only as a reference:
4108               if (unlikely(staterr & E1000_RXD_STAT_TS)) {
4109                       u64 regval;
4110                       u64 ns;
4111// Create an mtag and set it up
4112                       struct skb_shared_hwtstamps *shhwtstamps =
4113                               skb_hwtstamps(skb);
4114
4115                       rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID),
4116                       "igb: no RX time stamp available for time stamped packet");
4117                       regval = rd32(E1000_RXSTMPL);
4118                       regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4119// Do time conversion from the register
4120                       ns = timecounter_cyc2time(&adapter->clock, regval);
4121                       clocksync_update(&adapter->sync, ns);
4122                       memset(shhwtstamps, 0, sizeof(*shhwtstamps));
4123                       shhwtstamps->hwtstamp = ns_to_ktime(ns);
4124                       shhwtstamps->syststamp =
4125                               clocksync_hw2sys(&adapter->sync, ns);
4126               }
	*/
4127#endif
4128		if (accept_frame) {
4129			/*
4130			** get_buf will overwrite the writeback
4131			** descriptor so save the VLAN tag now.
4132			*/
4133			vtag = le16toh(cur->wb.upper.vlan);
4134			if (igb_get_buf(rxr, i, dopayload) != 0) {
4135				ifp->if_iqdrops++;
4136				goto discard;
4137			}
4138			/* Initial frame - setup */
4139			if (rxr->fmp == NULL) {
4140				mh->m_flags |= M_PKTHDR;
4141				mh->m_pkthdr.len = mh->m_len;
4142				rxr->fmp = mh; /* Store the first mbuf */
4143				rxr->lmp = mh;
4144				if (mp) { /* Add payload if split */
4145					mh->m_pkthdr.len += mp->m_len;
4146					rxr->lmp = mh->m_next;
4147				}
4148			} else {
4149				/* Chain mbuf's together */
4150				mh->m_flags &= ~M_PKTHDR;
4151				rxr->lmp->m_next = mh;
4152				rxr->lmp = rxr->lmp->m_next;
4153				rxr->fmp->m_pkthdr.len += mh->m_len;
4154				/* Adjust for CRC frag */
4155				if (len_adj) {
4156					rxr->lmp->m_len -= len_adj;
4157					rxr->fmp->m_pkthdr.len -= len_adj;
4158				}
4159			}
4160
4161			if (eop) {
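				/* The 0x40 bit of the (shifted) packet-type field indicates an SCTP payload */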
4162				bool sctp = ((ptype & 0x40) != 0);
4163				rxr->fmp->m_pkthdr.rcvif = ifp;
4164				ifp->if_ipackets++;
4165				rxr->rx_packets++;
4166				/* capture data for AIM (adaptive interrupt moderation) */
4167				rxr->bytes += rxr->fmp->m_pkthdr.len;
4168				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4169
4170				igb_rx_checksum(staterr, rxr->fmp, sctp);
4171				if (staterr & E1000_RXD_STAT_VP) {
4172					rxr->fmp->m_pkthdr.ether_vtag = vtag;
4173					rxr->fmp->m_flags |= M_VLANTAG;
4174				}
4175#if __FreeBSD_version >= 800000
4176				rxr->fmp->m_pkthdr.flowid = curcpu;
4177				rxr->fmp->m_flags |= M_FLOWID;
4178#endif
4179				sendmp = rxr->fmp;
4180				rxr->fmp = NULL;
4181				rxr->lmp = NULL;
4182			}
4183		} else {
4184			ifp->if_ierrors++;
4185discard:
4186			/* Reuse loaded DMA map and just update mbuf chain */
4187			if (hlen) {
4188				mh = rxr->rx_buffers[i].m_head;
4189				mh->m_len = MHLEN;
4190				mh->m_next = NULL;
4191			}
4192			mp = rxr->rx_buffers[i].m_pack;
4193			mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4194			mp->m_data = mp->m_ext.ext_buf;
4195			mp->m_next = NULL;
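			/* Re-apply the 2-byte alignment offset used for standard-sized frames */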
4196			if (adapter->max_frame_size <=
4197			    (MCLBYTES - ETHER_ALIGN))
4198				m_adj(mp, ETHER_ALIGN);
4199			if (rxr->fmp != NULL) {
4200				/* handles the whole chain */
4201				m_freem(rxr->fmp);
4202				rxr->fmp = NULL;
4203				rxr->lmp = NULL;
4204			}
4205			sendmp = NULL;
4206		}
4207
4208		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4209		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4210
4211		rxr->last_cleaned = i; /* For updating tail */
4212
4213		/* Advance our pointers to the next descriptor. */
4214		if (++i == adapter->num_rx_desc)
4215			i = 0;
4216
4217		/*
4218		** Note that we hold the RX lock thru
4219		** the following call so this ring's
4220		** next_to_check is not gonna change.
4221		*/
4222		if (sendmp != NULL) {
4223			/*
4224			** Send to the stack if:
4225			**  - LRO not enabled, or
4226			**  - no LRO resources, or
4227			**  - lro enqueue fails
4228			*/
4229			if ((!rxr->lro_enabled) ||
4230			    ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0))))
4231				(*ifp->if_input)(ifp, sendmp);
4232		}
4233
4234		/* Get the next descriptor */
4235		cur = &rxr->rx_base[i];
4236		staterr = cur->wb.upper.status_error;
4237	}
4238	rxr->next_to_check = i;
4239
4240	/* Advance this ring's Receive Descriptor Tail pointer */
4241	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4242
4243	/*
4244	 * Flush any outstanding LRO work
4245	 */
4246	while (!SLIST_EMPTY(&lro->lro_active)) {
4247		queued = SLIST_FIRST(&lro->lro_active);
4248		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4249		tcp_lro_flush(lro, queued);
4250	}
4251
4252	IGB_RX_UNLOCK(rxr);
4253
4254	/*
4255	** We still have cleaning to do?
4256	** Schedule another interrupt if so.
4257	*/
4258	if (staterr & E1000_RXD_STAT_DD) {
4259		E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4260		return TRUE;
4261	}
4262
4263	return FALSE;
4264}
4265
4266
4267/*********************************************************************
4268 *
4269 *  Verify that the hardware indicated that the checksum is valid.
4270 *  Inform the stack about the status of checksum so that stack
4271 *  doesn't spend time verifying the checksum.
4272 *
4273 *********************************************************************/
4274static void
4275igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4276{
4277	u16 status = (u16)staterr;
4278	u8  errors = (u8) (staterr >> 24);
4279
4280	/* Ignore Checksum bit is set */
4281	if (status & E1000_RXD_STAT_IXSM) {
4282		mp->m_pkthdr.csum_flags = 0;
4283		return;
4284	}
4285
4286	if (status & E1000_RXD_STAT_IPCS) {
4287		/* Did it pass? */
4288		if (!(errors & E1000_RXD_ERR_IPE)) {
4289			/* IP Checksum Good */
4290			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4291			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4292		} else
4293			mp->m_pkthdr.csum_flags = 0;
4294	}
4295
4296	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4297		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4298#if __FreeBSD_version >= 800000
4299		if (sctp) /* reassign */
4300			type = CSUM_SCTP_VALID;
4301#endif
4302		/* Did it pass? */
4303		if (!(errors & E1000_RXD_ERR_TCPE)) {
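			/* With CSUM_PSEUDO_HDR, a csum_data of 0xffff tells the stack the checksum already verified */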
4304			mp->m_pkthdr.csum_flags |= type;
4305			if (!sctp)
4306				mp->m_pkthdr.csum_data = htons(0xffff);
4307		}
4308	}
4309	return;
4310}
4311
4312/*
4313 * This routine is run via a vlan
4314 * config EVENT
4315 */
4316static void
4317igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4318{
4319	struct adapter	*adapter = ifp->if_softc;
4320	u32		index, bit;
4321
4322	if (ifp->if_softc !=  arg)   /* Not our event */
4323		return;
4324
4325	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4326                return;
4327
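	/* The VFTA is 128 32-bit words: tag bits 11:5 select the word, bits 4:0 the bit */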
4328	index = (vtag >> 5) & 0x7F;
4329	bit = vtag & 0x1F;
4330	igb_shadow_vfta[index] |= (1 << bit);
4331	++adapter->num_vlans;
4332	/* Re-init to load the changes */
4333	igb_init(adapter);
4334}
4335
4336/*
4337 * This routine is run via a vlan
4338 * unconfig EVENT
4339 */
4340static void
4341igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4342{
4343	struct adapter	*adapter = ifp->if_softc;
4344	u32		index, bit;
4345
4346	if (ifp->if_softc !=  arg)
4347		return;
4348
4349	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4350                return;
4351
4352	index = (vtag >> 5) & 0x7F;
4353	bit = vtag & 0x1F;
4354	igb_shadow_vfta[index] &= ~(1 << bit);
4355	--adapter->num_vlans;
4356	/* Re-init to load the changes */
4357	igb_init(adapter);
4358}
4359
4360static void
4361igb_setup_vlan_hw_support(struct adapter *adapter)
4362{
4363	struct e1000_hw *hw = &adapter->hw;
4364	u32             reg;
4365
4366	/*
4367	** We get here thru init_locked, meaning
4368	** a soft reset, this has already cleared
4369	** the VFTA and other state, so if there
4370	** have been no vlan's registered do nothing.
4371	*/
4372	if (adapter->num_vlans == 0)
4373                return;
4374
4375	/*
4376	** A soft reset zero's out the VFTA, so
4377	** we need to repopulate it now.
4378	*/
4379	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4380                if (igb_shadow_vfta[i] != 0)
4381			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4382                            i, igb_shadow_vfta[i]);
4383
4384	reg = E1000_READ_REG(hw, E1000_CTRL);
4385	reg |= E1000_CTRL_VME;
4386	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4387
4388	/* Enable the Filter Table */
4389	reg = E1000_READ_REG(hw, E1000_RCTL);
4390	reg &= ~E1000_RCTL_CFIEN;
4391	reg |= E1000_RCTL_VFE;
4392	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4393
4394	/* Update the frame size */
4395	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4396	    adapter->max_frame_size + VLAN_TAG_SIZE);
4397}
4398
4399static void
4400igb_enable_intr(struct adapter *adapter)
4401{
4402	/* With MSIX, set which interrupt causes auto-clear (EIAC) and auto-mask (EIAM) */
4403	if (adapter->msix_mem) {
4404		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4405		    adapter->eims_mask);
4406		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4407		    adapter->eims_mask);
4408		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4409		    adapter->eims_mask);
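		/* Link state changes still signal through the legacy IMS/ICR path */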
4410		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4411		    E1000_IMS_LSC);
4412	} else {
4413		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4414		    IMS_ENABLE_MASK);
4415	}
4416	E1000_WRITE_FLUSH(&adapter->hw);
4417
4418	return;
4419}
4420
4421static void
4422igb_disable_intr(struct adapter *adapter)
4423{
4424	if (adapter->msix_mem) {
4425		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4426		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4427	}
4428	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4429	E1000_WRITE_FLUSH(&adapter->hw);
4430	return;
4431}
4432
4433/*
4434 * Bit of a misnomer, what this really means is
4435 * to enable OS management of the system... aka
4436 * to disable special hardware management features
4437 */
4438static void
4439igb_init_manageability(struct adapter *adapter)
4440{
4441	if (adapter->has_manage) {
4442		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4443		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4444
4445		/* disable hardware interception of ARP */
4446		manc &= ~(E1000_MANC_ARP_EN);
4447
4448                /* enable receiving management packets to the host */
4449		manc |= E1000_MANC_EN_MNG2HOST;
4450		manc2h |= 1 << 5;  /* Mng Port 623 */
4451		manc2h |= 1 << 6;  /* Mng Port 664 */
4452		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4453		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4454	}
4455}
4456
4457/*
4458 * Give control back to hardware management
4459 * controller if there is one.
4460 */
4461static void
4462igb_release_manageability(struct adapter *adapter)
4463{
4464	if (adapter->has_manage) {
4465		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4466
4467		/* re-enable hardware interception of ARP */
4468		manc |= E1000_MANC_ARP_EN;
4469		manc &= ~E1000_MANC_EN_MNG2HOST;
4470
4471		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4472	}
4473}
4474
4475/*
4476 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4477 * For ASF and Pass Through versions of f/w this means that
4478 * the driver is loaded.
4479 *
4480 */
4481static void
4482igb_get_hw_control(struct adapter *adapter)
4483{
4484	u32 ctrl_ext;
4485
4486	/* Let firmware know the driver has taken over */
4487	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4488	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4489	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4490}
4491
4492/*
4493 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4494 * For ASF and Pass Through versions of f/w this means that the
4495 * driver is no longer loaded.
4496 *
4497 */
4498static void
4499igb_release_hw_control(struct adapter *adapter)
4500{
4501	u32 ctrl_ext;
4502
4503	/* Let firmware take over control of h/w */
4504	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4505	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4506	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4507}
4508
4509static int
4510igb_is_valid_ether_addr(uint8_t *addr)
4511{
4512	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4513
4514	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4515		return (FALSE);
4516	}
4517
4518	return (TRUE);
4519}
4520
4521
4522/*
4523 * Enable PCI Wake On Lan capability
4524 */
4525void
4526igb_enable_wakeup(device_t dev)
4527{
4528	u16     cap, status;
4529	u8      id;
4530
4531	/* First find the capabilities pointer*/
4532	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4533	/* Read the PM Capabilities */
4534	id = pci_read_config(dev, cap, 1);
4535	if (id != PCIY_PMG)     /* Something wrong */
4536		return;
4537	/* OK, we have the power capabilities, so
4538	   now get the status register */
4539	cap += PCIR_POWER_STATUS;
4540	status = pci_read_config(dev, cap, 2);
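	/* Set PME enable and write-1-to-clear the PME status bit */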
4541	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4542	pci_write_config(dev, cap, status, 2);
4543	return;
4544}
4545
4546
4547/**********************************************************************
4548 *
4549 *  Update the board statistics counters.
4550 *
4551 **********************************************************************/
4552static void
4553igb_update_stats_counters(struct adapter *adapter)
4554{
4555	struct ifnet   *ifp;
4556
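	/* Most of these statistics registers clear on read, so accumulate them into the soft copy */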
4557	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4558	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4559		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4560		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4561	}
4562	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4563	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4564	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4565	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4566
4567	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4568	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4569	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4570	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4571	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4572	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4573	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4574	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4575	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4576	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4577	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4578	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4579	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4580	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4581	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4582	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4583	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4584	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4585	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4586	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4587
4588	/* For the 64-bit byte counters the low dword must be read first. */
4589	/* Both registers clear on the read of the high dword */
4590
4591	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4592	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4593
4594	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4595	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4596	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4597	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4598	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4599
4600	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4601	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4602
4603	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4604	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4605	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4606	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4607	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4608	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4609	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4610	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4611	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4612	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4613
4614	adapter->stats.algnerrc +=
4615		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4616	adapter->stats.rxerrc +=
4617		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4618	adapter->stats.tncrs +=
4619		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4620	adapter->stats.cexterr +=
4621		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4622	adapter->stats.tsctc +=
4623		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4624	adapter->stats.tsctfc +=
4625		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4626	ifp = adapter->ifp;
4627
4628	ifp->if_collisions = adapter->stats.colc;
4629
4630	/* Rx Errors */
4631	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4632	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4633	    adapter->stats.ruc + adapter->stats.roc +
4634	    adapter->stats.mpc + adapter->stats.cexterr;
4635
4636	/* Tx Errors */
4637	ifp->if_oerrors = adapter->stats.ecol +
4638	    adapter->stats.latecol + adapter->watchdog_events;
4639}
4640
4641
4642/**********************************************************************
4643 *
4644 *  This routine is called only when igb_display_debug_stats is enabled.
4645 *  This routine provides a way to take a look at important statistics
4646 *  maintained by the driver and hardware.
4647 *
4648 **********************************************************************/
4649static void
4650igb_print_debug_info(struct adapter *adapter)
4651{
4652	device_t dev = adapter->dev;
4653	struct rx_ring *rxr = adapter->rx_rings;
4654	struct tx_ring *txr = adapter->tx_rings;
4655	uint8_t *hw_addr = adapter->hw.hw_addr;
4656
4657	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4658	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4659	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4660	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4661
4662#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4663	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4664	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4665	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4666#endif
4667
4668	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4669	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4670	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4671	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4672	    adapter->hw.fc.high_water,
4673	    adapter->hw.fc.low_water);
4674
4675	for (int i = 0; i < adapter->num_queues; i++, txr++) {
4676		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4677		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4678		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4679		device_printf(dev, "TX(%d) no descriptors avail event = %lld\n",
4680		    txr->me, (long long)txr->no_desc_avail);
4681		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4682		    (long long)txr->tx_irq);
4683		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4684		    (long long)txr->tx_packets);
4685	}
4686
4687	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4688		struct lro_ctrl *lro = &rxr->lro;
4689		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4690		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4691		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4692		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4693		    (long long)rxr->rx_packets);
4694		device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4695		    (long long)rxr->rx_split_packets);
4696		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4697		    (long long)rxr->rx_bytes);
4698		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4699		    (long long)rxr->rx_irq);
4700		device_printf(dev,"RX(%d) LRO Queued= %d\n",
4701		    rxr->me, lro->lro_queued);
4702		device_printf(dev,"RX(%d) LRO Flushed= %d\n",
4703		    rxr->me, lro->lro_flushed);
4704	}
4705
4706	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4707
4708	device_printf(dev, "Mbuf defrag failed = %ld\n",
4709	    adapter->mbuf_defrag_failed);
4710	device_printf(dev, "Std mbuf header failed = %ld\n",
4711	    adapter->mbuf_header_failed);
4712	device_printf(dev, "Std mbuf packet failed = %ld\n",
4713	    adapter->mbuf_packet_failed);
4714	device_printf(dev, "Driver dropped packets = %ld\n",
4715	    adapter->dropped_pkts);
4716	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4717		adapter->no_tx_dma_setup);
4718}
4719
4720static void
4721igb_print_hw_stats(struct adapter *adapter)
4722{
4723	device_t dev = adapter->dev;
4724
4725	device_printf(dev, "Excessive collisions = %lld\n",
4726	    (long long)adapter->stats.ecol);
4727#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4728	device_printf(dev, "Symbol errors = %lld\n",
4729	    (long long)adapter->stats.symerrs);
4730#endif
4731	device_printf(dev, "Sequence errors = %lld\n",
4732	    (long long)adapter->stats.sec);
4733	device_printf(dev, "Defer count = %lld\n",
4734	    (long long)adapter->stats.dc);
4735	device_printf(dev, "Missed Packets = %lld\n",
4736	    (long long)adapter->stats.mpc);
4737	device_printf(dev, "Receive No Buffers = %lld\n",
4738	    (long long)adapter->stats.rnbc);
4739	/* RLEC is inaccurate on some hardware, calculate our own. */
4740	device_printf(dev, "Receive Length Errors = %lld\n",
4741	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4742	device_printf(dev, "Receive errors = %lld\n",
4743	    (long long)adapter->stats.rxerrc);
4744	device_printf(dev, "Crc errors = %lld\n",
4745	    (long long)adapter->stats.crcerrs);
4746	device_printf(dev, "Alignment errors = %lld\n",
4747	    (long long)adapter->stats.algnerrc);
4748	/* On 82575 these are collision counts */
4749	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4750	    (long long)adapter->stats.cexterr);
4751	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4752	device_printf(dev, "watchdog timeouts = %ld\n",
4753	    adapter->watchdog_events);
4754	device_printf(dev, "XON Rcvd = %lld\n",
4755	    (long long)adapter->stats.xonrxc);
4756	device_printf(dev, "XON Xmtd = %lld\n",
4757	    (long long)adapter->stats.xontxc);
4758	device_printf(dev, "XOFF Rcvd = %lld\n",
4759	    (long long)adapter->stats.xoffrxc);
4760	device_printf(dev, "XOFF Xmtd = %lld\n",
4761	    (long long)adapter->stats.xofftxc);
4762	device_printf(dev, "Good Packets Rcvd = %lld\n",
4763	    (long long)adapter->stats.gprc);
4764	device_printf(dev, "Good Packets Xmtd = %lld\n",
4765	    (long long)adapter->stats.gptc);
4766	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4767	    (long long)adapter->stats.tsctc);
4768	device_printf(dev, "TSO Contexts Failed = %lld\n",
4769	    (long long)adapter->stats.tsctfc);
4770}
4771
4772/**********************************************************************
4773 *
4774 *  This routine provides a way to dump out the adapter eeprom,
4775 *  often a useful debug/service tool. This only dumps the first
4776 *  32 words, stuff that matters is in that extent.
4777 *
4778 **********************************************************************/
4779static void
4780igb_print_nvm_info(struct adapter *adapter)
4781{
4782	u16	eeprom_data;
4783	int	i, j, row = 0;
4784
4785	/* It's a bit crude, but it gets the job done */
4786	printf("\nInterface EEPROM Dump:\n");
4787	printf("Offset\n0x0000  ");
4788	for (i = 0, j = 0; i < 32; i++, j++) {
4789		if (j == 8) { /* Make the offset block */
4790			j = 0; ++row;
4791			printf("\n0x00%x0  ",row);
4792		}
4793		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4794		printf("%04x ", eeprom_data);
4795	}
4796	printf("\n");
4797}
4798
4799static int
4800igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4801{
4802	struct adapter *adapter;
4803	int error;
4804	int result;
4805
4806	result = -1;
4807	error = sysctl_handle_int(oidp, &result, 0, req);
4808
4809	if (error || !req->newptr)
4810		return (error);
4811
4812	if (result == 1) {
4813		adapter = (struct adapter *)arg1;
4814		igb_print_debug_info(adapter);
4815	}
4816	/*
4817	 * This value will cause a hex dump of the
4818	 * first 32 16-bit words of the EEPROM to
4819	 * the screen.
4820	 */
4821	if (result == 2) {
4822		adapter = (struct adapter *)arg1;
4823		igb_print_nvm_info(adapter);
4824        }
4825
4826	return (error);
4827}
4828
4829
4830static int
4831igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4832{
4833	struct adapter *adapter;
4834	int error;
4835	int result;
4836
4837	result = -1;
4838	error = sysctl_handle_int(oidp, &result, 0, req);
4839
4840	if (error || !req->newptr)
4841		return (error);
4842
4843	if (result == 1) {
4844		adapter = (struct adapter *)arg1;
4845		igb_print_hw_stats(adapter);
4846	}
4847
4848	return (error);
4849}
4850
4851static void
4852igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4853	const char *description, int *limit, int value)
4854{
4855	*limit = value;
4856	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4857	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4858	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4859}
4860
4861#ifdef IGB_IEEE1588
4862/*
4863** igb_hwtstamp_ioctl - control hardware time stamping
4864**
4865** Outgoing time stamping can be enabled and disabled. Play nice and
4866** disable it when requested, although it shouldn't cause any overhead
4867** when no packet needs it. At most one packet in the queue may be
4868** marked for time stamping, otherwise it would be impossible to tell
4869** for sure to which packet the hardware time stamp belongs.
4870**
4871** Incoming time stamping has to be configured via the hardware
4872** filters. Not all combinations are supported, in particular event
4873** type has to be specified. Matching the kind of event packet is
4874** not supported, with the exception of "all V2 events regardless of
4875** level 2 or 4".
4876**
4877*/
4878static int
4879igb_hwtstamp_ioctl(struct adapter *adapter, struct ifreq *ifr)
4880{
4881	struct e1000_hw *hw = &adapter->hw;
4882	struct hwtstamp_ctrl *config;
4883	u32 tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4884	u32 tsync_rx_ctl_bit = E1000_TSYNCRXCTL_ENABLED;
4885	u32 tsync_rx_ctl_type = 0;
4886	u32 tsync_rx_cfg = 0;
4887	int is_l4 = 0;
4888	int is_l2 = 0;
4889	u16 port = 319; /* PTP */
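	/* UDP port 319 carries PTP event messages; port 320 carries general messages */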
4890	u32 regval;
4891
4892	config = (struct hwtstamp_ctrl *) ifr->ifr_data;
4893
4894	/* reserved for future extensions */
4895	if (config->flags)
4896		return (EINVAL);
4897
4898	switch (config->tx_type) {
4899	case HWTSTAMP_TX_OFF:
4900		tsync_tx_ctl_bit = 0;
4901		break;
4902	case HWTSTAMP_TX_ON:
4903		tsync_tx_ctl_bit = E1000_TSYNCTXCTL_ENABLED;
4904		break;
4905	default:
4906		return (ERANGE);
4907	}
4908
4909	switch (config->rx_filter) {
4910	case HWTSTAMP_FILTER_NONE:
4911		tsync_rx_ctl_bit = 0;
4912		break;
4913	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4914	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4915	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4916	case HWTSTAMP_FILTER_ALL:
4917		/*
4918		 * register TSYNCRXCFG must be set, therefore it is not
4919		 * possible to time stamp both Sync and Delay_Req messages
4920		 * => fall back to time stamping all packets
4921		 */
4922		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_ALL;
4923		config->rx_filter = HWTSTAMP_FILTER_ALL;
4924		break;
4925	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4926		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4927		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
4928		is_l4 = 1;
4929		break;
4930	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4931		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L4_V1;
4932		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
4933		is_l4 = 1;
4934		break;
4935	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4936	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4937		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4938		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
4939		is_l2 = 1;
4940		is_l4 = 1;
4941		config->rx_filter = HWTSTAMP_FILTER_SOME;
4942		break;
4943	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4944	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4945		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
4946		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
4947		is_l2 = 1;
4948		is_l4 = 1;
4949		config->rx_filter = HWTSTAMP_FILTER_SOME;
4950		break;
4951	case HWTSTAMP_FILTER_PTP_V2_EVENT:
4952	case HWTSTAMP_FILTER_PTP_V2_SYNC:
4953	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4954		tsync_rx_ctl_type = E1000_TSYNCRXCTL_TYPE_EVENT_V2;
4955		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
4956		is_l2 = 1;
4957		break;
4958	default:
4959		return (ERANGE);
4960	}
4961
4962	/* enable/disable TX */
4963	regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL);
4964	regval = (regval & ~E1000_TSYNCTXCTL_ENABLED) | tsync_tx_ctl_bit;
4965	E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval);
4966
4967	/* enable/disable RX, define which PTP packets are time stamped */
4968	regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL);
4969	regval = (regval & ~E1000_TSYNCRXCTL_ENABLED) | tsync_rx_ctl_bit;
4970	regval = (regval & ~0xE) | tsync_rx_ctl_type;
4971	E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval);
4972	E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg);
4973
4974	/*
4975	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7
4976	 *                                          (Ethertype to filter on)
4977	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4978	 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4979	 */
4980	E1000_WRITE_REG(hw, E1000_ETQF0, is_l2 ? 0x440088f7 : 0);
4981
4982	/* L4 Queue Filter[0]: only filter by source and destination port */
4983	E1000_WRITE_REG(hw, E1000_SPQF0, htons(port));
4984	E1000_WRITE_REG(hw, E1000_IMIREXT(0), is_l4 ?
4985	     ((1<<12) | (1<<19) /* bypass size and control flags */) : 0);
4986	E1000_WRITE_REG(hw, E1000_IMIR(0), is_l4 ?
4987	     (htons(port)
4988	      | (0<<16) /* immediate interrupt disabled */
4989	      | 0 /* (1<<17) bit cleared: do not bypass
4990		     destination port check */)
4991		: 0);
4992	E1000_WRITE_REG(hw, E1000_FTQF0, is_l4 ?
4993	     (0x11 /* UDP */
4994	      | (1<<15) /* VF not compared */
4995	      | (1<<27) /* Enable Timestamping */
4996	      | (7<<28) /* only source port filter enabled,
4997			   source/target address and protocol
4998			   masked */)
4999	     : ((1<<15) | (15<<28) /* all mask bits set = filter not
5000				      enabled */));
5001
5002	wrfl();
5003
5004	adapter->hwtstamp_ctrl = config;
5005
5006	/* clear TX/RX time stamp registers, just to be sure */
5007	regval = E1000_READ_REG(hw, E1000_TXSTMPH);
5008	regval = E1000_READ_REG(hw, E1000_RXSTMPH);
5009
5010	return (0);
5011}
5012
5013/*
5014** igb_read_clock - read raw cycle counter (to be used by time counter)
5015*/
5016static cycle_t igb_read_clock(const struct cyclecounter *tc)
5017{
5018       struct igb_adapter *adapter =
5019               container_of(tc, struct igb_adapter, cycles);
5020       struct e1000_hw *hw = &adapter->hw;
5021       u64 stamp;
5022
5023       stamp =  E1000_READ_REG(hw, E1000_SYSTIML);
5024       stamp |= (u64)E1000_READ_REG(hw, E1000_SYSTIMH) << 32ULL;
5025
5026       return (stamp);
5027}
5028
5029#endif /* IGB_IEEE1588 */
5030