/******************************************************************************

  Copyright (c) 2001-2008, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 185353 2008-11-26 23:57:23Z jfv $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#ifdef IGB_TIMESYNC
#include <sys/ioccom.h>
#include <sys/time.h>
#endif
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.4.1";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
#ifndef __NO_STRICT_ALIGNMENT
static int	igb_fixup_rx(struct rx_ring *);
#endif
static void	igb_rx_checksum(u32, struct mbuf *);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);

#ifdef IGB_TIMESYNC
/* Precision Time sync support */
static int igb_tsync_init(struct adapter *);
static void igb_tsync_disable(struct adapter *);
#endif

static int	igb_irq_fast(void *);
static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

/* These are MSIX only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);

/* Adaptive Interrupt Moderation */
static void	igb_update_aim(struct rx_ring *);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** These parameters are used in Adaptive
** Interrupt Moderation. The value is written
** into EITR and controls the interrupt
** frequency. They may be modified, but
** tune them with care.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
static int igb_low_latency = IGB_LOW_LATENCY;
TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
static int igb_ave_latency = IGB_AVE_LATENCY;
TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
static int igb_bulk_latency = IGB_BULK_LATENCY;
TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);

/*
** IF YOU CHANGE THESE: be sure to change IGB_MSIX_VEC in
** if_igb.h to match. These can be autoconfigured if set to
** 0; the count will then be based on the number of cpus.
*/
static int igb_tx_queues = 1;
static int igb_rx_queues = 4;
TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);

/* Flow control setting - default to none */
static int igb_fc_setting = 0;
TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);

/*
 * Should the driver do LRO on the RX end?
 *  This can be toggled on the fly, but the
 *  interface must be reset (down/up) for it
 *  to take effect.
 */
static int igb_enable_lro = 1;
TUNABLE_INT("hw.igb.enable_lro", &igb_enable_lro);

extern int mp_ncpus;
/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on an
 *  adapter, based on the PCI vendor/device ID of that adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_debug_info, "I", "Debug Information");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_stats, "I", "Statistics");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_fc_setting, 0, "Flow Control");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_lro, 0, "Large Receive Offload");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_low_latency, 1, "Low Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_ave_latency, 1, "Average Latency");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_bulk_latency, 1, "Bulk Latency");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueue */
	igb_add_rx_process_limit(adapter, "rx_processing_limit",
	    "max number of rx packets to process", &adapter->rx_process_limit,
	    igb_rx_process_limit);

	/*
	 * Validate the number of transmit and receive descriptors. It
	 * must not exceed the hardware maximum and must be a multiple
	 * of IGB_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
	adapter->rx_buffer_len = 2048;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in a sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		error = EIO;
		goto err_late;
	}

	/* Copy the permanent MAC address out of the EEPROM */
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}

	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/*
	** Configure Interrupts
	*/
	if (adapter->msix > 1) /* MSIX */
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	/* Setup OS specific network interface */
	igb_setup_interface(dev, adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, 0, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, 0, EVENTHANDLER_PRI_FIRST);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head == NULL)
				break;
			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		txr->watchdog_timer = IGB_TX_TIMEOUT;
	}
}

static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr;
	u32		queue = 0;

	/*
	** This is really just here for testing
	** TX multiqueue, ultimately what is
	** needed is the flow support in the stack
	** and appropriate logic here to deal with
	** it. -jfv
	*/
	if (adapter->num_tx_queues > 1)
		queue = (curcpu % adapter->num_tx_queues);

	txr = &adapter->tx_rings[queue];
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
#ifdef INET
	struct ifaddr *ifa = (struct ifaddr *)data;
#endif
	int error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET) {
			/*
			 * XXX
			 * Since resetting the hardware takes a very long
			 * time and results in link renegotiation, we only
			 * initialize the hardware when it is absolutely
			 * required.
			 */
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				IGB_CORE_LOCK(adapter);
				igb_init_locked(adapter);
				IGB_CORE_UNLOCK(adapter);
			}
			arp_ifinit(ifp, ifa);
		} else
#endif
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
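		/* Largest frame the driver allows, including header and CRC */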
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
			igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
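		/* FALLTHROUGH -- SIOCSIFMEDIA shares the media handler below */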
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

#ifdef IGB_TIMESYNC
	/*
	** IOCTL support for Precision Time (IEEE 1588) Support
	*/
	case IGB_TIMESYNC_READTS:
	    {
		u32 rx_ctl, tx_ctl;
		struct igb_tsync_read *tdata;

		tdata = (struct igb_tsync_read *) ifr->ifr_data;

		if (tdata->read_current_time) {
			getnanotime(&tdata->system_time);
			tdata->network_time = E1000_READ_REG(&adapter->hw,
			    E1000_SYSTIML);
			tdata->network_time |=
			    (u64)E1000_READ_REG(&adapter->hw,
			    E1000_SYSTIMH ) << 32;
		}

		rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
		tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);

		if (rx_ctl & 0x1) {
			u32 tmp;
			unsigned char *tmp_cp;

			tdata->rx_valid = 1;
			tdata->rx_stamp = E1000_READ_REG(&adapter->hw, E1000_RXSTMPL);
			tdata->rx_stamp |= (u64)E1000_READ_REG(&adapter->hw,
			    E1000_RXSTMPH) << 32;

			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRL);
			tmp_cp = (unsigned char *) &tmp;
			tdata->srcid[0] = tmp_cp[0];
			tdata->srcid[1] = tmp_cp[1];
			tdata->srcid[2] = tmp_cp[2];
			tdata->srcid[3] = tmp_cp[3];
			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
			tmp_cp = (unsigned char *) &tmp;
			tdata->srcid[4] = tmp_cp[0];
			tdata->srcid[5] = tmp_cp[1];
			tdata->seqid = tmp >> 16;
			tdata->seqid = htons(tdata->seqid);
		} else
			tdata->rx_valid = 0;

		if (tx_ctl & 0x1) {
			tdata->tx_valid = 1;
			tdata->tx_stamp = E1000_READ_REG(&adapter->hw, E1000_TXSTMPL);
			tdata->tx_stamp |= (u64) E1000_READ_REG(&adapter->hw,
			    E1000_TXSTMPH) << 32;
		} else
			tdata->tx_valid = 0;

		return (0);
	    }
#endif	/* IGB_TIMESYNC */

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

/*********************************************************************
 *  Watchdog timer:
 *
 *  This routine is called from the local timer every second.
 *  As long as transmit descriptors are being cleaned the value
 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
 *  and we then reset the device.
 *
 **********************************************************************/

static void
igb_watchdog(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	bool		tx_hang = FALSE;

	IGB_CORE_LOCK_ASSERT(adapter);

	/*
	** The timer is set to 5 every time start() queues a packet.
	** Then txeof keeps resetting it as long as it cleans at
	** least one descriptor.
	** Finally, anytime all descriptors are clean the timer is
	** set to 0.
	**
	** With TX Multiqueue we need to check every queue's timer,
	** if any time out we do the reset.
	*/
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
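		/* 0 means idle; nonzero after decrement means still counting */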
		if (txr->watchdog_timer == 0 ||
		    (--txr->watchdog_timer)) {
			IGB_TX_UNLOCK(txr);
			continue;
		} else {
			tx_hang = TRUE;
			IGB_TX_UNLOCK(txr);
			break;
		}
	}
	if (tx_hang == FALSE)
		return;

	/* If we are in this routine because of pause frames, then
	 * don't reset the hardware.
	 */
	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
	    E1000_STATUS_TXOFF) {
		txr = adapter->tx_rings; /* reset pointer */
		for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
			txr->watchdog_timer = IGB_TX_TIMEOUT;
			IGB_TX_UNLOCK(txr);
		}
		return;
	}

	if (e1000_check_for_link(&adapter->hw) == 0)
		device_printf(adapter->dev, "watchdog timeout -- resetting\n");

	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
		    txr->next_to_clean);
	}

	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->watchdog_events++;

	igb_init_locked(adapter);
}

/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  init entry point in network interface structure. It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	struct tx_ring *txr = adapter->tx_rings;
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba = 0;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_stop(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest MAC address; the user may have set a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Vlan's enabled but HW Filtering off */
	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) &&
	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) {
		u32 ctrl;
		ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM)
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set default RX interrupt moderation */
	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(rxr->msix), igb_ave_latency);
		rxr->eitr_setting = igb_ave_latency;
	}

	/* Set TX interrupt rate & reset TX watchdog */
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(txr->msix), igb_ave_latency);
		txr->watchdog_timer = FALSE;
	}

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
	igb_enable_intr(adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);

#ifdef IGB_TIMESYNC
	/* Initialize IEEE 1588 Time sync if available */
	if (adapter->hw.mac.type == e1000_82576)
		igb_tsync_init(adapter);
#endif

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_link(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct ifnet *ifp;

	ifp = adapter->ifp;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return;

	IGB_CORE_LOCK(adapter);
	callout_stop(&adapter->timer);
	igb_update_link_status(adapter);
	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring	*rxr = context;
	struct adapter	*adapter = rxr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			/* More to clean, schedule another task */
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);

}

static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring	*txr = context;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}


/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		taskqueue_enqueue(adapter->tq, &adapter->link_task);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}


/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	++txr->tx_irq;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		IGB_TX_UNLOCK(txr);
		taskqueue_enqueue(adapter->tq, &txr->tx_task);
	}
	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;
	u32		more, loop = 5;

	++rxr->rx_irq;
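	/* Clean in bounded batches while the ring still has work */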
	do {
		more = igb_rxeof(rxr, adapter->rx_process_limit);
	} while (loop-- && more != 0);

	taskqueue_enqueue(adapter->tq, &rxr->rx_task);

	/* Update interrupt rate */
	if (igb_enable_aim == TRUE)
		igb_update_aim(rxr);

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
	return;
}

/*
** Routine to adjust the RX EITR value based on traffic;
** it's a simple three-state model, but it seems to help.
**
** Note that the three EITR values are tunable using
** sysctl in real time. The feature can be effectively
** nullified by setting them equal.
*/
#define BULK_THRESHOLD	10000
#define AVE_THRESHOLD	1600
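/* Thresholds compare against rxr->bytes, accumulated since the last update */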

static void
igb_update_aim(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	u32		olditr, newitr;

	/* Update interrupt moderation based on traffic */
	olditr = rxr->eitr_setting;
	newitr = olditr;

	/* Idle, don't change setting */
	if (rxr->bytes == 0)
		return;

	if (olditr == igb_low_latency) {
		if (rxr->bytes > AVE_THRESHOLD)
			newitr = igb_ave_latency;
	} else if (olditr == igb_ave_latency) {
		if (rxr->bytes < AVE_THRESHOLD)
			newitr = igb_low_latency;
		else if (rxr->bytes > BULK_THRESHOLD)
			newitr = igb_bulk_latency;
	} else if (olditr == igb_bulk_latency) {
		if (rxr->bytes < BULK_THRESHOLD)
			newitr = igb_ave_latency;
	}

	if (olditr != newitr) {
		/* Change interrupt rate */
		rxr->eitr_setting = newitr;
		E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
		    newitr | (newitr << 16));
	}

	rxr->bytes = 0;
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	adapter->hw.mac.get_link_status = 1;
	taskqueue_enqueue(adapter->tq, &adapter->link_task);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need to
	 * reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to the Advanced TX descriptors
 *  used by the 82575 adapter.
 *
 **********************************************************************/

static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head;
	u32			olinfo_status = 0, cmd_type_len = 0;
	int			nsegs, i, j, error, first, last = 0;
	u32			hdrlen = 0, offload = 0;

	m_head = *m_headp;

	/* Set basic descriptor constants */
	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	/*
	 * Force a cleanup if number of TX descriptors
	 * available hits the threshold
	 */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
		igb_txeof(txr);
		/* Now do we at least have a minimal? */
		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
			txr->no_desc_avail++;
			return (ENOBUFS);
		}
	}

	/*
	 * Map the packet for DMA.
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = txr->next_avail_desc;
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (error == EFBIG) {
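		/* Too many segments: compact the mbuf chain and retry once */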
		struct mbuf *m;

		m = m_defrag(*m_headp, M_DONTWAIT);
		if (m == NULL) {
			adapter->mbuf_alloc_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error == ENOMEM) {
			adapter->no_tx_dma_setup++;
			return (error);
		} else if (error != 0) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error == ENOMEM) {
		adapter->no_tx_dma_setup++;
		return (error);
	} else if (error != 0) {
		adapter->no_tx_dma_setup++;
		m_freem(*m_headp);
		*m_headp = NULL;
		return (error);
	}

	/* Check again to be sure we have enough descriptors */
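	/* (the "- 2" keeps slack, presumably for a context descriptor) */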
	if (nsegs > (txr->tx_avail - 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the context descriptor:
	 * used when any hardware offload is done.
	 * This includes CSUM, VLAN, and TSO. It
	 * will use the first descriptor.
	 */
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
		if (igb_tso_setup(txr, m_head, &hdrlen)) {
			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
		} else
			return (ENXIO);
	} else
		/* Do all other context descriptor setup */
		offload = igb_tx_ctx_setup(txr, m_head);
	if (offload == TRUE)
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
#ifdef IGB_TIMESYNC
	if (offload == IGB_TIMESTAMP)
		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
#endif
	/* Calculate payload length */
	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
	    << E1000_ADVTXD_PAYLEN_SHIFT);

	/* Set up our transmit descriptors */
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;

		tx_buffer = &txr->tx_buffers[i];
		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
		seg_addr = segs[j].ds_addr;
		seg_len  = segs[j].ds_len;

		txd->read.buffer_addr = htole64(seg_addr);
		txd->read.cmd_type_len = htole32(
		    adapter->txd_cmd | cmd_type_len | seg_len);
		txd->read.olinfo_status = htole32(olinfo_status);
		last = i;
		if (++i == adapter->num_tx_desc)
			i = 0;
		tx_buffer->m_head = NULL;
		tx_buffer->next_eop = -1;
	}

	txr->next_avail_desc = i;
	txr->tx_avail -= nsegs;

	tx_buffer->m_head = m_head;
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/*
	 * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
	 */
	txd->read.cmd_type_len |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &txr->tx_buffers[first];
	tx_buffer->next_eop = last;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
	++txr->tx_packets;

	return (0);
}
1762
1763static void
1764igb_set_promisc(struct adapter *adapter)
1765{
1766	struct ifnet	*ifp = adapter->ifp;
1767	uint32_t	reg_rctl;
1768
1769	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1770
1771	if (ifp->if_flags & IFF_PROMISC) {
1772		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1773		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1774	} else if (ifp->if_flags & IFF_ALLMULTI) {
1775		reg_rctl |= E1000_RCTL_MPE;
1776		reg_rctl &= ~E1000_RCTL_UPE;
1777		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1778	}
1779}
1780
1781static void
1782igb_disable_promisc(struct adapter *adapter)
1783{
1784	uint32_t	reg_rctl;
1785
1786	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1787
1788	reg_rctl &=  (~E1000_RCTL_UPE);
1789	reg_rctl &=  (~E1000_RCTL_MPE);
1790	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1791}
1792
1793
1794/*********************************************************************
1795 *  Multicast Update
1796 *
1797 *  This routine is called whenever multicast address list is updated.
1798 *
1799 **********************************************************************/
1800
1801static void
1802igb_set_multi(struct adapter *adapter)
1803{
1804	struct ifnet	*ifp = adapter->ifp;
1805	struct ifmultiaddr *ifma;
1806	u32 reg_rctl = 0;
1807	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1808
1809	int mcnt = 0;
1810
1811	IOCTL_DEBUGOUT("igb_set_multi: begin");
1812
1813	IF_ADDR_LOCK(ifp);
1814	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1815		if (ifma->ifma_addr->sa_family != AF_LINK)
1816			continue;
1817
1818		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1819			break;
1820
1821		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1822		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1823		mcnt++;
1824	}
1825	IF_ADDR_UNLOCK(ifp);
1826
1827	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1828		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1829		reg_rctl |= E1000_RCTL_MPE;
1830		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1831	} else
1832		e1000_update_mc_addr_list(&adapter->hw, mta,
1833		    mcnt, 1, adapter->hw.mac.rar_entry_count);
1834}
1835
1836
1837/*********************************************************************
1838 *  Timer routine
1839 *
1840 *  This routine checks for link status and updates statistics.
1841 *
1842 **********************************************************************/
1843
1844static void
1845igb_local_timer(void *arg)
1846{
1847	struct adapter	*adapter = arg;
1848	struct ifnet	*ifp = adapter->ifp;
1849
1850	IGB_CORE_LOCK_ASSERT(adapter);
1851
1852	igb_update_link_status(adapter);
1853	igb_update_stats_counters(adapter);
1854
1855	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1856		igb_print_hw_stats(adapter);
1857
1858	/*
1859	 * Each second we check the watchdog to
1860	 * protect against hardware hangs.
1861	 */
1862	igb_watchdog(adapter);
1863
1864	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1865
1866}
1867
1868static void
1869igb_update_link_status(struct adapter *adapter)
1870{
1871	struct e1000_hw *hw = &adapter->hw;
1872	struct ifnet *ifp = adapter->ifp;
1873	device_t dev = adapter->dev;
1874	struct tx_ring *txr = adapter->tx_rings;
1875	u32 link_check = 0;
1876
1877	/* Get the cached link value or read for real */
1878        switch (hw->phy.media_type) {
1879        case e1000_media_type_copper:
1880                if (hw->mac.get_link_status) {
1881			/* Do the work to read phy */
1882                        e1000_check_for_link(hw);
1883                        link_check = !hw->mac.get_link_status;
1884                } else
1885                        link_check = TRUE;
1886                break;
1887        case e1000_media_type_fiber:
1888                e1000_check_for_link(hw);
1889                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1890                                 E1000_STATUS_LU);
1891                break;
1892        case e1000_media_type_internal_serdes:
1893                e1000_check_for_link(hw);
1894                link_check = adapter->hw.mac.serdes_has_link;
1895                break;
1896        case e1000_media_type_unknown:
1897        default:
1898                break;
1899        }
1900
1901	/* Now we check if a transition has happened */
1902	if (link_check && (adapter->link_active == 0)) {
1903		e1000_get_speed_and_duplex(&adapter->hw,
1904		    &adapter->link_speed, &adapter->link_duplex);
1905		if (bootverbose)
1906			device_printf(dev, "Link is up %d Mbps %s\n",
1907			    adapter->link_speed,
1908			    ((adapter->link_duplex == FULL_DUPLEX) ?
1909			    "Full Duplex" : "Half Duplex"));
1910		adapter->link_active = 1;
1911		ifp->if_baudrate = adapter->link_speed * 1000000;
1912		if_link_state_change(ifp, LINK_STATE_UP);
1913	} else if (!link_check && (adapter->link_active == 1)) {
1914		ifp->if_baudrate = adapter->link_speed = 0;
1915		adapter->link_duplex = 0;
1916		if (bootverbose)
1917			device_printf(dev, "Link is Down\n");
1918		adapter->link_active = 0;
1919		if_link_state_change(ifp, LINK_STATE_DOWN);
1920		/* Turn off watchdogs */
1921		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
1922			txr->watchdog_timer = FALSE;
1923	}
1924}
1925
1926/*********************************************************************
1927 *
1928 *  This routine disables all traffic on the adapter by issuing a
1929 *  global reset on the MAC and deallocates TX/RX buffers.
1930 *
1931 **********************************************************************/
1932
1933static void
1934igb_stop(void *arg)
1935{
1936	struct adapter	*adapter = arg;
1937	struct ifnet	*ifp = adapter->ifp;
1938
1939	IGB_CORE_LOCK_ASSERT(adapter);
1940
1941	INIT_DEBUGOUT("igb_stop: begin");
1942
1943	igb_disable_intr(adapter);
1944
1945	callout_stop(&adapter->timer);
1946
1947	/* Tell the stack that the interface is no longer active */
1948	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1949
1950#ifdef IGB_TIMESYNC
1951	/* Disable IEEE 1588 Time sync */
1952	if (adapter->hw.mac.type == e1000_82576)
1953		igb_tsync_disable(adapter);
1954#endif
1955
1956	e1000_reset_hw(&adapter->hw);
1957	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
1958}
1959
1960
1961/*********************************************************************
1962 *
1963 *  Determine hardware revision.
1964 *
1965 **********************************************************************/
1966static void
1967igb_identify_hardware(struct adapter *adapter)
1968{
1969	device_t dev = adapter->dev;
1970
1971	/* Make sure our PCI config space has the necessary stuff set */
1972	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
1973	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
1974	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
1975		device_printf(dev, "Memory Access and/or Bus Master bits "
1976		    "were not set!\n");
1977		adapter->hw.bus.pci_cmd_word |=
1978		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
1979		pci_write_config(dev, PCIR_COMMAND,
1980		    adapter->hw.bus.pci_cmd_word, 2);
1981	}
1982
1983	/* Save off the information about this board */
1984	adapter->hw.vendor_id = pci_get_vendor(dev);
1985	adapter->hw.device_id = pci_get_device(dev);
1986	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
1987	adapter->hw.subsystem_vendor_id =
1988	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
1989	adapter->hw.subsystem_device_id =
1990	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
1991
1992	/* Do Shared Code Init and Setup */
1993	if (e1000_set_mac_type(&adapter->hw)) {
1994		device_printf(dev, "Setup init failure\n");
1995		return;
1996	}
1997}
1998
1999static int
2000igb_allocate_pci_resources(struct adapter *adapter)
2001{
2002	device_t	dev = adapter->dev;
2003	int		rid, error = 0;
2004
2005	rid = PCIR_BAR(0);
2006	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2007	    &rid, RF_ACTIVE);
2008	if (adapter->pci_mem == NULL) {
2009		device_printf(dev, "Unable to allocate bus resource: memory\n");
2010		return (ENXIO);
2011	}
2012	adapter->osdep.mem_bus_space_tag =
2013	    rman_get_bustag(adapter->pci_mem);
2014	adapter->osdep.mem_bus_space_handle =
2015	    rman_get_bushandle(adapter->pci_mem);
2016	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
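	/*
	 * Note: hw_addr is not dereferenced directly; the shared-code
	 * register macros resolve accesses through the bus-space tag
	 * and handle saved in osdep above.
	 */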
2017
2018	/*
2019	** Init the resource arrays
2020	*/
2021	for (int i = 0; i < IGB_MSIX_VEC; i++) {
2022		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
2023		adapter->tag[i] = NULL;
2024		adapter->res[i] = NULL;
2025	}
2026
2027	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
2028	adapter->num_rx_queues = 1;
2029
2030	/* This will setup either MSI/X or MSI */
2031	adapter->msix = igb_setup_msix(adapter);
2032
2033	adapter->hw.back = &adapter->osdep;
2034
2035	return (error);
2036}
2037
2038/*********************************************************************
2039 *
2040 *  Setup the Legacy or MSI Interrupt handler
2041 *
2042 **********************************************************************/
2043static int
2044igb_allocate_legacy(struct adapter *adapter)
2045{
2046	device_t dev = adapter->dev;
2047	int error;
2048
2049	/* Turn off all interrupts */
2050	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2051
2052	/* Legacy RID at 0 */
2053	if (adapter->msix == 0)
2054		adapter->rid[0] = 0;
2055
2056	/* We allocate a single interrupt resource */
2057	adapter->res[0] = bus_alloc_resource_any(dev,
2058	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
2059	if (adapter->res[0] == NULL) {
2060		device_printf(dev, "Unable to allocate bus resource: "
2061		    "interrupt\n");
2062		return (ENXIO);
2063	}
2064
2065	/*
2066	 * Try allocating a fast interrupt and the associated deferred
2067	 * processing contexts.
2068	 */
2069	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2070	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2071	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2072	    taskqueue_thread_enqueue, &adapter->tq);
2073	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2074	    device_get_nameunit(adapter->dev));
2075	if ((error = bus_setup_intr(dev, adapter->res[0],
2076	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, adapter,
2077	    &adapter->tag[0])) != 0) {
2078		device_printf(dev, "Failed to register fast interrupt "
2079			    "handler: %d\n", error);
2080		taskqueue_free(adapter->tq);
2081		adapter->tq = NULL;
2082		return (error);
2083	}
2084
2085	return (0);
2086}
2087
2088
2089/*********************************************************************
2090 *
2091 *  Setup the MSIX Interrupt handlers:
2092 *
2093 **********************************************************************/
2094static int
2095igb_allocate_msix(struct adapter *adapter)
2096{
2097	device_t dev = adapter->dev;
2098	struct tx_ring *txr = adapter->tx_rings;
2099	struct rx_ring *rxr = adapter->rx_rings;
2100	int error, vector = 0;
2101
2102	/*
2103	 * Setup the interrupt handlers
2104	 */
2105
2106	/* TX Setup */
2107	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
2108		adapter->res[vector] = bus_alloc_resource_any(dev,
2109		    SYS_RES_IRQ, &adapter->rid[vector],
2110		    RF_SHAREABLE | RF_ACTIVE);
2111		if (adapter->res[vector] == NULL) {
2112			device_printf(dev,
2113			    "Unable to allocate bus resource: "
2114			    "MSIX TX Interrupt\n");
2115			return (ENXIO);
2116		}
2117		error = bus_setup_intr(dev, adapter->res[vector],
2118	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_tx,
2119		    txr, &adapter->tag[vector]);
2120		if (error) {
2121			adapter->res[vector] = NULL;
2122			device_printf(dev, "Failed to register TX handler");
2123			return (error);
2124		}
2125		/* Make tasklet for deferred handling - one per queue */
2126		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2127		if (adapter->hw.mac.type == e1000_82575) {
2128			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2129			/* MSIXBM registers start at 0 */
2130			txr->msix = adapter->rid[vector] - 1;
2131		} else {
2132			txr->eims = 1 << vector;
2133			txr->msix = vector;
2134		}
2135	}
2136
2137	/* RX Setup */
2138	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
2139		adapter->res[vector] = bus_alloc_resource_any(dev,
2140		    SYS_RES_IRQ, &adapter->rid[vector],
2141		    RF_SHAREABLE | RF_ACTIVE);
2142		if (adapter->res[vector] == NULL) {
2143			device_printf(dev,
2144			    "Unable to allocate bus resource: "
2145			    "MSIX RX Interrupt\n");
2146			return (ENXIO);
2147		}
2148		error = bus_setup_intr(dev, adapter->res[vector],
2149	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_rx,
2150		    rxr, &adapter->tag[vector]);
2151		if (error) {
2152			adapter->res[vector] = NULL;
2153			device_printf(dev, "Failed to register RX handler");
2154			return (error);
2155		}
2156		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2157		if (adapter->hw.mac.type == e1000_82575) {
2158			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2159			rxr->msix = adapter->rid[vector] - 1;
2160		} else {
2161			rxr->eims = 1 << vector;
2162			rxr->msix = vector;
2163		}
2164	}
2165
2166	/* And Link */
2167	adapter->res[vector] = bus_alloc_resource_any(dev,
2168	    SYS_RES_IRQ, &adapter->rid[vector],
2169		    RF_SHAREABLE | RF_ACTIVE);
2170	if (adapter->res[vector] == NULL) {
2171		device_printf(dev,
2172		    "Unable to allocate bus resource: "
2173		    "MSIX Link Interrupt\n");
2174		return (ENXIO);
2175	}
2176	if ((error = bus_setup_intr(dev, adapter->res[vector],
2177	    INTR_TYPE_NET | INTR_MPSAFE, NULL, igb_msix_link,
2178	    adapter, &adapter->tag[vector])) != 0) {
2179		device_printf(dev, "Failed to register Link handler");
2180		return (error);
2181	}
2182	if (adapter->hw.mac.type == e1000_82575)
2183		adapter->linkvec = adapter->rid[vector] - 1;
2184	else
2185		adapter->linkvec = vector;
2186
2187	/* Make tasklet for deferred link interrupt handling */
2188	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2189
2190	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2191	    taskqueue_thread_enqueue, &adapter->tq);
2192	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2193	    device_get_nameunit(adapter->dev));
2194
2195	return (0);
2196}
2197
2198static void
2199igb_configure_queues(struct adapter *adapter)
2200{
2201	struct	e1000_hw *hw = &adapter->hw;
2202	struct	tx_ring	*txr;
2203	struct	rx_ring	*rxr;
2204
2205	/* Turn on MSIX */
2206	/*
2207	** 82576 uses IVARs to route MSI/X
2208	** interrupts; it's not very intuitive,
2209	** study the code carefully :)
2210	*/
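	/*
	** Each 32-bit IVAR packs four 8-bit entries, each holding
	** a vector number plus a valid bit (as the masks used
	** below imply):
	**   bits  7:0   RX queue i        (i < 8)
	**   bits 15:8   TX queue i        (i < 8)
	**   bits 23:16  RX queue i + 8
	**   bits 31:24  TX queue i + 8
	*/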
2211	if (adapter->hw.mac.type == e1000_82576) {
2212		u32	ivar = 0;
2213		/* First turn on the capability */
2214		E1000_WRITE_REG(hw, E1000_GPIE,
2215		    E1000_GPIE_MSIX_MODE |
2216		    E1000_GPIE_EIAME |
2217		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2218		/* RX */
2219		for (int i = 0; i < adapter->num_rx_queues; i++) {
2220			u32 index = i & 0x7; /* Each IVAR covers two queues */
2221			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2222			rxr = &adapter->rx_rings[i];
2223			if (i < 8) {
2224				ivar &= 0xFFFFFF00;
2225				ivar |= rxr->msix | E1000_IVAR_VALID;
2226			} else {
2227				ivar &= 0xFF00FFFF;
2228				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2229			}
2230			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2231			adapter->eims_mask |= rxr->eims;
2232		}
2233		/* TX */
2234		for (int i = 0; i < adapter->num_tx_queues; i++) {
2235			u32 index = i & 0x7; /* Each IVAR covers two queues */
2236			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2237			txr = &adapter->tx_rings[i];
2238			if (i < 8) {
2239				ivar &= 0xFFFF00FF;
2240				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2241			} else {
2242				ivar &= 0x00FFFFFF;
2243				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2244			}
2245			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2246			adapter->eims_mask |= txr->eims;
2247		}
2248
2249		/* And for the link interrupt */
2250		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2251		adapter->link_mask = 1 << adapter->linkvec;
2252		adapter->eims_mask |= adapter->link_mask;
2253		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2254	} else { /* 82575 */
2256		int tmp;
2257
2258                /* Enable MSI-X PBA support */
2259		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2260                tmp |= E1000_CTRL_EXT_PBA_CLR;
2261                /* Auto-Mask interrupts upon ICR read. */
2262                tmp |= E1000_CTRL_EXT_EIAME;
2263                tmp |= E1000_CTRL_EXT_IRCA;
2264                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2265
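		/*
		** The 82575 routes MSI-X with per-vector MSIXBM
		** registers rather than IVARs: each register holds
		** the EICR bit(s) that its vector services.
		*/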
2266		/* TX */
2267		for (int i = 0; i < adapter->num_tx_queues; i++) {
2268			txr = &adapter->tx_rings[i];
2269			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2270			    txr->eims);
2271			adapter->eims_mask |= txr->eims;
2272		}
2273
2274		/* RX */
2275		for (int i = 0; i < adapter->num_rx_queues; i++) {
2276			rxr = &adapter->rx_rings[i];
2277			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2278			    rxr->eims);
2279			adapter->eims_mask |= rxr->eims;
2280		}
2281
2282		/* Link */
2283		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2284		    E1000_EIMS_OTHER);
2285		adapter->link_mask |= E1000_EIMS_OTHER;
2286		adapter->eims_mask |= adapter->link_mask;
2287	}
2288	return;
2289}
2290
2291
2292static void
2293igb_free_pci_resources(struct adapter *adapter)
2294{
2295	device_t dev = adapter->dev;
2296
2297	/* Make sure the for loop below runs once */
2298	if (adapter->msix == 0)
2299		adapter->msix = 1;
2300
2301	/*
2302	 * First release all the interrupt resources:
2303	 *      notice that since these are just kept
2304	 *      in an array we can do the same logic
2305	 *      whether it's MSI-X or just legacy.
2306	 */
2307	for (int i = 0; i < adapter->msix; i++) {
2308		if (adapter->tag[i] != NULL) {
2309			bus_teardown_intr(dev, adapter->res[i],
2310			    adapter->tag[i]);
2311			adapter->tag[i] = NULL;
2312		}
2313		if (adapter->res[i] != NULL) {
2314			bus_release_resource(dev, SYS_RES_IRQ,
2315			    adapter->rid[i], adapter->res[i]);
2316		}
2317	}
2318
2319	if (adapter->msix)
2320		pci_release_msi(dev);
2321
2322	if (adapter->msix_mem != NULL)
2323		bus_release_resource(dev, SYS_RES_MEMORY,
2324		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2325
2326	if (adapter->pci_mem != NULL)
2327		bus_release_resource(dev, SYS_RES_MEMORY,
2328		    PCIR_BAR(0), adapter->pci_mem);
2329
2330}
2331
2332/*
2333 * Setup Either MSI/X or MSI
2334 */
2335static int
2336igb_setup_msix(struct adapter *adapter)
2337{
2338	device_t dev = adapter->dev;
2339	int rid, want, queues, msgs;
2340
2341	/* First try MSI/X */
2342	rid = PCIR_BAR(IGB_MSIX_BAR);
2343	adapter->msix_mem = bus_alloc_resource_any(dev,
2344	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2345	if (adapter->msix_mem == NULL) {
2346		/* May not be enabled */
2347		device_printf(adapter->dev,
2348		    "Unable to map MSIX table\n");
2349		goto msi;
2350	}
2351
2352	msgs = pci_msix_count(dev);
2353	if (msgs == 0) { /* system has msix disabled */
2354		bus_release_resource(dev, SYS_RES_MEMORY,
2355		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2356		adapter->msix_mem = NULL;
2357		goto msi;
2358	}
2359
2360	/* Limit by the number set in header */
2361	if (msgs > IGB_MSIX_VEC)
2362		msgs = IGB_MSIX_VEC;
2363
2364	/* Figure out a reasonable auto config value */
2365	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
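	/*
	 * One vector is reserved for the link interrupt and each
	 * queue consumes both a TX and an RX vector, hence the
	 * (msgs - 1) / 2 bound, further capped at the CPU count.
	 */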
2366
2367	if (igb_tx_queues == 0)
2368		igb_tx_queues = queues;
2369	if (igb_rx_queues == 0)
2370		igb_rx_queues = queues;
2371	want = igb_tx_queues + igb_rx_queues + 1;
2372	if (msgs >= want)
2373		msgs = want;
2374	else {
2375		device_printf(adapter->dev,
2376		    "MSIX Configuration Problem, "
2377		    "%d vectors configured, but %d queues wanted!\n",
2378		    msgs, want);
2379		return (ENXIO);
2380	}
2381	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
2382		device_printf(adapter->dev,
2383		    "Using MSIX interrupts with %d vectors\n", msgs);
2384		adapter->num_tx_queues = igb_tx_queues;
2385		adapter->num_rx_queues = igb_rx_queues;
2386		return (msgs);
2387	}
2388msi:
2389	msgs = pci_msi_count(dev);
2390	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2391		device_printf(adapter->dev, "Using MSI interrupt\n");
2392	return (msgs);
2393}
2394
2395/*********************************************************************
2396 *
2397 *  Initialize the hardware to a configuration
2398 *  as specified by the adapter structure.
2399 *
2400 **********************************************************************/
2401static int
2402igb_hardware_init(struct adapter *adapter)
2403{
2404	device_t	dev = adapter->dev;
2405	u32		rx_buffer_size;
2406
2407	INIT_DEBUGOUT("igb_hardware_init: begin");
2408
2409	/* Issue a global reset */
2410	e1000_reset_hw(&adapter->hw);
2411
2412	/* Let the firmware know the OS is in control */
2413	igb_get_hw_control(adapter);
2414
2415	/*
2416	 * These parameters control the automatic generation (Tx) and
2417	 * response (Rx) to Ethernet PAUSE frames.
2418	 * - High water mark should allow for at least two frames to be
2419	 *   received after sending an XOFF.
2420	 * - Low water mark works best when it is very near the high water mark.
2421	 *   This allows the receiver to restart by sending XON when it has
2422	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2423	 *   restart after one full frame is pulled from the buffer. There
2424	 *   could be several smaller frames in the buffer and if so they will
2425	 *   not trigger the XON until their total number reduces the buffer
2426	 *   by 1500.
2427	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2428	 */
2429	if (adapter->hw.mac.type == e1000_82576)
2430		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2431		    E1000_RXPBS) & 0xffff) << 10 );
2432	else
2433		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2434		    E1000_PBA) & 0xffff) << 10 );
2435
2436	adapter->hw.fc.high_water = rx_buffer_size -
2437	    roundup2(adapter->max_frame_size, 1024);
2438	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
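	/*
	 * Worked example (assumed values): a 64 KB packet buffer and
	 * a 1522-byte max frame give high_water = 65536 -
	 * roundup2(1522, 1024) = 65536 - 2048 = 63488 bytes, and
	 * low_water = 63488 - 1500 = 61988 bytes.
	 */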
2439
2440	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2441	adapter->hw.fc.send_xon = TRUE;
2442
2443	/* Set flow control, use the tunable value if sane */
2444	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2445		adapter->hw.fc.requested_mode = igb_fc_setting;
2446	else
2447		adapter->hw.fc.requested_mode = e1000_fc_none;
2448
2449	if (e1000_init_hw(&adapter->hw) < 0) {
2450		device_printf(dev, "Hardware Initialization Failed\n");
2451		return (EIO);
2452	}
2453
2454	e1000_check_for_link(&adapter->hw);
2455
2456	return (0);
2457}
2458
2459/*********************************************************************
2460 *
2461 *  Setup networking device structure and register an interface.
2462 *
2463 **********************************************************************/
2464static void
2465igb_setup_interface(device_t dev, struct adapter *adapter)
2466{
2467	struct ifnet   *ifp;
2468
2469	INIT_DEBUGOUT("igb_setup_interface: begin");
2470
2471	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2472	if (ifp == NULL)
2473		panic("%s: cannot if_alloc()", device_get_nameunit(dev));
2474	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2475	ifp->if_mtu = ETHERMTU;
2476	ifp->if_init =  igb_init;
2477	ifp->if_softc = adapter;
2478	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2479	ifp->if_ioctl = igb_ioctl;
2480	ifp->if_start = igb_start;
2481	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2482	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2483	IFQ_SET_READY(&ifp->if_snd);
2484
2485	ether_ifattach(ifp, adapter->hw.mac.addr);
2486
2487	ifp->if_capabilities = ifp->if_capenable = 0;
2488
2489	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2490	ifp->if_capabilities |= IFCAP_TSO4;
2491	ifp->if_capenable = ifp->if_capabilities;
2492
2493	/*
2494	 * Tell the upper layer(s) what we support.
2495	 */
2496	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2497	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER;
2498	ifp->if_capabilities |= IFCAP_VLAN_MTU;
2499	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER;
2500	ifp->if_capenable |= IFCAP_VLAN_MTU;
2501
2502	/*
2503	 * Specify the media types supported by this adapter and register
2504	 * callbacks to update media and link information
2505	 */
2506	ifmedia_init(&adapter->media, IFM_IMASK,
2507	    igb_media_change, igb_media_status);
2508	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2509	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2510		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2511			    0, NULL);
2512		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2513	} else {
2514		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2515		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2516			    0, NULL);
2517		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2518			    0, NULL);
2519		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2520			    0, NULL);
2521		if (adapter->hw.phy.type != e1000_phy_ife) {
2522			ifmedia_add(&adapter->media,
2523				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2524			ifmedia_add(&adapter->media,
2525				IFM_ETHER | IFM_1000_T, 0, NULL);
2526		}
2527	}
2528	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2529	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2530}
2531
2532
2533/*
2534 * Manage DMA'able memory.
2535 */
2536static void
2537igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2538{
2539	if (error)
2540		return;
2541	*(bus_addr_t *) arg = segs[0].ds_addr;
2542}
2543
2544static int
2545igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2546        struct igb_dma_alloc *dma, int mapflags)
2547{
2548	int error;
2549
2550	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2551				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2552				BUS_SPACE_MAXADDR,	/* lowaddr */
2553				BUS_SPACE_MAXADDR,	/* highaddr */
2554				NULL, NULL,		/* filter, filterarg */
2555				size,			/* maxsize */
2556				1,			/* nsegments */
2557				size,			/* maxsegsize */
2558				0,			/* flags */
2559				NULL,			/* lockfunc */
2560				NULL,			/* lockarg */
2561				&dma->dma_tag);
2562	if (error) {
2563		device_printf(adapter->dev,
2564		    "%s: bus_dma_tag_create failed: %d\n",
2565		    __func__, error);
2566		goto fail_0;
2567	}
2568
2569	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2570	    BUS_DMA_NOWAIT, &dma->dma_map);
2571	if (error) {
2572		device_printf(adapter->dev,
2573		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2574		    __func__, (uintmax_t)size, error);
2575		goto fail_2;
2576	}
2577
2578	dma->dma_paddr = 0;
2579	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2580	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2581	if (error || dma->dma_paddr == 0) {
2582		device_printf(adapter->dev,
2583		    "%s: bus_dmamap_load failed: %d\n",
2584		    __func__, error);
2585		goto fail_3;
2586	}
2587
2588	return (0);
2589
2590fail_3:
2591	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2592	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2593fail_2:	/* dmamem_alloc failed; only the tag exists */
2594	bus_dma_tag_destroy(dma->dma_tag);
2595fail_0:
2596	dma->dma_map = NULL;
2597	dma->dma_tag = NULL;
2598
2599	return (error);
2600}
2601
2602static void
2603igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2604{
2605	if (dma->dma_tag == NULL)
2606		return;
2607	if (dma->dma_map != NULL) {
2608		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2609		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2610		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2611		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2612		dma->dma_map = NULL;
2613	}
2614	bus_dma_tag_destroy(dma->dma_tag);
2615	dma->dma_tag = NULL;
2616}
2617
2618
2619/*********************************************************************
2620 *
2621 *  Allocate memory for the transmit and receive rings, and then
2622 *  the descriptors associated with each, called only once at attach.
2623 *
2624 **********************************************************************/
2625static int
2626igb_allocate_queues(struct adapter *adapter)
2627{
2628	device_t dev = adapter->dev;
2629	struct tx_ring *txr;
2630	struct rx_ring *rxr;
2631	int rsize, tsize, error = E1000_SUCCESS;
2632	int txconf = 0, rxconf = 0;
2633
2634	/* First allocate the TX ring struct memory */
2635	if (!(adapter->tx_rings =
2636	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2637	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2638		device_printf(dev, "Unable to allocate TX ring memory\n");
2639		error = ENOMEM;
2640		goto fail;
2641	}
2642	txr = adapter->tx_rings;
2643
2644	/* Next allocate the RX */
2645	if (!(adapter->rx_rings =
2646	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2647	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2648		device_printf(dev, "Unable to allocate RX ring memory\n");
2649		error = ENOMEM;
2650		goto rx_fail;
2651	}
2652	rxr = adapter->rx_rings;
2653
2654	tsize = roundup2(adapter->num_tx_desc *
2655	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2656	/*
2657	 * Now set up the TX queues, txconf is needed to handle the
2658	 * possibility that things fail midcourse and we need to
2659	 * undo memory gracefully
2660	 */
2661	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2662		/* Set up some basics */
2663		txr = &adapter->tx_rings[i];
2664		txr->adapter = adapter;
2665		txr->me = i;
2666
2667		/* Initialize the TX lock */
2668		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2669		    device_get_nameunit(dev), txr->me);
2670		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2671
2672		if (igb_dma_malloc(adapter, tsize,
2673			&txr->txdma, BUS_DMA_NOWAIT)) {
2674			device_printf(dev,
2675			    "Unable to allocate TX Descriptor memory\n");
2676			error = ENOMEM;
2677			goto err_tx_desc;
2678		}
2679		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2680		bzero((void *)txr->tx_base, tsize);
2681
2682        	/* Now allocate transmit buffers for the ring */
2683        	if (igb_allocate_transmit_buffers(txr)) {
2684			device_printf(dev,
2685			    "Critical Failure setting up transmit buffers\n");
2686			error = ENOMEM;
2687			goto err_tx_desc;
2688        	}
2689
2690	}
2691
2692	/*
2693	 * Next the RX queues...
2694	 */
2695	rsize = roundup2(adapter->num_rx_desc *
2696	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2697	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2698		rxr = &adapter->rx_rings[i];
2699		rxr->adapter = adapter;
2700		rxr->me = i;
2701
2702		/* Initialize the RX lock */
2703		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2704		    device_get_nameunit(dev), rxr->me);
2705		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2706
2707		if (igb_dma_malloc(adapter, rsize,
2708			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2709			device_printf(dev,
2710			    "Unable to allocate RxDescriptor memory\n");
2711			error = ENOMEM;
2712			goto err_rx_desc;
2713		}
2714		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2715		bzero((void *)rxr->rx_base, rsize);
2716
2717        	/* Allocate receive buffers for the ring */
2718		if (igb_allocate_receive_buffers(rxr)) {
2719			device_printf(dev,
2720			    "Critical Failure setting up receive buffers\n");
2721			error = ENOMEM;
2722			goto err_rx_desc;
2723		}
2724	}
2725
2726	return (0);
2727
2728err_rx_desc:
2729	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2730		igb_dma_free(adapter, &rxr->rxdma);
2731err_tx_desc:
2732	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2733		igb_dma_free(adapter, &txr->txdma);
2734	free(adapter->rx_rings, M_DEVBUF);
2735rx_fail:
2736	free(adapter->tx_rings, M_DEVBUF);
2737fail:
2738	return (error);
2739}
2740
2741/*********************************************************************
2742 *
2743 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2744 *  the information needed to transmit a packet on the wire. This is
2745 *  called only once at attach; setup is done on every reset.
2746 *
2747 **********************************************************************/
2748static int
2749igb_allocate_transmit_buffers(struct tx_ring *txr)
2750{
2751	struct adapter *adapter = txr->adapter;
2752	device_t dev = adapter->dev;
2753	struct igb_buffer *txbuf;
2754	int error, i;
2755
2756	/*
2757	 * Setup DMA descriptor areas.
2758	 */
2759	if ((error = bus_dma_tag_create(NULL,		/* parent */
2760			       PAGE_SIZE, 0,		/* alignment, bounds */
2761			       BUS_SPACE_MAXADDR,	/* lowaddr */
2762			       BUS_SPACE_MAXADDR,	/* highaddr */
2763			       NULL, NULL,		/* filter, filterarg */
2764			       IGB_TSO_SIZE,		/* maxsize */
2765			       IGB_MAX_SCATTER,		/* nsegments */
2766			       PAGE_SIZE,		/* maxsegsize */
2767			       0,			/* flags */
2768			       NULL,			/* lockfunc */
2769			       NULL,			/* lockfuncarg */
2770			       &txr->txtag))) {
2771		device_printf(dev,"Unable to allocate TX DMA tag\n");
2772		goto fail;
2773	}
2774
2775	if (!(txr->tx_buffers =
2776	    (struct igb_buffer *) malloc(sizeof(struct igb_buffer) *
2777	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2778		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2779		error = ENOMEM;
2780		goto fail;
2781	}
2782
2783        /* Create the descriptor buffer dma maps */
2784	txbuf = txr->tx_buffers;
2785	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2786		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2787		if (error != 0) {
2788			device_printf(dev, "Unable to create TX DMA map\n");
2789			goto fail;
2790		}
2791	}
2792
2793	return (0);
2794fail:
2795	/* We free all, it handles case where we are in the middle */
2796	igb_free_transmit_structures(adapter);
2797	return (error);
2798}
2799
2800/*********************************************************************
2801 *
2802 *  Initialize a transmit ring.
2803 *
2804 **********************************************************************/
2805static void
2806igb_setup_transmit_ring(struct tx_ring *txr)
2807{
2808	struct adapter *adapter = txr->adapter;
2809	struct igb_buffer *txbuf;
2810	int i;
2811
2812	/* Clear the old ring contents */
2813	bzero((void *)txr->tx_base,
2814	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2815	/* Reset indices */
2816	txr->next_avail_desc = 0;
2817	txr->next_to_clean = 0;
2818
2819	/* Free any existing tx buffers. */
2820        txbuf = txr->tx_buffers;
2821	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2822		if (txbuf->m_head != NULL) {
2823			bus_dmamap_sync(txr->txtag, txbuf->map,
2824			    BUS_DMASYNC_POSTWRITE);
2825			bus_dmamap_unload(txr->txtag, txbuf->map);
2826			m_freem(txbuf->m_head);
2827			txbuf->m_head = NULL;
2828		}
2829		/* clear the watch index */
2830		txbuf->next_eop = -1;
2831        }
2832
2833	/* Set number of descriptors available */
2834	txr->tx_avail = adapter->num_tx_desc;
2835
2836	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2837	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2838
2839}
2840
2841/*********************************************************************
2842 *
2843 *  Initialize all transmit rings.
2844 *
2845 **********************************************************************/
2846static void
2847igb_setup_transmit_structures(struct adapter *adapter)
2848{
2849	struct tx_ring *txr = adapter->tx_rings;
2850
2851	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2852		igb_setup_transmit_ring(txr);
2853
2854	return;
2855}
2856
2857/*********************************************************************
2858 *
2859 *  Enable transmit unit.
2860 *
2861 **********************************************************************/
2862static void
2863igb_initialize_transmit_units(struct adapter *adapter)
2864{
2865	struct tx_ring	*txr = adapter->tx_rings;
2866	u32		tctl, txdctl;
2867
2868	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2869
2870	/* Setup the Base and Length of the Tx Descriptor Rings */
2871	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2872		u64 bus_addr = txr->txdma.dma_paddr;
2873
2874		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2875		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2876		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2877		    (uint32_t)(bus_addr >> 32));
2878		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2879		    (uint32_t)bus_addr);
2880
2881		/* Setup the HW Tx Head and Tail descriptor pointers */
2882		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2883		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2884
2885		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2886		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2887		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2888
2889		/* Setup Transmit Descriptor Base Settings */
2890		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2891
2892		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2893		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2894		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2895	}
2896
2897	/* Program the Transmit Control Register */
2898	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2899	tctl &= ~E1000_TCTL_CT;
2900	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2901		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2902
2903	e1000_config_collision_dist(&adapter->hw);
2904
2905	/* This write will effectively turn on the transmit unit. */
2906	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2907
2908}
2909
2910/*********************************************************************
2911 *
2912 *  Free all transmit rings.
2913 *
2914 **********************************************************************/
2915static void
2916igb_free_transmit_structures(struct adapter *adapter)
2917{
2918	struct tx_ring *txr = adapter->tx_rings;
2919
2920	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2921		IGB_TX_LOCK(txr);
2922		igb_free_transmit_buffers(txr);
2923		igb_dma_free(adapter, &txr->txdma);
2924		IGB_TX_UNLOCK(txr);
2925		IGB_TX_LOCK_DESTROY(txr);
2926	}
2927	free(adapter->tx_rings, M_DEVBUF);
2928}
2929
2930/*********************************************************************
2931 *
2932 *  Free transmit ring related data structures.
2933 *
2934 **********************************************************************/
2935static void
2936igb_free_transmit_buffers(struct tx_ring *txr)
2937{
2938	struct adapter *adapter = txr->adapter;
2939	struct igb_buffer *tx_buffer;
2940	int             i;
2941
2942	INIT_DEBUGOUT("free_transmit_ring: begin");
2943
2944	if (txr->tx_buffers == NULL)
2945		return;
2946
2947	tx_buffer = txr->tx_buffers;
2948	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2949		if (tx_buffer->m_head != NULL) {
2950			bus_dmamap_sync(txr->txtag, tx_buffer->map,
2951			    BUS_DMASYNC_POSTWRITE);
2952			bus_dmamap_unload(txr->txtag,
2953			    tx_buffer->map);
2954			m_freem(tx_buffer->m_head);
2955			tx_buffer->m_head = NULL;
2956			if (tx_buffer->map != NULL) {
2957				bus_dmamap_destroy(txr->txtag,
2958				    tx_buffer->map);
2959				tx_buffer->map = NULL;
2960			}
2961		} else if (tx_buffer->map != NULL) {
2962			bus_dmamap_unload(txr->txtag,
2963			    tx_buffer->map);
2964			bus_dmamap_destroy(txr->txtag,
2965			    tx_buffer->map);
2966			tx_buffer->map = NULL;
2967		}
2968	}
2969
2970	if (txr->tx_buffers != NULL) {
2971		free(txr->tx_buffers, M_DEVBUF);
2972		txr->tx_buffers = NULL;
2973	}
2974	if (txr->txtag != NULL) {
2975		bus_dma_tag_destroy(txr->txtag);
2976		txr->txtag = NULL;
2977	}
2978	return;
2979}
2980
2981/**********************************************************************
2982 *
2983 *  Setup work for hardware segmentation offload (TSO) on
2984 *  adapters using advanced tx descriptors (82575)
2985 *
2986 **********************************************************************/
2987static boolean_t
2988igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
2989{
2990	struct adapter *adapter = txr->adapter;
2991	struct e1000_adv_tx_context_desc *TXD;
2992	struct igb_buffer        *tx_buffer;
2993	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
2994	u32 mss_l4len_idx = 0;
2995	u16 vtag = 0;
2996	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
2997	struct ether_vlan_header *eh;
2998	struct ip *ip;
2999	struct tcphdr *th;
3000
3001
3002	/*
3003	 * Determine where frame payload starts.
3004	 * Jump over vlan headers if already present
3005	 */
3006	eh = mtod(mp, struct ether_vlan_header *);
3007	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3008		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3009	else
3010		ehdrlen = ETHER_HDR_LEN;
3011
3012	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3013	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3014		return FALSE;
3015
3016	/* Only supports IPV4 for now */
3017	ctxd = txr->next_avail_desc;
3018	tx_buffer = &txr->tx_buffers[ctxd];
3019	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3020
3021	ip = (struct ip *)(mp->m_data + ehdrlen);
3022	if (ip->ip_p != IPPROTO_TCP)
3023                return FALSE;
3024	ip->ip_sum = 0;
3025	ip_hlen = ip->ip_hl << 2;
3026	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3027	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3028	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
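	/*
	 * The pseudo-header checksum seeded above deliberately
	 * omits the TCP length; the hardware adds the per-segment
	 * length as it splits the TSO payload.
	 */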
3029	tcp_hlen = th->th_off << 2;
3030	/*
3031	 * Calculate header length, this is used
3032	 * in the transmit desc in igb_xmit
3033	 */
3034	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3035
3036	/* VLAN MACLEN IPLEN */
3037	if (mp->m_flags & M_VLANTAG) {
3038		vtag = htole16(mp->m_pkthdr.ether_vtag);
3039		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3040	}
3041
3042	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3043	vlan_macip_lens |= ip_hlen;
3044	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3045
3046	/* ADV DTYPE TUCMD */
3047	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3048	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3049	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3050	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3051
3052	/* MSS L4LEN IDX */
3053	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3054	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3055	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3056
3057	TXD->seqnum_seed = htole32(0);
3058	tx_buffer->m_head = NULL;
3059	tx_buffer->next_eop = -1;
3060
3061	if (++ctxd == adapter->num_tx_desc)
3062		ctxd = 0;
3063
3064	txr->tx_avail--;
3065	txr->next_avail_desc = ctxd;
3066	return TRUE;
3067}
3068
3069
3070/*********************************************************************
3071 *
3072 *  Context Descriptor setup for VLAN or CSUM
3073 *
3074 **********************************************************************/
3075
3076static int
3077igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3078{
3079	struct adapter *adapter = txr->adapter;
3080	struct e1000_adv_tx_context_desc *TXD;
3081	struct igb_buffer        *tx_buffer;
3082	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3083	struct ether_vlan_header *eh;
3084	struct ip *ip = NULL;
3085	struct ip6_hdr *ip6;
3086	int  ehdrlen, ip_hlen = 0;
3087	u16	etype;
3088	u8	ipproto = 0;
3089	bool	offload = TRUE;
3090	u16 vtag = 0;
3091
3092	int ctxd = txr->next_avail_desc;
3093	tx_buffer = &txr->tx_buffers[ctxd];
3094	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3095
3096	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3097		offload = FALSE; /* Only here to handle VLANs */
3098	/*
3099	** In advanced descriptors the vlan tag must
3100	** be placed into the descriptor itself.
3101	*/
3102	if (mp->m_flags & M_VLANTAG) {
3103		vtag = htole16(mp->m_pkthdr.ether_vtag);
3104		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3105	} else if (offload == FALSE)
3106		return FALSE;
3107	/*
3108	 * Determine where frame payload starts.
3109	 * Jump over vlan headers if already present,
3110	 * helpful for QinQ too.
3111	 */
3112	eh = mtod(mp, struct ether_vlan_header *);
3113	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3114		etype = ntohs(eh->evl_proto);
3115		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3116	} else {
3117		etype = ntohs(eh->evl_encap_proto);
3118		ehdrlen = ETHER_HDR_LEN;
3119	}
3120
3121	/* Set the ether header length */
3122	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3123
3124	switch (etype) {
3125		case ETHERTYPE_IP:
3126			ip = (struct ip *)(mp->m_data + ehdrlen);
3127			ip_hlen = ip->ip_hl << 2;
3128			if (mp->m_len < ehdrlen + ip_hlen) {
3129				offload = FALSE;
3130				break;
3131			}
3132			ipproto = ip->ip_p;
3133			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3134			break;
3135		case ETHERTYPE_IPV6:
3136			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3137			ip_hlen = sizeof(struct ip6_hdr);
3138			if (mp->m_len < ehdrlen + ip_hlen)
3139				return FALSE; /* failure */
3140			ipproto = ip6->ip6_nxt;
3141			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3142			break;
3143#ifdef IGB_TIMESYNC
3144		case ETHERTYPE_IEEE1588:
3145			offload = IGB_TIMESTAMP;
3146			break;
3147#endif
3148		default:
3149			offload = FALSE;
3150			break;
3151	}
3152
3153	vlan_macip_lens |= ip_hlen;
3154	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3155
3156	switch (ipproto) {
3157		case IPPROTO_TCP:
3158			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3159				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3160			break;
3161		case IPPROTO_UDP:
3162		{
3163#ifdef IGB_TIMESYNC
3164			void *hdr = (caddr_t) ip + ip_hlen;
3165			struct udphdr *uh = (struct udphdr *)hdr;
3166
3167			if (uh->uh_dport == htons(TSYNC_PORT))
3168				offload = IGB_TIMESTAMP;
3169#endif
3170			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3171				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3172			break;
3173		}
3174		default:
3175			offload = FALSE;
3176			break;
3177	}
3178
3179	/* Now copy bits into descriptor */
3180	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3181	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3182	TXD->seqnum_seed = htole32(0);
3183	TXD->mss_l4len_idx = htole32(0);
3184
3185	tx_buffer->m_head = NULL;
3186	tx_buffer->next_eop = -1;
3187
3188	/* We've consumed the first desc, adjust counters */
3189	if (++ctxd == adapter->num_tx_desc)
3190		ctxd = 0;
3191	txr->next_avail_desc = ctxd;
3192	--txr->tx_avail;
3193
3194        return (offload);
3195}
3196
3197
3198/**********************************************************************
3199 *
3200 *  Examine each tx_buffer in the used queue. If the hardware is done
3201 *  processing the packet then free associated resources. The
3202 *  tx_buffer is put back on the free queue.
3203 *
3204 *  TRUE return means there's work in the ring to clean, FALSE it's empty.
3205 **********************************************************************/
3206static bool
3207igb_txeof(struct tx_ring *txr)
3208{
3209	struct adapter	*adapter = txr->adapter;
3210        int first, last, done, num_avail;
3211        struct igb_buffer *tx_buffer;
3212        struct e1000_tx_desc   *tx_desc, *eop_desc;
3213	struct ifnet   *ifp = adapter->ifp;
3214
3215	IGB_TX_LOCK_ASSERT(txr);
3216
3217        if (txr->tx_avail == adapter->num_tx_desc)
3218                return FALSE;
3219
3220        num_avail = txr->tx_avail;
3221        first = txr->next_to_clean;
3222        tx_desc = &txr->tx_base[first];
3223        tx_buffer = &txr->tx_buffers[first];
3224	last = tx_buffer->next_eop;
3225        eop_desc = &txr->tx_base[last];
3226
3227	/*
3228	 * What this does is get the index of the
3229	 * first descriptor AFTER the EOP of the
3230	 * first packet, that way we can do the
3231	 * simple comparison on the inner while loop.
3232	 */
3233	if (++last == adapter->num_tx_desc)
3234 		last = 0;
3235	done = last;
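        /*
         * Example: in a 256-descriptor ring with the EOP at
         * index 255, 'done' wraps to 0, so the inner while
         * loop stops after cleaning slot 255.
         */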
3236
3237        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3238            BUS_DMASYNC_POSTREAD);
3239
3240        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3241		/* We clean the range of the packet */
3242		while (first != done) {
3243                	tx_desc->upper.data = 0;
3244                	tx_desc->lower.data = 0;
3245                	tx_desc->buffer_addr = 0;
3246                	num_avail++;
3247
3248			if (tx_buffer->m_head) {
3249				ifp->if_opackets++;
3250				bus_dmamap_sync(txr->txtag,
3251				    tx_buffer->map,
3252				    BUS_DMASYNC_POSTWRITE);
3253				bus_dmamap_unload(txr->txtag,
3254				    tx_buffer->map);
3255
3256                        	m_freem(tx_buffer->m_head);
3257                        	tx_buffer->m_head = NULL;
3258                	}
3259			tx_buffer->next_eop = -1;
3260
3261	                if (++first == adapter->num_tx_desc)
3262				first = 0;
3263
3264	                tx_buffer = &txr->tx_buffers[first];
3265			tx_desc = &txr->tx_base[first];
3266		}
3267		/* See if we can continue to the next packet */
3268		last = tx_buffer->next_eop;
3269		if (last != -1) {
3270        		eop_desc = &txr->tx_base[last];
3271			/* Get new done point */
3272			if (++last == adapter->num_tx_desc) last = 0;
3273			done = last;
3274		} else
3275			break;
3276        }
3277        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3278            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3279
3280        txr->next_to_clean = first;
3281
3282        /*
3283         * If we have enough room, clear IFF_DRV_OACTIVE to tell the stack
3284         * that it is OK to send packets.
3285         * If there are no pending descriptors, clear the timeout. Otherwise,
3286         * if some descriptors have been freed, restart the timeout.
3287         */
3288        if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3289                ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3290		/* All clean, turn off the timer */
3291                if (num_avail == adapter->num_tx_desc) {
3292			txr->watchdog_timer = 0;
3293        		txr->tx_avail = num_avail;
3294			return FALSE;
3295		}
3296		/* Some cleaned, reset the timer */
3297                else if (num_avail != txr->tx_avail)
3298			txr->watchdog_timer = IGB_TX_TIMEOUT;
3299        }
3300        txr->tx_avail = num_avail;
3301        return TRUE;
3302}
3303
3304
3305/*********************************************************************
3306 *
3307 *  Get a buffer from system mbuf buffer pool.
3308 *
3309 **********************************************************************/
3310static int
3311igb_get_buf(struct rx_ring *rxr, int i)
3312{
3313	struct adapter		*adapter = rxr->adapter;
3314	struct mbuf		*m;
3315	bus_dma_segment_t	segs[1];
3316	bus_dmamap_t		map;
3317	struct igb_buffer	*rx_buffer;
3318	int			error, nsegs;
3319
3320	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3321	if (m == NULL) {
3322		adapter->mbuf_cluster_failed++;
3323		return (ENOBUFS);
3324	}
3325	m->m_len = m->m_pkthdr.len = MCLBYTES;
3326
3327	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3328		m_adj(m, ETHER_ALIGN);
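	/*
	 * The ETHER_ALIGN (2 byte) offset leaves the IP header
	 * 32-bit aligned behind the 14-byte Ethernet header; it
	 * is skipped when a max-size frame needs the full cluster.
	 */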
3329
3330	/*
3331	 * Using memory from the mbuf cluster pool, invoke the
3332	 * bus_dma machinery to arrange the memory mapping.
3333	 */
3334	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3335	    rxr->rx_spare_map, m, segs, &nsegs, BUS_DMA_NOWAIT);
3336	if (error != 0) {
3337		m_free(m);
3338		return (error);
3339	}
3340
3341	/* If nsegs is wrong then the stack is corrupt. */
3342	KASSERT(nsegs == 1, ("Too many segments returned!"));
3343
3344	rx_buffer = &rxr->rx_buffers[i];
3345	if (rx_buffer->m_head != NULL)
3346		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3347
3348	map = rx_buffer->map;
3349	rx_buffer->map = rxr->rx_spare_map;
3350	rxr->rx_spare_map = map;
3351	bus_dmamap_sync(rxr->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3352	rx_buffer->m_head = m;
3353
3354	rxr->rx_base[i].read.pkt_addr = htole64(segs[0].ds_addr);
3355	return (0);
3356}
3357
3358
3359/*********************************************************************
3360 *
3361 *  Allocate memory for rx_buffer structures. Since we use one
3362 *  rx_buffer per received packet, the maximum number of rx_buffers
3363 *  that we'll need is equal to the number of receive descriptors
3364 *  that we've allocated.
3365 *
3366 **********************************************************************/
3367static int
3368igb_allocate_receive_buffers(struct rx_ring *rxr)
3369{
3370	struct	adapter 	*adapter = rxr->adapter;
3371	device_t 		dev = adapter->dev;
3372	struct igb_buffer 	*rxbuf;
3373	int             	i, bsize, error;
3374
3375	bsize = sizeof(struct igb_buffer) * adapter->num_rx_desc;
3376	if (!(rxr->rx_buffers =
3377	    (struct igb_buffer *) malloc(bsize,
3378	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3379		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3380		error = ENOMEM;
3381		goto fail;
3382	}
3383
3384	if ((error = bus_dma_tag_create(NULL,		/* parent */
3385				   PAGE_SIZE, 0,	/* alignment, bounds */
3386				   BUS_SPACE_MAXADDR,	/* lowaddr */
3387				   BUS_SPACE_MAXADDR,	/* highaddr */
3388				   NULL, NULL,		/* filter, filterarg */
3389				   MCLBYTES,		/* maxsize */
3390				   1,			/* nsegments */
3391				   MCLBYTES,		/* maxsegsize */
3392				   0,			/* flags */
3393				   NULL,		/* lockfunc */
3394				   NULL,		/* lockfuncarg */
3395				   &rxr->rxtag))) {
3396		device_printf(dev, "Unable to create RX Small DMA tag\n");
3397		goto fail;
3398	}
3399
3400	/* Create the spare map (used by getbuf) */
3401        error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3402	     &rxr->rx_spare_map);
3403	if (error) {
3404		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3405		    __func__, error);
3406		goto fail;
3407	}
3408
3409	for (i = 0; i < adapter->num_rx_desc; i++) {
3410		rxbuf = &rxr->rx_buffers[i];
3411		error = bus_dmamap_create(rxr->rxtag,
3412		    BUS_DMA_NOWAIT, &rxbuf->map);
3413		if (error) {
3414			device_printf(dev, "Unable to create Small RX DMA map\n");
3415			goto fail;
3416		}
3417	}
3418
3419	return (0);
3420
3421fail:
3422	/* Frees all, but can handle partial completion */
3423	igb_free_receive_structures(adapter);
3424	return (error);
3425}
3426
3427/*********************************************************************
3428 *
3429 *  Initialize a receive ring and its buffers.
3430 *
3431 **********************************************************************/
3432static int
3433igb_setup_receive_ring(struct rx_ring *rxr)
3434{
3435	struct	adapter		*adapter;
3436	device_t		dev;
3437	struct igb_buffer	*rxbuf;
3438	struct lro_ctrl		*lro = &rxr->lro;
3439	int j, rsize;
3440
3441	adapter = rxr->adapter;
3442	dev = adapter->dev;
3443	rsize = roundup2(adapter->num_rx_desc *
3444	    sizeof(union e1000_adv_rx_desc), 4096);
3445	/* Clear the ring contents */
3446	bzero((void *)rxr->rx_base, rsize);
3447
3448	/*
3449	** Free any RX buffers still loaded from a
3450	** previous run before allocating the fresh
3451	** set of mbufs below.
3452	*/
3453	for (int i = 0; i < adapter->num_rx_desc; i++) {
3454		rxbuf = &rxr->rx_buffers[i];
3455		if (rxbuf->m_head != NULL) {
3456			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3457			    BUS_DMASYNC_POSTREAD);
3458			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3459			m_freem(rxbuf->m_head);
3460			rxbuf->m_head = NULL;
3461		}
3462	}
3463
3464	for (j = 0; j < adapter->num_rx_desc; j++) {
3465		if (igb_get_buf(rxr, j) == ENOBUFS) {
3466			rxr->rx_buffers[j].m_head = NULL;
3467			rxr->rx_base[j].read.pkt_addr = 0;
3468			goto fail;
3469		}
3470	}
3471
3472	/* Setup our descriptor indices */
3473	rxr->next_to_check = 0;
3474	rxr->last_cleaned = 0;
3475
3476	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3477	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3478
3479        /* Now set up the LRO interface */
3480	if (igb_enable_lro) {
3481		int err = tcp_lro_init(lro);
3482		if (err) {
3483			device_printf(dev,"LRO Initialization failed!\n");
3484			goto fail;
3485		}
3486		INIT_DEBUGOUT("RX LRO Initialized\n");
3487		lro->ifp = adapter->ifp;
3488	}
3489
3490	return (0);
3491fail:
3492	/*
3493	 * We need to clean up any buffers allocated so far;
3494	 * 'j' is the failing index, so decrement it to get the
3495	 * last success.
3496	 */
3497	for (--j; j >= 0; j--) {
3498		rxbuf = &rxr->rx_buffers[j];
3499		if (rxbuf->m_head != NULL) {
3500			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3501			    BUS_DMASYNC_POSTREAD);
3502			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3503			m_freem(rxbuf->m_head);
3504			rxbuf->m_head = NULL;
3505		}
3506	}
3507	return (ENOBUFS);
3508}
3509
3510/*********************************************************************
3511 *
3512 *  Initialize all receive rings.
3513 *
3514 **********************************************************************/
3515static int
3516igb_setup_receive_structures(struct adapter *adapter)
3517{
3518	struct rx_ring *rxr = adapter->rx_rings;
3519	int i, j;
3520
3521	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3522		if (igb_setup_receive_ring(rxr))
3523			goto fail;
3524
3525	return (0);
3526fail:
3527	/*
3528	 * Free RX buffers allocated so far, we will only handle
3529	 * the rings that completed, the failing case will have
3530	 * cleaned up for itself. The value of 'i' will be the
3531	 * failed ring so we must pre-decrement it.
3532	 */
3533	rxr = adapter->rx_rings;
3534	for (--i; i >= 0; i--, rxr++) {
3535		for (j = 0; j < adapter->num_rx_desc; j++) {
3536			struct igb_buffer *rxbuf;
3537			rxbuf = &rxr->rx_buffers[j];
3538			if (rxbuf->m_head != NULL) {
3539				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3540			  	  BUS_DMASYNC_POSTREAD);
3541				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3542				m_freem(rxbuf->m_head);
3543				rxbuf->m_head = NULL;
3544			}
3545		}
3546	}
3547
3548	return (ENOBUFS);
3549}
3550
3551/*********************************************************************
3552 *
3553 *  Enable receive unit.
3554 *
3555 **********************************************************************/
3556static void
3557igb_initialize_receive_units(struct adapter *adapter)
3558{
3559	struct rx_ring	*rxr = adapter->rx_rings;
3560	struct ifnet	*ifp = adapter->ifp;
3561	u32		rctl, rxcsum, psize;
3562
3563	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3564
3565	/*
3566	 * Make sure receives are disabled while setting
3567	 * up the descriptor ring
3568	 */
3569	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3570	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3571
3572	/* Setup the Base and Length of the Rx Descriptor Rings */
3573	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3574		u64 bus_addr = rxr->rxdma.dma_paddr;
3575		u32 rxdctl, srrctl;
3576
3577		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3578		    adapter->num_rx_desc * sizeof(union e1000_adv_rx_desc));
3579		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3580		    (uint32_t)(bus_addr >> 32));
3581		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3582		    (uint32_t)bus_addr);
3583		/* Use Advanced Descriptor type */
3584		srrctl = E1000_READ_REG(&adapter->hw, E1000_SRRCTL(i));
3585		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3586		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3587		/* Enable this Queue */
3588		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3589		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3590		rxdctl &= 0xFFF00000;
3591		rxdctl |= IGB_RX_PTHRESH;
3592		rxdctl |= IGB_RX_HTHRESH << 8;
3593		rxdctl |= IGB_RX_WTHRESH << 16;
3594		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3595	}
3596
3597	/*
3598	** Setup for RX MultiQueue
3599	*/
3600	if (adapter->num_rx_queues > 1) {
3601		u32 random[10], mrqc, shift = 0;
3602		union igb_reta {
3603			u32 dword;
3604			u8  bytes[4];
3605		} reta;
3606
3607		arc4rand(&random, sizeof(random), 0);
3608		if (adapter->hw.mac.type == e1000_82575)
3609			shift = 6;
3610		/* Fill the 128-entry redirection table, four entries per write */
3611		for (int i = 0; i < 128; i++) {
3612			reta.bytes[i & 3] =
3613			    (i % adapter->num_rx_queues) << shift;
3614			if ((i & 3) == 3)
3615				E1000_WRITE_REG(&adapter->hw,
3616				    E1000_RETA(i & ~3), reta.dword);
3617		}
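		/*
		 * Each 32-bit RETA register packs four one-byte entries,
		 * so with e.g. two queues (and shift == 0) the bytes
		 * cycle 0,1,0,1 and every fourth pass above flushes one
		 * assembled dword to E1000_RETA(i & ~3).
		 */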
3618		/* Now seed the RSS random key */
3619		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3620		for (int i = 0; i < 10; i++)
3621			E1000_WRITE_REG_ARRAY(&adapter->hw,
3622			    E1000_RSSRK(0), i, random[i]);
3623
3624		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3625		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3626		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3627		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3628		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3629		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3630		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3631		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3632
3633		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3634
3635		/*
3636		** NOTE: Receive Full-Packet Checksum Offload
3637		** is mutually exclusive with Multiqueue; note,
3638		** though, that ordinary TCP/IP checksum offload
3639		** is a separate feature and still works.
3640		*/
3641		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3642		rxcsum |= E1000_RXCSUM_PCSD;
3643		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3644	} else if (ifp->if_capenable & IFCAP_RXCSUM) {
3645		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3646		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
3647		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3648	}
3649
3650	/* Setup the Receive Control Register */
3651	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3652	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3653		   E1000_RCTL_RDMTS_HALF |
3654		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3655
3656	/* Make sure VLAN Filters are off */
3657	rctl &= ~E1000_RCTL_VFE;
3658
3659	rctl &= ~E1000_RCTL_SBP;
3660
3661	switch (adapter->rx_buffer_len) {
3662	default:
3663	case 2048:
3664		rctl |= E1000_RCTL_SZ_2048;
3665		break;
3666	case 4096:
3667		rctl |= E1000_RCTL_SZ_4096 |
3668		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3669		break;
3670	case 8192:
3671		rctl |= E1000_RCTL_SZ_8192 |
3672		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3673		break;
3674	case 16384:
3675		rctl |= E1000_RCTL_SZ_16384 |
3676		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
3677		break;
3678	}
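	/*
	 * Note on the encodings above: E1000_RCTL_BSEX selects the
	 * extended buffer-size encoding so the SZ bits can express
	 * 4096/8192/16384, and E1000_RCTL_LPE permits frames longer
	 * than the standard 1522 bytes.
	 */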
3679
3680	if (ifp->if_mtu > ETHERMTU) {
3681		/* Set maximum packet len */
3682		psize = adapter->max_frame_size;
3683		/* are we on a vlan? */
3684		if (adapter->ifp->if_vlantrunk != NULL)
3685			psize += VLAN_TAG_SIZE;
3686		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3687		rctl |= E1000_RCTL_LPE;
3688	} else
3689		rctl &= ~E1000_RCTL_LPE;
3690
3691	/* Enable Receives */
3692	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3693
3694	/*
3695	 * Setup the HW Rx Head and Tail Descriptor Pointers
3696	 *   - needs to be after enable
3697	 */
3698	for (int i = 0; i < adapter->num_rx_queues; i++) {
3699		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3700		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3701		     adapter->num_rx_desc - 1);
3702	}
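	/*
	 * Head == tail means an empty ring to the hardware, so the
	 * tail is parked one slot behind the head; the hardware may
	 * fill every descriptor except the one at the tail.
	 */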
3703	return;
3704}
3705
3706/*********************************************************************
3707 *
3708 *  Free receive rings.
3709 *
3710 **********************************************************************/
3711static void
3712igb_free_receive_structures(struct adapter *adapter)
3713{
3714	struct rx_ring *rxr = adapter->rx_rings;
3715
3716	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3717		struct lro_ctrl	*lro = &rxr->lro;
3718		igb_free_receive_buffers(rxr);
3719		tcp_lro_free(lro);
3720		igb_dma_free(adapter, &rxr->rxdma);
3721	}
3722
3723	free(adapter->rx_rings, M_DEVBUF);
3724}
3725
3726/*********************************************************************
3727 *
3728 *  Free receive ring data structures.
3729 *
3730 **********************************************************************/
3731static void
3732igb_free_receive_buffers(struct rx_ring *rxr)
3733{
3734	struct adapter	*adapter = rxr->adapter;
3735	struct igb_buffer *rx_buffer;
3736
3737	INIT_DEBUGOUT("igb_free_receive_buffers: begin");
3738
3739	if (rxr->rx_spare_map) {
3740		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3741		rxr->rx_spare_map = NULL;
3742	}
3743
3744	/* Cleanup any existing buffers */
3745	if (rxr->rx_buffers != NULL) {
3746		rx_buffer = &rxr->rx_buffers[0];
3747		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3748			if (rx_buffer->m_head != NULL) {
3749				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3750				    BUS_DMASYNC_POSTREAD);
3751				bus_dmamap_unload(rxr->rxtag,
3752				    rx_buffer->map);
3753				m_freem(rx_buffer->m_head);
3754				rx_buffer->m_head = NULL;
3755			} else if (rx_buffer->map != NULL)
3756				bus_dmamap_unload(rxr->rxtag,
3757				    rx_buffer->map);
3758			if (rx_buffer->map != NULL) {
3759				bus_dmamap_destroy(rxr->rxtag,
3760				    rx_buffer->map);
3761				rx_buffer->map = NULL;
3762			}
3763		}
3764	}
3765
3766	if (rxr->rx_buffers != NULL) {
3767		free(rxr->rx_buffers, M_DEVBUF);
3768		rxr->rx_buffers = NULL;
3769	}
3770
3771	if (rxr->rxtag != NULL) {
3772		bus_dma_tag_destroy(rxr->rxtag);
3773		rxr->rxtag = NULL;
3774	}
3775}
3776/*********************************************************************
3777 *
3778 *  This routine executes in interrupt context. It replenishes
3779 *  the mbufs in the descriptor and sends data which has been
3780 *  dma'ed into host memory to upper layer.
3781 *
3782 *  We loop at most count times if count is > 0, or until done if
3783 *  count < 0.
3784 *
3785 *  Return TRUE if all clean, FALSE otherwise
3786 *********************************************************************/
3787static bool
3788igb_rxeof(struct rx_ring *rxr, int count)
3789{
3790	struct adapter		*adapter = rxr->adapter;
3791	struct ifnet		*ifp;
3792	struct lro_ctrl		*lro = &rxr->lro;
3793	struct lro_entry	*queued;
3794	struct mbuf		*mp;
3795	uint8_t			accept_frame = 0;
3796	uint8_t			eop = 0;
3797	uint16_t 		len, desc_len, prev_len_adj;
3798	int			i;
3799	u32			staterr;
3800	union e1000_adv_rx_desc	*cur;
3801
3802	IGB_RX_LOCK(rxr);
3803	ifp = adapter->ifp;
3804	i = rxr->next_to_check;
3805	cur = &rxr->rx_base[i];
3806	staterr = cur->wb.upper.status_error;
3807
3808	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3809	    BUS_DMASYNC_POSTREAD);
3810
3811	if (!(staterr & E1000_RXD_STAT_DD)) {
3812		IGB_RX_UNLOCK(rxr);
3813		return FALSE;
3814	}
3815
3816	while ((staterr & E1000_RXD_STAT_DD) &&
3817	    (count != 0) &&
3818	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3819		struct mbuf *m = NULL;
3820
3821		mp = rxr->rx_buffers[i].m_head;
3822		/*
3823		 * Sync now: the received data is examined (and may
3824		 * be copied for alignment) before being handed up.
3825		 */
3826		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
3827		    BUS_DMASYNC_POSTREAD);
3828
3829		accept_frame = 1;
3830		prev_len_adj = 0;
3831		desc_len = le16toh(cur->wb.upper.length);
3832		if (staterr & E1000_RXD_STAT_EOP) {
3833			count--;
3834			eop = 1;
3835			if (desc_len < ETHER_CRC_LEN) {
3836				len = 0;
3837				prev_len_adj = ETHER_CRC_LEN - desc_len;
3838			} else
3839				len = desc_len - ETHER_CRC_LEN;
3840		} else {
3841			eop = 0;
3842			len = desc_len;
3843		}
3844
3845		/* Discard frames the hardware flagged as errored */
3846		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
3847			accept_frame = 0;
3853
3854		if (accept_frame) {
3855			if (igb_get_buf(rxr, i) != 0) {
3856				ifp->if_iqdrops++;
3857				goto discard;
3858			}
3859
3860			/* Assign correct length to the current fragment */
3861			mp->m_len = len;
3862
3863			if (rxr->fmp == NULL) {
3864				mp->m_pkthdr.len = len;
3865				rxr->fmp = mp; /* Store the first mbuf */
3866				rxr->lmp = mp;
3867			} else {
3868				/* Chain mbuf's together */
3869				mp->m_flags &= ~M_PKTHDR;
3870				/*
3871				 * Adjust length of previous mbuf in chain if
3872				 * we received less than 4 bytes in the last
3873				 * descriptor.
3874				 */
3875				if (prev_len_adj > 0) {
3876					rxr->lmp->m_len -= prev_len_adj;
3877					rxr->fmp->m_pkthdr.len -=
3878					    prev_len_adj;
3879				}
3880				rxr->lmp->m_next = mp;
3881				rxr->lmp = rxr->lmp->m_next;
3882				rxr->fmp->m_pkthdr.len += len;
3883			}
3884
3885			if (eop) {
3886				rxr->fmp->m_pkthdr.rcvif = ifp;
3887				ifp->if_ipackets++;
3888				rxr->rx_packets++;
3889				rxr->bytes += rxr->fmp->m_pkthdr.len;
3890				rxr->rx_bytes += rxr->bytes;
3891
3892				igb_rx_checksum(staterr, rxr->fmp);
3893#ifndef __NO_STRICT_ALIGNMENT
3894				if (adapter->max_frame_size >
3895				    (MCLBYTES - ETHER_ALIGN) &&
3896				    igb_fixup_rx(rxr) != 0)
3897					goto skip;
3898#endif
3899				if (staterr & E1000_RXD_STAT_VP) {
3900					rxr->fmp->m_pkthdr.ether_vtag =
3901					    le16toh(cur->wb.upper.vlan);
3902					rxr->fmp->m_flags |= M_VLANTAG;
3903				}
3904#ifndef __NO_STRICT_ALIGNMENT
3905skip:
3906#endif
3907				m = rxr->fmp;
3908				rxr->fmp = NULL;
3909				rxr->lmp = NULL;
3910			}
3911		} else {
3912			ifp->if_ierrors++;
3913discard:
3914			/* Reuse loaded DMA map and just update mbuf chain */
3915			mp = rxr->rx_buffers[i].m_head;
3916			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
3917			mp->m_data = mp->m_ext.ext_buf;
3918			mp->m_next = NULL;
3919			if (adapter->max_frame_size <=
3920			    (MCLBYTES - ETHER_ALIGN))
3921				m_adj(mp, ETHER_ALIGN);
3922			if (rxr->fmp != NULL) {
3923				m_freem(rxr->fmp);
3924				rxr->fmp = NULL;
3925				rxr->lmp = NULL;
3926			}
3927			m = NULL;
3928		}
3929
3930		/* Zero out the receive descriptors status. */
3931		cur->wb.upper.status_error = 0;
3932		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3933		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3934
3935		rxr->last_cleaned = i; /* For updating tail */
3936
3937		/* Advance our pointers to the next descriptor. */
3938		if (++i == adapter->num_rx_desc)
3939			i = 0;
3940
3941		if (m != NULL) {
3942			rxr->next_to_check = i;
3943			/* Use LRO if possible */
3944			if ((!lro->lro_cnt) || (tcp_lro_rx(lro, m, 0))) {
3945				/* Pass up to the stack */
3946				(*ifp->if_input)(ifp, m);
3947				i = rxr->next_to_check;
3948			}
3949		}
3950		/* Get the next descriptor */
3951		cur = &rxr->rx_base[i];
3952		staterr = cur->wb.upper.status_error;
3953	}
3954	rxr->next_to_check = i;
3955
3956	/* Advance this ring's "Tail Pointer" to the last cleaned slot */
3957	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
3958
3959	/*
3960	 * Flush any outstanding LRO work
3961	 */
3962	while (!SLIST_EMPTY(&lro->lro_active)) {
3963		queued = SLIST_FIRST(&lro->lro_active);
3964		SLIST_REMOVE_HEAD(&lro->lro_active, next);
3965		tcp_lro_flush(lro, queued);
3966	}
3967
3968	IGB_RX_UNLOCK(rxr);
3969
3970	if (!(staterr & E1000_RXD_STAT_DD))
3971		return FALSE;
3972
3973	return TRUE;
3974}
3975
3976#ifndef __NO_STRICT_ALIGNMENT
3977/*
3978 * When jumbo frames are enabled we should realign the entire payload on
3979 * architectures with strict alignment. This is a serious design mistake of
3980 * the 8254x, as it nullifies the gain of DMA: the chip only allows RX buffer
3981 * sizes of 2048/4096/8192/16384, while what we really want is
3982 * 2048 - ETHER_ALIGN so the payload itself lands aligned. On architectures
3983 * without strict alignment the 8254x still performs unaligned accesses,
3984 * which reduces performance as well. To avoid copying an entire frame just
3985 * to align it, we allocate a new mbuf, copy only the ethernet header into
3986 * it, and prepend the new mbuf onto the existing mbuf chain.
3987 *
3988 * Be aware that best performance from the 8254x is achieved only when jumbo
3989 * frames are not used at all on architectures with strict alignment.
3990 */
3991static int
3992igb_fixup_rx(struct rx_ring *rxr)
3993{
3994	struct adapter *adapter = rxr->adapter;
3995	struct mbuf *m, *n;
3996	int error;
3997
3998	error = 0;
3999	m = rxr->fmp;
4000	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
4001		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
4002		m->m_data += ETHER_HDR_LEN;
4003	} else {
4004		MGETHDR(n, M_DONTWAIT, MT_DATA);
4005		if (n != NULL) {
4006			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
4007			m->m_data += ETHER_HDR_LEN;
4008			m->m_len -= ETHER_HDR_LEN;
4009			n->m_len = ETHER_HDR_LEN;
4010			M_MOVE_PKTHDR(n, m);
4011			n->m_next = m;
4012			rxr->fmp = n;
4013		} else {
4014			adapter->dropped_pkts++;
4015			m_freem(rxr->fmp);
4016			rxr->fmp = NULL;
4017			error = ENOMEM;
4018		}
4019	}
4020
4021	return (error);
4022}
4023#endif
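/*
 * For illustration: an ethernet header is 14 bytes, so a frame DMA'd to a
 * 4-byte aligned buffer leaves the IP header that follows on a 2-byte
 * boundary. Moving the 14-byte header into its own mbuf, as above, restores
 * 4-byte alignment of the IP header; reserving ETHER_ALIGN (2) bytes at the
 * front of the cluster achieves the same thing for frames that fit.
 */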
4024
4025/*********************************************************************
4026 *
4027 *  Verify that the hardware indicated that the checksum is valid.
4028 *  Inform the stack about the status of checksum so that stack
4029 *  doesn't spend time verifying the checksum.
4030 *
4031 *********************************************************************/
4032static void
4033igb_rx_checksum(u32 staterr, struct mbuf *mp)
4034{
4035	u16 status = (u16)staterr;
4036	u8  errors = (u8) (staterr >> 24);
4037
4038	/* Ignore Checksum bit is set */
4039	if (status & E1000_RXD_STAT_IXSM) {
4040		mp->m_pkthdr.csum_flags = 0;
4041		return;
4042	}
4043
4044	if (status & E1000_RXD_STAT_IPCS) {
4045		/* Did it pass? */
4046		if (!(errors & E1000_RXD_ERR_IPE)) {
4047			/* IP Checksum Good */
4048			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4049			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4050
4051		} else
4052			mp->m_pkthdr.csum_flags = 0;
4053	}
4054
4055	if (status & E1000_RXD_STAT_TCPCS) {
4056		/* Did it pass? */
4057		if (!(errors & E1000_RXD_ERR_TCPE)) {
4058			mp->m_pkthdr.csum_flags |=
4059			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4060			mp->m_pkthdr.csum_data = htons(0xffff);
4061		}
4062	}
4063	return;
4064}
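/*
 * Note on the convention above: CSUM_DATA_VALID | CSUM_PSEUDO_HDR together
 * with csum_data = 0xffff is how a FreeBSD driver tells the stack that the
 * TCP/UDP checksum, pseudo header included, verified good, so the stack
 * can skip recomputing it.
 */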
4065
4066/*
4067 * This routine is run via a vlan
4068 * config EVENT
4069 */
4070static void
4071igb_register_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4072{
4073	struct adapter	*adapter = ifp->if_softc;
4074	u32		ctrl, rctl, index, vfta;
4075
4076	/* Shouldn't happen */
4077	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
4078		return;
4079
4080	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4081	ctrl |= E1000_CTRL_VME;
4082	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4083
4084	/* Setup for Hardware Filter */
4085	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4086	rctl |= E1000_RCTL_VFE;
4087	rctl &= ~E1000_RCTL_CFIEN;
4088	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4089
4090	/* Make entry in the hardware filter table */
4091	index = ((vtag >> 5) & 0x7F);
4092	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
4093	vfta |= (1 << (vtag & 0x1F));
4094	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
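	/*
	 * Worked example: vtag 100 selects VFTA register 100 >> 5 = 3
	 * and bit 100 & 0x1F = 4 within it; one bit per VLAN id covers
	 * all 128 * 32 = 4096 possible ids.
	 */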
4095
4096	/* Update the frame size */
4097	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4098	    adapter->max_frame_size + VLAN_TAG_SIZE);
4099
4100}
4101
4102/*
4103 * This routine is run via a vlan
4104 * unconfig EVENT
4105 */
4106static void
4107igb_unregister_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4108{
4109	struct adapter	*adapter = ifp->if_softc;
4110	u32		index, vfta;
4111
4112	/* Shouldn't happen */
4113	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
4114		return;
4115
4116	/* Remove entry in the hardware filter table */
4117	index = ((vtag >> 5) & 0x7F);
4118	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
4119	vfta &= ~(1 << (vtag & 0x1F));
4120	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
4121	/* Have all vlans unregistered? */
4122	if (adapter->ifp->if_vlantrunk == NULL) {
4123		u32 rctl;
4124		/* Turn off the filter table */
4125		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4126		rctl &= ~E1000_RCTL_VFE;
4127		rctl |= E1000_RCTL_CFIEN;
4128		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4129		/* Reset the frame size */
4130		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4131		    adapter->max_frame_size);
4132	}
4133}
4134
4135static void
4136igb_enable_intr(struct adapter *adapter)
4137{
4138	/* With RSS set up what to auto clear */
4139	if (adapter->msix_mem) {
4140		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4141		    adapter->eims_mask);
4142		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4143		    adapter->eims_mask);
4144		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4145		    adapter->eims_mask);
4146		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4147		    E1000_IMS_LSC);
4148	} else {
4149		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4150		    IMS_ENABLE_MASK);
4151	}
4152	E1000_WRITE_FLUSH(&adapter->hw);
4153
4154	return;
4155}
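/*
 * In MSI-X mode the three extended registers written above play distinct
 * roles: EIAC selects which interrupt causes auto-clear, EIAM which are
 * auto-masked, and EIMS actually unmasks them; the plain IMS write is kept
 * only for link status change interrupts.
 */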
4156
4157static void
4158igb_disable_intr(struct adapter *adapter)
4159{
4160	if (adapter->msix_mem) {
4161		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4162		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4163	}
4164	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4165	E1000_WRITE_FLUSH(&adapter->hw);
4166	return;
4167}
4168
4169/*
4170 * Bit of a misnomer: what this really does is
4171 * enable OS management of the system, i.e. it
4172 * disables the special hardware management features.
4173 */
4174static void
4175igb_init_manageability(struct adapter *adapter)
4176{
4177	/* A shared code workaround */
4178#define E1000_82542_MANC2H E1000_MANC2H
4179	if (adapter->has_manage) {
4180		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4181		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4182
4183		/* disable hardware interception of ARP */
4184		manc &= ~(E1000_MANC_ARP_EN);
4185
4186		/* enable receiving management packets to the host */
4187		manc |= E1000_MANC_EN_MNG2HOST;
4188#define E1000_MNG2HOST_PORT_623 (1 << 5)
4189#define E1000_MNG2HOST_PORT_664 (1 << 6)
4190		manc2h |= E1000_MNG2HOST_PORT_623;
4191		manc2h |= E1000_MNG2HOST_PORT_664;
4192		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4193
4194		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4195	}
4196}
4197
4198/*
4199 * Give control back to hardware management
4200 * controller if there is one.
4201 */
4202static void
4203igb_release_manageability(struct adapter *adapter)
4204{
4205	if (adapter->has_manage) {
4206		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4207
4208		/* re-enable hardware interception of ARP */
4209		manc |= E1000_MANC_ARP_EN;
4210		manc &= ~E1000_MANC_EN_MNG2HOST;
4211
4212		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4213	}
4214}
4215
4216/*
4217 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4218 * For ASF and Pass Through versions of f/w this means that
4219 * the driver is loaded.
4220 *
4221 */
4222static void
4223igb_get_hw_control(struct adapter *adapter)
4224{
4225	u32 ctrl_ext;
4226
4227	/* Let firmware know the driver has taken over */
4228	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4229	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4230	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4231}
4232
4233/*
4234 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4235 * For ASF and Pass Through versions of f/w this means that the
4236 * driver is no longer loaded.
4237 *
4238 */
4239static void
4240igb_release_hw_control(struct adapter *adapter)
4241{
4242	u32 ctrl_ext;
4243
4244	/* Let firmware take over control of the hardware */
4245	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4246	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4247	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4248}
4249
4250static int
4251igb_is_valid_ether_addr(uint8_t *addr)
4252{
4253	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4254
4255	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4256		return (FALSE);
4257	}
4258
4259	return (TRUE);
4260}
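/*
 * The addr[0] & 1 test above rejects any address with the I/G (multicast)
 * bit set, e.g. 01:00:5e:00:00:01, and the bcmp() rejects the all-zero
 * address; anything else is accepted as a usable unicast address.
 */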
4261
4262
4263/*
4264 * Enable PCI Wake On Lan capability
4265 */
4266void
4267igb_enable_wakeup(device_t dev)
4268{
4269	u16     cap, status;
4270	u8      id;
4271
4272	/* First find the capabilities pointer */
4273	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4274	/* Read the PM Capabilities */
4275	id = pci_read_config(dev, cap, 1);
4276	if (id != PCIY_PMG)     /* Something wrong */
4277		return;
4278	/* OK, we have the power capabilities, so
4279	   now get the status register */
4280	cap += PCIR_POWER_STATUS;
4281	status = pci_read_config(dev, cap, 2);
4282	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4283	pci_write_config(dev, cap, status, 2);
4284	return;
4285}
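/*
 * Note: the lookup above assumes the power-management capability sits first
 * in the device's capability list; a fully general walk would follow the
 * next-capability pointer at offset cap + 1 until it found PCIY_PMG.
 */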
4286
4287
4288/**********************************************************************
4289 *
4290 *  Update the board statistics counters.
4291 *
4292 **********************************************************************/
4293static void
4294igb_update_stats_counters(struct adapter *adapter)
4295{
4296	struct ifnet   *ifp;
4297
4298	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4299	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4300		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4301		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4302	}
4303	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4304	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4305	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4306	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4307
4308	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4309	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4310	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4311	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4312	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4313	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4314	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4315	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4316	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4317	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4318	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4319	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4320	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4321	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4322	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4323	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4324	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4325	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4326	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4327	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4328
4329	/* For the 64-bit byte counters the low dword must be read first. */
4330	/* Both registers clear on the read of the high dword */
4331
4332	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCH);
4333	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCH);
4334
4335	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4336	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4337	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4338	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4339	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4340
4341	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
4342	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);
4343
4344	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4345	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4346	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4347	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4348	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4349	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4350	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4351	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4352	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4353	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4354
4355	adapter->stats.algnerrc +=
4356		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4357	adapter->stats.rxerrc +=
4358		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4359	adapter->stats.tncrs +=
4360		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4361	adapter->stats.cexterr +=
4362		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4363	adapter->stats.tsctc +=
4364		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4365	adapter->stats.tsctfc +=
4366		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4367	ifp = adapter->ifp;
4368
4369	ifp->if_collisions = adapter->stats.colc;
4370
4371	/* Rx Errors */
4372	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4373	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4374	    adapter->stats.ruc + adapter->stats.roc +
4375	    adapter->stats.mpc + adapter->stats.cexterr;
4376
4377	/* Tx Errors */
4378	ifp->if_oerrors = adapter->stats.ecol +
4379	    adapter->stats.latecol + adapter->watchdog_events;
4380}
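/*
 * The += pattern throughout this routine exists because the hardware
 * statistics registers clear on read: each call folds the delta since the
 * previous poll into the running software totals.
 */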
4381
4382
4383/**********************************************************************
4384 *
4385 *  This routine is called only when igb_display_debug_stats is enabled.
4386 *  This routine provides a way to take a look at important statistics
4387 *  maintained by the driver and hardware.
4388 *
4389 **********************************************************************/
4390static void
4391igb_print_debug_info(struct adapter *adapter)
4392{
4393	device_t dev = adapter->dev;
4394	struct rx_ring *rxr = adapter->rx_rings;
4395	struct tx_ring *txr = adapter->tx_rings;
4396	uint8_t *hw_addr = adapter->hw.hw_addr;
4397
4398	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4399	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4400	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4401	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4402
4403#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4404	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4405	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4406	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4407#endif
4408
4409	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk\n",
4410	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4411	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4412	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4413	    adapter->hw.fc.high_water,
4414	    adapter->hw.fc.low_water);
4415
4416	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4417		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4418		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4419		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4420		device_printf(dev, "no descriptors avail event = %lld\n",
4421		    (long long)txr->no_desc_avail);
4422		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4423		    (long long)txr->tx_irq);
4424		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4425		    (long long)txr->tx_packets);
4426	}
4427
4428	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4429		struct lro_ctrl *lro = &rxr->lro;
4430		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4431		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4432		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4433		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4434		    (long long)rxr->rx_packets);
4435		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4436		    (long long)rxr->rx_bytes);
4437		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4438		    (long long)rxr->rx_irq);
4439		device_printf(dev, "RX(%d) LRO Queued = %d\n",
4440		    rxr->me, lro->lro_queued);
4441		device_printf(dev, "RX(%d) LRO Flushed = %d\n",
4442		    rxr->me, lro->lro_flushed);
4443	}
4444
4445	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4446
4447	device_printf(dev, "Std mbuf failed = %ld\n",
4448	    adapter->mbuf_alloc_failed);
4449	device_printf(dev, "Std mbuf cluster failed = %ld\n",
4450	    adapter->mbuf_cluster_failed);
4451	device_printf(dev, "Driver dropped packets = %ld\n",
4452	    adapter->dropped_pkts);
4453	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4454		adapter->no_tx_dma_setup);
4455}
4456
4457static void
4458igb_print_hw_stats(struct adapter *adapter)
4459{
4460	device_t dev = adapter->dev;
4461
4462	device_printf(dev, "Excessive collisions = %lld\n",
4463	    (long long)adapter->stats.ecol);
4464#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4465	device_printf(dev, "Symbol errors = %lld\n",
4466	    (long long)adapter->stats.symerrs);
4467#endif
4468	device_printf(dev, "Sequence errors = %lld\n",
4469	    (long long)adapter->stats.sec);
4470	device_printf(dev, "Defer count = %lld\n",
4471	    (long long)adapter->stats.dc);
4472	device_printf(dev, "Missed Packets = %lld\n",
4473	    (long long)adapter->stats.mpc);
4474	device_printf(dev, "Receive No Buffers = %lld\n",
4475	    (long long)adapter->stats.rnbc);
4476	/* RLEC is inaccurate on some hardware, calculate our own. */
4477	device_printf(dev, "Receive Length Errors = %lld\n",
4478	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4479	device_printf(dev, "Receive errors = %lld\n",
4480	    (long long)adapter->stats.rxerrc);
4481	device_printf(dev, "Crc errors = %lld\n",
4482	    (long long)adapter->stats.crcerrs);
4483	device_printf(dev, "Alignment errors = %lld\n",
4484	    (long long)adapter->stats.algnerrc);
4485	/* On 82575 these are collision counts */
4486	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4487	    (long long)adapter->stats.cexterr);
4488	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4489	device_printf(dev, "watchdog timeouts = %ld\n",
4490	    adapter->watchdog_events);
4491	device_printf(dev, "XON Rcvd = %lld\n",
4492	    (long long)adapter->stats.xonrxc);
4493	device_printf(dev, "XON Xmtd = %lld\n",
4494	    (long long)adapter->stats.xontxc);
4495	device_printf(dev, "XOFF Rcvd = %lld\n",
4496	    (long long)adapter->stats.xoffrxc);
4497	device_printf(dev, "XOFF Xmtd = %lld\n",
4498	    (long long)adapter->stats.xofftxc);
4499	device_printf(dev, "Good Packets Rcvd = %lld\n",
4500	    (long long)adapter->stats.gprc);
4501	device_printf(dev, "Good Packets Xmtd = %lld\n",
4502	    (long long)adapter->stats.gptc);
4503	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4504	    (long long)adapter->stats.tsctc);
4505	device_printf(dev, "TSO Contexts Failed = %lld\n",
4506	    (long long)adapter->stats.tsctfc);
4507}
4508
4509/**********************************************************************
4510 *
4511 *  This routine provides a way to dump out the adapter eeprom,
4512 *  often a useful debug/service tool. This only dumps the first
4513 *  32 words, stuff that matters is in that extent.
4514 *
4515 **********************************************************************/
4516static void
4517igb_print_nvm_info(struct adapter *adapter)
4518{
4519	u16	eeprom_data;
4520	int	i, j, row = 0;
4521
4522	/* It's a bit crude, but it gets the job done */
4523	printf("\nInterface EEPROM Dump:\n");
4524	printf("Offset\n0x0000  ");
4525	for (i = 0, j = 0; i < 32; i++, j++) {
4526		if (j == 8) { /* Make the offset block */
4527			j = 0; ++row;
4528			printf("\n0x00%x0  ", row);
4529		}
4530		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4531		printf("%04x ", eeprom_data);
4532	}
4533	printf("\n");
4534}
4535
4536static int
4537igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4538{
4539	struct adapter *adapter;
4540	int error;
4541	int result;
4542
4543	result = -1;
4544	error = sysctl_handle_int(oidp, &result, 0, req);
4545
4546	if (error || !req->newptr)
4547		return (error);
4548
4549	if (result == 1) {
4550		adapter = (struct adapter *)arg1;
4551		igb_print_debug_info(adapter);
4552	}
4553	/*
4554	 * This value will cause a hex dump of the
4555	 * first 32 16-bit words of the EEPROM to
4556	 * the screen.
4557	 */
4558	if (result == 2) {
4559		adapter = (struct adapter *)arg1;
4560		igb_print_nvm_info(adapter);
4561	}
4562
4563	return (error);
4564}
4565
4566
4567static int
4568igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4569{
4570	struct adapter *adapter;
4571	int error;
4572	int result;
4573
4574	result = -1;
4575	error = sysctl_handle_int(oidp, &result, 0, req);
4576
4577	if (error || !req->newptr)
4578		return (error);
4579
4580	if (result == 1) {
4581		adapter = (struct adapter *)arg1;
4582		igb_print_hw_stats(adapter);
4583	}
4584
4585	return (error);
4586}
4587
4588static void
4589igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4590	const char *description, int *limit, int value)
4591{
4592	*limit = value;
4593	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4594	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4595	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4596}
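/*
 * The tunable added above appears under the device's sysctl tree, so with
 * the leaf name the caller passes in (assuming, for example, a name of
 * "rx_processing_limit") the first port could be adjusted at runtime with:
 *
 *	sysctl dev.igb.0.rx_processing_limit=300
 */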
4597
4598#ifdef IGB_TIMESYNC
4599/*
4600 * Initialize the Time Sync Feature
4601 */
4602static int
4603igb_tsync_init(struct adapter *adapter)
4604{
4605	device_t	dev = adapter->dev;
4606	u32		tx_ctl, rx_ctl, val;
4607
4609	E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
4610	    20833/PICOSECS_PER_TICK);
4611
4612	adapter->last_stamp =  E1000_READ_REG(&adapter->hw, E1000_SYSTIML);
4613	adapter->last_stamp |= (u64)E1000_READ_REG(&adapter->hw,
4614	    E1000_SYSTIMH) << 32ULL;
4615
4616	/* Enable the TX side */
4617	tx_ctl =  E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4618	tx_ctl |= 0x10;
4619	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4620	E1000_WRITE_FLUSH(&adapter->hw);
4621
4622	tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4623	if ((tx_ctl & 0x10) == 0) {
4624		device_printf(dev, "Failed to enable TX timestamping\n");
4625		return (ENXIO);
4626	}
4627
4628	/* Enable RX */
4629	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4630	rx_ctl |= 0x10; /* Enable the feature */
4631	rx_ctl |= 0x04; /* This value turns on Ver 1 and 2 */
4632	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4633
4634	/*
4635	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 (Ethertype)
4636	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4637	 * Ethertype Filter Queue Filter[0][31] = 0x1 (Enable Timestamping)
4638	 */
4639	E1000_WRITE_REG(&adapter->hw, E1000_ETQF(0), 0x440088f7);
4640	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCFG, 0x0);
4641
4642	/*
4643	 * Source Port Queue Filter Setup:
4644	 *  this is for UDP port filtering
4645	 */
4646	E1000_WRITE_REG(&adapter->hw, E1000_SPQF(0), TSYNC_PORT);
4647	/* Protocol = UDP, enable Timestamp, and filter on source/protocol */
4648	val = (0x11 | (1 << 27) | (6 << 28));
4649	E1000_WRITE_REG(&adapter->hw, E1000_FTQF(0), val);
4650
4651	E1000_WRITE_FLUSH(&adapter->hw);
4652
4653	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4654	if ((rx_ctl & 0x10) == 0) {
4655		device_printf(dev, "Failed to enable RX timestamping\n");
4656		return (ENXIO);
4657	}
4658
4659	device_printf(dev, "IEEE 1588 Precision Time Protocol enabled\n");
4660
4661	return (0);
4662}
4663
4664/*
4665 * Disable the Time Sync Feature
4666 */
4667static void
4668igb_tsync_disable(struct adapter *adapter)
4669{
4670	u32		tx_ctl, rx_ctl;
4671
4672	tx_ctl =  E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4673	tx_ctl &= ~0x10;
4674	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4675	E1000_WRITE_FLUSH(&adapter->hw);
4676
4677	/* Invalidate TX Timestamp */
4678	E1000_READ_REG(&adapter->hw, E1000_TXSTMPH);
4679
4680	tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4681	if (tx_ctl & 0x10)
4682		HW_DEBUGOUT("Failed to disable TX timestamping\n");
4683
4684	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4685	rx_ctl &= ~0x10;
4686
4687	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4688	E1000_WRITE_FLUSH(&adapter->hw);
4689
4690	/* Invalidate RX Timestamp */
4691	E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
4692
4693	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4694	if (rx_ctl & 0x10)
4695		HW_DEBUGOUT("Failed to disable RX timestamping\n");
4696
4697	return;
4698}
4699
4700#endif /* IGB_TIMESYNC */
4701