/******************************************************************************

  Copyright (c) 2001-2009, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 193862 2009-06-09 21:43:04Z bz $*/

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#ifdef IGB_TIMESYNC
#include <sys/ioccom.h>
#include <sys/time.h>
#endif
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 1.5.3";

/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select the devices to load on.
 *  The last field stores an index into igb_strings.
 *  The last entry must be all 0s.
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};
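
/*
 * Illustrative sketch (not from the original source): a new controller
 * would be supported by appending an entry before the all-zero
 * terminator above, e.g. with a hypothetical device id define:
 *
 *	{ 0x8086, E1000_DEV_ID_NEW_PART,	PCI_ANY_ID, PCI_ANY_ID, 0},
 */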

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *);
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_watchdog(struct adapter *);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
#if __FreeBSD_version >= 602105
static int	igb_setup_msix(struct adapter *);
#endif
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static int	igb_hardware_init(struct adapter *);
static void	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);
static bool	igb_rxeof(struct rx_ring *, int);
static void	igb_rx_checksum(u32, struct mbuf *, bool);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_print_hw_stats(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static int	igb_get_buf(struct rx_ring *, int, u8);
static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static void	igb_print_debug_info(struct adapter *);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static int	igb_sysctl_stats(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS);
/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);

#ifdef IGB_TIMESYNC
/* Precision Time sync support */
static int igb_tsync_init(struct adapter *);
static void igb_tsync_disable(struct adapter *);
#endif

#if __FreeBSD_version > 700000
static int	igb_irq_fast(void *);
#else
static void	igb_irq_fast(void *);
#endif

static void	igb_add_rx_process_limit(struct adapter *, const char *,
		    const char *, int *, int);
static void	igb_handle_rxtx(void *context, int pending);
static void	igb_handle_tx(void *context, int pending);
static void	igb_handle_rx(void *context, int pending);

#if __FreeBSD_version >= 602105
/* These are MSIX only irq handlers */
static void	igb_msix_rx(void *);
static void	igb_msix_tx(void *);
static void	igb_msix_link(void *);
#endif

/* Adaptive Interrupt Moderation */
static void	igb_update_aim(struct rx_ring *);

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);
275
276/*
277** These parameters are used in Adaptive
278** Interrupt Moderation. The value is set
279** into EITR and controls the interrupt
280** frequency. A variable static scheme can
281** be created by changing the assigned value
282** of igb_ave_latency to the desired value,
283** and then set igb_enable_aim to FALSE.
284** This will result in all EITR registers
285** getting set to that value statically.
286*/
287static int igb_enable_aim = TRUE;
288TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);
289static int igb_low_latency = IGB_LOW_LATENCY;
290TUNABLE_INT("hw.igb.low_latency", &igb_low_latency);
291static int igb_ave_latency = IGB_AVE_LATENCY;
292TUNABLE_INT("hw.igb.ave_latency", &igb_ave_latency);
293static int igb_bulk_latency = IGB_BULK_LATENCY;
294TUNABLE_INT("hw.igb.bulk_latency", &igb_bulk_latency);
295
296/*
297** IF YOU CHANGE THESE: be sure and change IGB_MSIX_VEC in
298** if_igb.h to match. These can be autoconfigured if set to
299** 0, it will then be based on number of cpus.
300*/
301static int igb_tx_queues = 1;
302static int igb_rx_queues = 1;
303TUNABLE_INT("hw.igb.tx_queues", &igb_tx_queues);
304TUNABLE_INT("hw.igb.rx_queues", &igb_rx_queues);
305
306/* How many packets rxeof tries to clean at a time */
307static int igb_rx_process_limit = 100;
308TUNABLE_INT("hw.igb.rx_process_limit", &igb_rx_process_limit);
309
310/* Flow control setting - default to FULL */
311static int igb_fc_setting = e1000_fc_full;
312TUNABLE_INT("hw.igb.fc_setting", &igb_fc_setting);
313
314/*
315 * Should the driver do LRO on the RX end
316 *  this can be toggled on the fly, but the
317 *  interface must be reset (down/up) for it
318 *  to take effect.
319 */
320static int igb_enable_lro = 1;
321TUNABLE_INT("hw.igb.enable_lro", &igb_enable_lro);
322
323/*
324 * Enable RX Header Split
325 */
326static int igb_rx_hdr_split = 1;
327TUNABLE_INT("hw.igb.rx_hdr_split", &igb_rx_hdr_split);
328
329extern int mp_ncpus;
330/*********************************************************************
331 *  Device identification routine
332 *
333 *  igb_probe determines if the driver should be loaded on
334 *  adapter based on PCI vendor/device id of the adapter.
335 *
336 *  return BUS_PROBE_DEFAULT on success, positive on failure
337 *********************************************************************/
338
339static int
340igb_probe(device_t dev)
341{
342	char		adapter_name[60];
343	uint16_t	pci_vendor_id = 0;
344	uint16_t	pci_device_id = 0;
345	uint16_t	pci_subvendor_id = 0;
346	uint16_t	pci_subdevice_id = 0;
347	igb_vendor_info_t *ent;
348
349	INIT_DEBUGOUT("igb_probe: begin");
350
351	pci_vendor_id = pci_get_vendor(dev);
352	if (pci_vendor_id != IGB_VENDOR_ID)
353		return (ENXIO);
354
355	pci_device_id = pci_get_device(dev);
356	pci_subvendor_id = pci_get_subvendor(dev);
357	pci_subdevice_id = pci_get_subdevice(dev);
358
359	ent = igb_vendor_info_array;
360	while (ent->vendor_id != 0) {
361		if ((pci_vendor_id == ent->vendor_id) &&
362		    (pci_device_id == ent->device_id) &&
363
364		    ((pci_subvendor_id == ent->subvendor_id) ||
365		    (ent->subvendor_id == PCI_ANY_ID)) &&
366
367		    ((pci_subdevice_id == ent->subdevice_id) ||
368		    (ent->subdevice_id == PCI_ANY_ID))) {
369			sprintf(adapter_name, "%s %s",
370				igb_strings[ent->index],
371				igb_driver_version);
372			device_set_desc_copy(dev, adapter_name);
373			return (BUS_PROBE_DEFAULT);
374		}
375		ent++;
376	}
377
378	return (ENXIO);
379}
380
381/*********************************************************************
382 *  Device initialization routine
383 *
384 *  The attach entry point is called when the driver is being loaded.
385 *  This routine identifies the type of hardware, allocates all resources
386 *  and initializes the hardware.
387 *
388 *  return 0 on success, positive on failure
389 *********************************************************************/
390
391static int
392igb_attach(device_t dev)
393{
394	struct adapter	*adapter;
395	int		error = 0;
396	u16		eeprom_data;
397
398	INIT_DEBUGOUT("igb_attach: begin");
399
400	adapter = device_get_softc(dev);
401	adapter->dev = adapter->osdep.dev = dev;
402	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
403
404	/* SYSCTL stuff */
405	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
406	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
407	    OID_AUTO, "debug", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
408	    igb_sysctl_debug_info, "I", "Debug Information");
409
410	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
411	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
412	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
413	    igb_sysctl_stats, "I", "Statistics");
414
415	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
416	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
417	    OID_AUTO, "flow_control", CTLTYPE_INT|CTLFLAG_RW,
418	    &igb_fc_setting, 0, "Flow Control");
419
420	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
421	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
422	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
423	    &igb_enable_lro, 0, "Large Receive Offload");
424
425	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
426	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
427	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
428	    &igb_enable_aim, 1, "Interrupt Moderation");
429
430	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
431	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
432	    OID_AUTO, "low_latency", CTLTYPE_INT|CTLFLAG_RW,
433	    &igb_low_latency, 1, "Low Latency");
434
435	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
436	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
437	    OID_AUTO, "ave_latency", CTLTYPE_INT|CTLFLAG_RW,
438	    &igb_ave_latency, 1, "Average Latency");
439
440	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
441	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
442	    OID_AUTO, "bulk_latency", CTLTYPE_INT|CTLFLAG_RW,
443	    &igb_bulk_latency, 1, "Bulk Latency");
444
445	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
446	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
447	    OID_AUTO, "hdr_split", CTLTYPE_INT|CTLFLAG_RW,
448	    &igb_rx_hdr_split, 0, "RX Header Split");
449
450	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
451
452	/* Determine hardware and mac info */
453	igb_identify_hardware(adapter);
454
455	/* Setup PCI resources */
456	if (igb_allocate_pci_resources(adapter)) {
457		device_printf(dev, "Allocation of PCI resources failed\n");
458		error = ENXIO;
459		goto err_pci;
460	}
461
462	/* Do Shared Code initialization */
463	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
464		device_printf(dev, "Setup of Shared code failed\n");
465		error = ENXIO;
466		goto err_pci;
467	}
468
469	e1000_get_bus_info(&adapter->hw);
470
471	/* Sysctls for limiting the amount of work done in the taskqueue */
472	igb_add_rx_process_limit(adapter, "rx_processing_limit",
473	    "max number of rx packets to process", &adapter->rx_process_limit,
474	    igb_rx_process_limit);
475
476	/*
477	 * Validate number of transmit and receive descriptors. It
478	 * must not exceed hardware maximum, and must be multiple
479	 * of E1000_DBA_ALIGN.
480	 */
481	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
482	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
483		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
484		    IGB_DEFAULT_TXD, igb_txd);
485		adapter->num_tx_desc = IGB_DEFAULT_TXD;
486	} else
487		adapter->num_tx_desc = igb_txd;
488	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
489	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
490		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
491		    IGB_DEFAULT_RXD, igb_rxd);
492		adapter->num_rx_desc = IGB_DEFAULT_RXD;
493	} else
494		adapter->num_rx_desc = igb_rxd;
495
496	adapter->hw.mac.autoneg = DO_AUTO_NEG;
497	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
498	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
499
500	/* Copper options */
501	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
502		adapter->hw.phy.mdix = AUTO_ALL_MODES;
503		adapter->hw.phy.disable_polarity_correction = FALSE;
504		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
505	}
506
507	/*
508	 * Set the frame limits assuming
509	 * standard ethernet sized frames.
510	 */
511	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
512	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
513
514	/*
515	** Allocate and Setup Queues
516	*/
517	if (igb_allocate_queues(adapter)) {
518		error = ENOMEM;
519		goto err_pci;
520	}
521
522	/*
523	** Start from a known state, this is
524	** important in reading the nvm and
525	** mac from that.
526	*/
527	e1000_reset_hw(&adapter->hw);
528
529	/* Make sure we have a good EEPROM before we read from it */
530	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
531		/*
532		** Some PCI-E parts fail the first check due to
533		** the link being in sleep state, call it again,
534		** if it fails a second time its a real issue.
535		*/
536		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
537			device_printf(dev,
538			    "The EEPROM Checksum Is Not Valid\n");
539			error = EIO;
540			goto err_late;
541		}
542	}
543
544	/*
545	** Copy the permanent MAC address out of the EEPROM
546	*/
547	if (e1000_read_mac_addr(&adapter->hw) < 0) {
548		device_printf(dev, "EEPROM read error while reading MAC"
549		    " address\n");
550		error = EIO;
551		goto err_late;
552	}
553	/* Check its sanity */
554	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
555		device_printf(dev, "Invalid MAC address\n");
556		error = EIO;
557		goto err_late;
558	}
559
560	/* Now Initialize the hardware */
561	if (igb_hardware_init(adapter)) {
562		device_printf(dev, "Unable to initialize the hardware\n");
563		error = EIO;
564		goto err_late;
565	}
566
567	/*
568	** Configure Interrupts
569	*/
570	if (adapter->msix > 1) /* MSIX */
571		error = igb_allocate_msix(adapter);
572	else /* MSI or Legacy */
573		error = igb_allocate_legacy(adapter);
574	if (error)
575		goto err_late;
576
577	/* Setup OS specific network interface */
578	igb_setup_interface(dev, adapter);
579
580	/* Initialize statistics */
581	igb_update_stats_counters(adapter);
582
583	adapter->hw.mac.get_link_status = 1;
584	igb_update_link_status(adapter);
585
586	/* Indicate SOL/IDER usage */
587	if (e1000_check_reset_block(&adapter->hw))
588		device_printf(dev,
589		    "PHY reset is blocked due to SOL/IDER session.\n");
590
591	/* Determine if we have to control management hardware */
592	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
593
594	/*
595	 * Setup Wake-on-Lan
596	 */
597	/* APME bit in EEPROM is mapped to WUC.APME */
598	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
599	if (eeprom_data)
600		adapter->wol = E1000_WUFC_MAG;
601
602	/* Register for VLAN events */
603	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
604	     igb_register_vlan, 0, EVENTHANDLER_PRI_FIRST);
605	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
606	     igb_unregister_vlan, 0, EVENTHANDLER_PRI_FIRST);
607
608	/* Tell the stack that the interface is not active */
609	adapter->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
610
611	INIT_DEBUGOUT("igb_attach: end");
612
613	return (0);
614
615err_late:
616	igb_free_transmit_structures(adapter);
617	igb_free_receive_structures(adapter);
618	igb_release_hw_control(adapter);
619err_pci:
620	igb_free_pci_resources(adapter);
621	IGB_CORE_LOCK_DESTROY(adapter);
622
623	return (error);
624}
625
626/*********************************************************************
627 *  Device removal routine
628 *
629 *  The detach entry point is called when the driver is being removed.
630 *  This routine stops the adapter and deallocates all the resources
631 *  that were allocated for driver operation.
632 *
633 *  return 0 on success, positive on failure
634 *********************************************************************/
635
636static int
637igb_detach(device_t dev)
638{
639	struct adapter	*adapter = device_get_softc(dev);
640	struct ifnet	*ifp = adapter->ifp;
641
642	INIT_DEBUGOUT("igb_detach: begin");
643
644	/* Make sure VLANS are not using driver */
645#if __FreeBSD_version >= 700000
646	if (adapter->ifp->if_vlantrunk != NULL) {
647#else
648	if (adapter->ifp->if_nvlans != 0) {
649#endif
650		device_printf(dev,"Vlan in use, detach first\n");
651		return (EBUSY);
652	}
653
654	IGB_CORE_LOCK(adapter);
655	adapter->in_detach = 1;
656	igb_stop(adapter);
657	IGB_CORE_UNLOCK(adapter);
658
659	e1000_phy_hw_reset(&adapter->hw);
660
661	/* Give control back to firmware */
662	igb_release_manageability(adapter);
663	igb_release_hw_control(adapter);
664
665	if (adapter->wol) {
666		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
667		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
668		igb_enable_wakeup(dev);
669	}
670
671	/* Unregister VLAN events */
672	if (adapter->vlan_attach != NULL)
673		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
674	if (adapter->vlan_detach != NULL)
675		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
676
677	ether_ifdetach(adapter->ifp);
678
679	callout_drain(&adapter->timer);
680
681	igb_free_pci_resources(adapter);
682	bus_generic_detach(dev);
683	if_free(ifp);
684
685	igb_free_transmit_structures(adapter);
686	igb_free_receive_structures(adapter);
687
688	IGB_CORE_LOCK_DESTROY(adapter);
689
690	return (0);
691}
692
693/*********************************************************************
694 *
695 *  Shutdown entry point
696 *
697 **********************************************************************/
698
699static int
700igb_shutdown(device_t dev)
701{
702	return igb_suspend(dev);
703}
704
705/*
706 * Suspend/resume device methods.
707 */
708static int
709igb_suspend(device_t dev)
710{
711	struct adapter *adapter = device_get_softc(dev);
712
713	IGB_CORE_LOCK(adapter);
714
715	igb_stop(adapter);
716
717        igb_release_manageability(adapter);
718	igb_release_hw_control(adapter);
719
720        if (adapter->wol) {
721                E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
722                E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
723                igb_enable_wakeup(dev);
724        }
725
726	IGB_CORE_UNLOCK(adapter);
727
728	return bus_generic_suspend(dev);
729}
730
731static int
732igb_resume(device_t dev)
733{
734	struct adapter *adapter = device_get_softc(dev);
735	struct ifnet *ifp = adapter->ifp;
736
737	IGB_CORE_LOCK(adapter);
738	igb_init_locked(adapter);
739	igb_init_manageability(adapter);
740
741	if ((ifp->if_flags & IFF_UP) &&
742	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
743		igb_start(ifp);
744
745	IGB_CORE_UNLOCK(adapter);
746
747	return bus_generic_resume(dev);
748}
749
750
751/*********************************************************************
752 *  Transmit entry point
753 *
754 *  igb_start is called by the stack to initiate a transmit.
755 *  The driver will remain in this routine as long as there are
756 *  packets to transmit and transmit resources are available.
757 *  In case resources are not available stack is notified and
758 *  the packet is requeued.
759 **********************************************************************/
760
761static void
762igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
763{
764	struct adapter	*adapter = ifp->if_softc;
765	struct mbuf	*m_head;
766
767	IGB_TX_LOCK_ASSERT(txr);
768
769	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
770	    IFF_DRV_RUNNING)
771		return;
772	if (!adapter->link_active)
773		return;
774
775	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
776
777		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
778		if (m_head == NULL)
779			break;
780		/*
781		 *  Encapsulation can modify our pointer, and or make it
782		 *  NULL on failure.  In that event, we can't requeue.
783		 */
784		if (igb_xmit(txr, &m_head)) {
785			if (m_head == NULL)
786				break;
787			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
788			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
789			break;
790		}
791
792		/* Send a copy of the frame to the BPF listener */
793		ETHER_BPF_MTAP(ifp, m_head);
794
795		/* Set timeout in case hardware has problems transmitting. */
796		txr->watchdog_timer = IGB_TX_TIMEOUT;
797	}
798}
799
800static void
801igb_start(struct ifnet *ifp)
802{
803	struct adapter	*adapter = ifp->if_softc;
804	struct tx_ring	*txr;
805	u32		queue = 0;
806
807	/*
808	** This is really just here for testing
809	** TX multiqueue, ultimately what is
810	** needed is the flow support in the stack
811	** and appropriate logic here to deal with
812	** it. -jfv
813	*/
814	if (adapter->num_tx_queues > 1)
815		queue = (curcpu % adapter->num_tx_queues);
816
817	txr = &adapter->tx_rings[queue];
818	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
819		IGB_TX_LOCK(txr);
820		igb_start_locked(txr, ifp);
821		IGB_TX_UNLOCK(txr);
822	}
823}
824
825/*********************************************************************
826 *  Ioctl entry point
827 *
828 *  igb_ioctl is called when the user wants to configure the
829 *  interface.
830 *
831 *  return 0 on success, positive on failure
832 **********************************************************************/
833
834static int
835igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
836{
837	struct adapter	*adapter = ifp->if_softc;
838	struct ifreq *ifr = (struct ifreq *)data;
839#ifdef INET
840	struct ifaddr *ifa = (struct ifaddr *)data;
841#endif
842	int error = 0;
843
844	if (adapter->in_detach)
845		return (error);
846
847	switch (command) {
848	case SIOCSIFADDR:
849#ifdef INET
850		if (ifa->ifa_addr->sa_family == AF_INET) {
851			/*
852			 * XXX
853			 * Since resetting hardware takes a very long time
854			 * and results in link renegotiation we only
855			 * initialize the hardware only when it is absolutely
856			 * required.
857			 */
858			ifp->if_flags |= IFF_UP;
859			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
860				IGB_CORE_LOCK(adapter);
861				igb_init_locked(adapter);
862				IGB_CORE_UNLOCK(adapter);
863			}
864			arp_ifinit(ifp, ifa);
865		} else
866#endif
867			error = ether_ioctl(ifp, command, data);
868		break;
869	case SIOCSIFMTU:
870	    {
871		int max_frame_size;
872
873		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
874
875		IGB_CORE_LOCK(adapter);
876		max_frame_size = 9234;
877		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
878		    ETHER_CRC_LEN) {
879			IGB_CORE_UNLOCK(adapter);
880			error = EINVAL;
881			break;
882		}
883
884		ifp->if_mtu = ifr->ifr_mtu;
885		adapter->max_frame_size =
886		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
887		igb_init_locked(adapter);
888		IGB_CORE_UNLOCK(adapter);
889		break;
890	    }
891	case SIOCSIFFLAGS:
892		IOCTL_DEBUGOUT("ioctl rcv'd:\
893		    SIOCSIFFLAGS (Set Interface Flags)");
894		IGB_CORE_LOCK(adapter);
895		if (ifp->if_flags & IFF_UP) {
896			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
897				if ((ifp->if_flags ^ adapter->if_flags) &
898				    (IFF_PROMISC | IFF_ALLMULTI)) {
899					igb_disable_promisc(adapter);
900					igb_set_promisc(adapter);
901				}
902			} else
903				igb_init_locked(adapter);
904		} else
905			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
906				igb_stop(adapter);
907		adapter->if_flags = ifp->if_flags;
908		IGB_CORE_UNLOCK(adapter);
909		break;
910	case SIOCADDMULTI:
911	case SIOCDELMULTI:
912		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
913		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
914			IGB_CORE_LOCK(adapter);
915			igb_disable_intr(adapter);
916			igb_set_multi(adapter);
917				igb_enable_intr(adapter);
918			IGB_CORE_UNLOCK(adapter);
919		}
920		break;
921	case SIOCSIFMEDIA:
922		/* Check SOL/IDER usage */
923		IGB_CORE_LOCK(adapter);
924		if (e1000_check_reset_block(&adapter->hw)) {
925			IGB_CORE_UNLOCK(adapter);
926			device_printf(adapter->dev, "Media change is"
927			    " blocked due to SOL/IDER session.\n");
928			break;
929		}
930		IGB_CORE_UNLOCK(adapter);
931	case SIOCGIFMEDIA:
932		IOCTL_DEBUGOUT("ioctl rcv'd: \
933		    SIOCxIFMEDIA (Get/Set Interface Media)");
934		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
935		break;
936	case SIOCSIFCAP:
937	    {
938		int mask, reinit;
939
940		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
941		reinit = 0;
942		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
943		if (mask & IFCAP_HWCSUM) {
944			ifp->if_capenable ^= IFCAP_HWCSUM;
945			reinit = 1;
946		}
947		if (mask & IFCAP_TSO4) {
948			ifp->if_capenable ^= IFCAP_TSO4;
949			reinit = 1;
950		}
951		if (mask & IFCAP_VLAN_HWTAGGING) {
952			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
953			reinit = 1;
954		}
955		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
956			igb_init(adapter);
957#if __FreeBSD_version >= 700000
958		VLAN_CAPABILITIES(ifp);
959#endif
960		break;
961	    }
962
963#ifdef IGB_TIMESYNC
964	/*
965	** IOCTL support for Precision Time (IEEE 1588) Support
966	*/
967	case IGB_TIMESYNC_READTS:
968	    {
969		u32 rx_ctl, tx_ctl;
970		struct igb_tsync_read *tdata;
971
972		tdata = (struct igb_tsync_read *) ifr->ifr_data;
973
974		if (tdata->read_current_time) {
975			getnanotime(&tdata->system_time);
976			tdata->network_time = E1000_READ_REG(&adapter->hw,
977			    E1000_SYSTIML);
978			tdata->network_time |=
979			    (u64)E1000_READ_REG(&adapter->hw,
980			    E1000_SYSTIMH ) << 32;
981                }
982
983		rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
984		tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
985
986		if (rx_ctl & 0x1) {
987			u32 tmp;
988			unsigned char *tmp_cp;
989
990			tdata->rx_valid = 1;
991			tdata->rx_stamp = E1000_READ_REG(&adapter->hw, E1000_RXSTMPL);
992			tdata->rx_stamp |= (u64)E1000_READ_REG(&adapter->hw,
993			    E1000_RXSTMPH) << 32;
994
995			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRL);
996			tmp_cp = (unsigned char *) &tmp;
997			tdata->srcid[0] = tmp_cp[0];
998			tdata->srcid[1] = tmp_cp[1];
999			tdata->srcid[2] = tmp_cp[2];
1000			tdata->srcid[3] = tmp_cp[3];
1001			tmp = E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
1002			tmp_cp = (unsigned char *) &tmp;
1003			tdata->srcid[4] = tmp_cp[0];
1004			tdata->srcid[5] = tmp_cp[1];
1005			tdata->seqid = tmp >> 16;
1006			tdata->seqid = htons(tdata->seqid);
1007		} else
1008			tdata->rx_valid = 0;
1009
1010		if (tx_ctl & 0x1) {
1011			tdata->tx_valid = 1;
1012			tdata->tx_stamp = E1000_READ_REG(&adapter->hw, E1000_TXSTMPL);
1013			tdata->tx_stamp |= (u64) E1000_READ_REG(&adapter->hw,
1014			    E1000_TXSTMPH) << 32;
1015		} else
1016			tdata->tx_valid = 0;
1017
1018		return (0);
1019	    }
1020#endif	/* IGB_TIMESYNC */
1021
1022	default:
1023		error = ether_ioctl(ifp, command, data);
1024		break;
1025	}
1026
1027	return (error);
1028}
1029
1030/*********************************************************************
1031 *  Watchdog timer:
1032 *
1033 *  This routine is called from the local timer every second.
1034 *  As long as transmit descriptors are being cleaned the value
1035 *  is non-zero and we do nothing. Reaching 0 indicates a tx hang
1036 *  and we then reset the device.
1037 *
1038 **********************************************************************/
1039
1040static void
1041igb_watchdog(struct adapter *adapter)
1042{
1043	struct tx_ring	*txr = adapter->tx_rings;
1044	bool		tx_hang = FALSE;
1045
1046	IGB_CORE_LOCK_ASSERT(adapter);
1047
1048	/*
1049	** The timer is set to 5 every time start() queues a packet.
1050	** Then txeof keeps resetting it as long as it cleans at
1051	** least one descriptor.
1052	** Finally, anytime all descriptors are clean the timer is
1053	** set to 0.
1054	**
1055	** With TX Multiqueue we need to check every queue's timer,
1056	** if any time out we do the reset.
1057	*/
1058	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1059		IGB_TX_LOCK(txr);
1060		if (txr->watchdog_timer == 0 ||
1061		    (--txr->watchdog_timer)) {
1062			IGB_TX_UNLOCK(txr);
1063			continue;
1064		} else {
1065			tx_hang = TRUE;
1066			IGB_TX_UNLOCK(txr);
1067			break;
1068		}
1069	}
1070	if (tx_hang == FALSE)
1071		return;
1072
1073	/* If we are in this routine because of pause frames, then
1074	 * don't reset the hardware.
1075	 */
1076	if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
1077	    E1000_STATUS_TXOFF) {
1078		txr = adapter->tx_rings; /* reset pointer */
1079		for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1080			IGB_TX_LOCK(txr);
1081			txr->watchdog_timer = IGB_TX_TIMEOUT;
1082			IGB_TX_UNLOCK(txr);
1083		}
1084		return;
1085	}
1086
1087	if (e1000_check_for_link(&adapter->hw) == 0)
1088		device_printf(adapter->dev, "watchdog timeout -- resetting\n");
1089
1090	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
1091		device_printf(adapter->dev, "Queue(%d) tdh = %d, tdt = %d\n",
1092		    i, E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
1093		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
1094		device_printf(adapter->dev, "Queue(%d) desc avail = %d,"
1095		    " Next Desc to Clean = %d\n", i, txr->tx_avail,
1096		    txr->next_to_clean);
1097	}
1098
1099	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1100	adapter->watchdog_events++;
1101
1102	igb_init_locked(adapter);
1103}
1104
1105/*********************************************************************
1106 *  Init entry point
1107 *
1108 *  This routine is used in two ways. It is used by the stack as
1109 *  init entry point in network interface structure. It is also used
1110 *  by the driver as a hw/sw initialization routine to get to a
1111 *  consistent state.
1112 *
1113 *  return 0 on success, positive on failure
1114 **********************************************************************/
1115
1116static void
1117igb_init_locked(struct adapter *adapter)
1118{
1119	struct rx_ring *rxr = adapter->rx_rings;
1120	struct tx_ring *txr = adapter->tx_rings;
1121	struct ifnet	*ifp = adapter->ifp;
1122	device_t	dev = adapter->dev;
1123	u32		pba = 0;
1124
1125	INIT_DEBUGOUT("igb_init: begin");
1126
1127	IGB_CORE_LOCK_ASSERT(adapter);
1128
1129	igb_stop(adapter);
1130

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
	 */
	if (adapter->hw.mac.type == e1000_82575) {
		pba = E1000_PBA_32K; /* 32K for Rx, 16K for Tx */
		INIT_DEBUGOUT1("igb_init: pba=%dK", pba);
		E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);
	}

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (igb_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) &&
	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) {
		u32 ctrl;
		ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	}

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (ifp->if_mtu > ETHERMTU)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MCLBYTES;

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		igb_stop(adapter);
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* Set default RX interrupt moderation */
	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(rxr->msix), igb_ave_latency);
		rxr->eitr_setting = igb_ave_latency;
	}

	/* Set TX interrupt rate & reset TX watchdog */
	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(txr->msix), igb_ave_latency);
		txr->watchdog_timer = FALSE;
	}

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
	igb_enable_intr(adapter);
	E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);

#ifdef IGB_TIMESYNC
	/* Initialize IEEE 1588 Time sync if available */
	if (adapter->hw.mac.type == e1000_82576)
		igb_tsync_init(adapter);
#endif

	/* Don't reset the phy next time init gets called */
	adapter->hw.phy.reset_disable = TRUE;
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	struct tx_ring	*txr = adapter->tx_rings;
	struct rx_ring	*rxr = adapter->rx_rings;
	struct ifnet	*ifp;

	ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (igb_rxeof(rxr, adapter->rx_process_limit))
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
		IGB_TX_LOCK(txr);
		igb_txeof(txr);

		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}

	igb_enable_intr(adapter);
}

static void
igb_handle_rx(void *context, int pending)
{
	struct rx_ring  *rxr = context;
	struct adapter  *adapter = rxr->adapter;
	struct ifnet    *ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		if (igb_rxeof(rxr, adapter->rx_process_limit) != 0)
			/* More to clean, schedule another task */
			taskqueue_enqueue(adapter->tq, &rxr->rx_task);
}

static void
igb_handle_tx(void *context, int pending)
{
	struct tx_ring  *txr = context;
	struct adapter  *adapter = txr->adapter;
	struct ifnet    *ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_txeof(txr);
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
#if __FreeBSD_version < 700000
static void
#else
static int
#endif
igb_irq_fast(void *arg)
{
	struct adapter	*adapter = arg;
	uint32_t	reg_icr;

	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		adapter->hw.mac.get_link_status = 1;
		igb_update_link_status(adapter);
	}

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}


#if __FreeBSD_version >= 602105
/*********************************************************************
 *
 *  MSIX TX Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_tx(void *arg)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	++txr->tx_irq;
	IGB_TX_LOCK(txr);

	do {
		more = igb_txeof(txr);
	} while (loop-- && more);

	IGB_TX_UNLOCK(txr);

	/* Schedule a clean task */
	taskqueue_enqueue(adapter->tq, &txr->tx_task);

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, txr->eims);
	return;
}

/*********************************************************************
 *
 *  MSIX RX Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_rx(void *arg)
{
	struct rx_ring *rxr = arg;
	struct adapter *adapter = rxr->adapter;
	u32		loop = IGB_MAX_LOOP;
	bool		more;

	++rxr->rx_irq;
	do {
		more = igb_rxeof(rxr, adapter->rx_process_limit);
	} while (loop-- && more);

	/* Update interrupt rate */
	if (igb_enable_aim == TRUE)
		igb_update_aim(rxr);

	/* Schedule another clean */
	taskqueue_enqueue(adapter->tq, &rxr->rx_task);

	/* Reenable this interrupt */
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, rxr->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}
#endif


/*
** Routine to adjust the RX EITR value based on traffic.
** It's a simple three-state model, but seems to help.
**
** Note that the three EITR values are tuneable using
** sysctl in real time. The feature can be effectively
** nullified by setting them equal.
*/
#define BULK_THRESHOLD	10000
#define AVE_THRESHOLD	1600
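
/*
 * Sketch of the three-state model implemented below: a ring moves
 * low -> ave when it sees more than AVE_THRESHOLD bytes per interval,
 * ave -> bulk above BULK_THRESHOLD, and steps back down the same way
 * when traffic falls below those thresholds.
 */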

static void
igb_update_aim(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	u32		olditr, newitr;

	/* Update interrupt moderation based on traffic */
	olditr = rxr->eitr_setting;
	newitr = olditr;

	/* Idle, don't change setting */
	if (rxr->bytes == 0)
		return;

	if (olditr == igb_low_latency) {
		if (rxr->bytes > AVE_THRESHOLD)
			newitr = igb_ave_latency;
	} else if (olditr == igb_ave_latency) {
		if (rxr->bytes < AVE_THRESHOLD)
			newitr = igb_low_latency;
		else if (rxr->bytes > BULK_THRESHOLD)
			newitr = igb_bulk_latency;
	} else if (olditr == igb_bulk_latency) {
		if (rxr->bytes < BULK_THRESHOLD)
			newitr = igb_ave_latency;
	}

	if (olditr != newitr) {
		/* Change interrupt rate */
		rxr->eitr_setting = newitr;
		E1000_WRITE_REG(&adapter->hw, E1000_EITR(rxr->me),
		    newitr | (newitr << 16));
	}

	rxr->bytes = 0;
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* As the speed/duplex settings may have changed we need
	 * to reset the PHY.
	 */
	adapter->hw.phy.reset_disable = FALSE;

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to the Advanced TX descriptors
 *  used by the 82575 adapter.
 *
 **********************************************************************/

1626static int
1627igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1628{
1629	struct adapter		*adapter = txr->adapter;
1630	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
1631	bus_dmamap_t		map;
1632	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
1633	union e1000_adv_tx_desc	*txd = NULL;
1634	struct mbuf		*m_head;
1635	u32			olinfo_status = 0, cmd_type_len = 0;
1636	int			nsegs, i, j, error, first, last = 0;
1637	u32			hdrlen = 0, offload = 0;
1638
1639	m_head = *m_headp;
1640
1641
1642	/* Set basic descriptor constants */
1643	cmd_type_len |= E1000_ADVTXD_DTYP_DATA;
1644	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
1645	if (m_head->m_flags & M_VLANTAG)
1646		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1647
1648        /*
1649         * Force a cleanup if number of TX descriptors
1650         * available hits the threshold
1651         */
1652	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD) {
1653		igb_txeof(txr);
1654		/* Now do we at least have a minimal? */
1655		if (txr->tx_avail <= IGB_TX_OP_THRESHOLD) {
1656			txr->no_desc_avail++;
1657			return (ENOBUFS);
1658		}
1659	}
1660
1661	/*
1662         * Map the packet for DMA.
1663	 *
1664	 * Capture the first descriptor index,
1665	 * this descriptor will have the index
1666	 * of the EOP which is the only one that
1667	 * now gets a DONE bit writeback.
1668	 */
1669	first = txr->next_avail_desc;
1670	tx_buffer = &txr->tx_buffers[first];
1671	tx_buffer_mapped = tx_buffer;
1672	map = tx_buffer->map;
1673
1674	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1675	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1676
1677	if (error == EFBIG) {
1678		struct mbuf *m;
1679
1680		m = m_defrag(*m_headp, M_DONTWAIT);
1681		if (m == NULL) {
1682			adapter->mbuf_defrag_failed++;
1683			m_freem(*m_headp);
1684			*m_headp = NULL;
1685			return (ENOBUFS);
1686		}
1687		*m_headp = m;
1688
1689		/* Try it again */
1690		error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1691		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1692
1693		if (error == ENOMEM) {
1694			adapter->no_tx_dma_setup++;
1695			return (error);
1696		} else if (error != 0) {
1697			adapter->no_tx_dma_setup++;
1698			m_freem(*m_headp);
1699			*m_headp = NULL;
1700			return (error);
1701		}
1702	} else if (error == ENOMEM) {
1703		adapter->no_tx_dma_setup++;
1704		return (error);
1705	} else if (error != 0) {
1706		adapter->no_tx_dma_setup++;
1707		m_freem(*m_headp);
1708		*m_headp = NULL;
1709		return (error);
1710	}
1711
1712	/* Check again to be sure we have enough descriptors */
1713        if (nsegs > (txr->tx_avail - 2)) {
1714                txr->no_desc_avail++;
1715		bus_dmamap_unload(txr->txtag, map);
1716		return (ENOBUFS);
1717        }
1718	m_head = *m_headp;
1719
1720        /*
1721         * Set up the context descriptor:
1722         * used when any hardware offload is done.
1723	 * This includes CSUM, VLAN, and TSO. It
1724	 * will use the first descriptor.
1725         */
1726        if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1727		if (igb_tso_setup(txr, m_head, &hdrlen)) {
1728			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1729			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1730			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1731		} else
1732			return (ENXIO);
1733	} else
1734		/* Do all other context descriptor setup */
1735		offload = igb_tx_ctx_setup(txr, m_head);
1736	if (offload == TRUE)
1737		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1738#ifdef IGB_TIMESYNC
1739	if (offload == IGB_TIMESTAMP)
1740		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
1741#endif
1742	/* Calculate payload length */
1743	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1744	    << E1000_ADVTXD_PAYLEN_SHIFT);
1745
1746	/* Set up our transmit descriptors */
1747	i = txr->next_avail_desc;
1748	for (j = 0; j < nsegs; j++) {
1749		bus_size_t seg_len;
1750		bus_addr_t seg_addr;
1751
1752		tx_buffer = &txr->tx_buffers[i];
1753		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1754		seg_addr = segs[j].ds_addr;
1755		seg_len  = segs[j].ds_len;
1756
1757		txd->read.buffer_addr = htole64(seg_addr);
1758		txd->read.cmd_type_len = htole32(
1759		    adapter->txd_cmd | cmd_type_len | seg_len);
1760		txd->read.olinfo_status = htole32(olinfo_status);
1761		last = i;
1762		if (++i == adapter->num_tx_desc)
1763			i = 0;
1764		tx_buffer->m_head = NULL;
1765		tx_buffer->next_eop = -1;
1766	}
1767
1768	txr->next_avail_desc = i;
1769	txr->tx_avail -= nsegs;
1770
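	/*
	 * The DMA map was loaded through the first slot's map, but
	 * the mbuf is stored in the last slot used; swap the maps so
	 * igb_txeof() unloads the right one when the EOP completes.
	 */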
1771	tx_buffer->m_head = m_head;
1772	tx_buffer_mapped->map = tx_buffer->map;
1773	tx_buffer->map = map;
1774	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1775
1776	/*
1777	 * The last descriptor of the packet needs
1778	 * End Of Packet (EOP) and Report Status (RS)
1779	 * set.
1780	 */
1781	txd->read.cmd_type_len |=
1782	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1783	/*
1784	 * Keep track in the first buffer which
1785	 * descriptor will be written back
1786	 */
1787	tx_buffer = &txr->tx_buffers[first];
1788	tx_buffer->next_eop = last;
1789
1790	/*
1791	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1792	 * that this frame is available to transmit.
1793	 */
1794	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1795	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1796	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1797	++txr->tx_packets;
1798
1799	return (0);
1800
1801}
1802
1803static void
1804igb_set_promisc(struct adapter *adapter)
1805{
1806	struct ifnet	*ifp = adapter->ifp;
1807	uint32_t	reg_rctl;
1808
1809	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1810
1811	if (ifp->if_flags & IFF_PROMISC) {
1812		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1813		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1814	} else if (ifp->if_flags & IFF_ALLMULTI) {
1815		reg_rctl |= E1000_RCTL_MPE;
1816		reg_rctl &= ~E1000_RCTL_UPE;
1817		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1818	}
1819}
1820
1821static void
1822igb_disable_promisc(struct adapter *adapter)
1823{
1824	uint32_t	reg_rctl;
1825
1826	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1827
1828	reg_rctl &=  (~E1000_RCTL_UPE);
1829	reg_rctl &=  (~E1000_RCTL_MPE);
1830	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1831}
1832
1833
1834/*********************************************************************
1835 *  Multicast Update
1836 *
1837 *  This routine is called whenever the multicast address list is updated.
1838 *
1839 **********************************************************************/
1840
1841static void
1842igb_set_multi(struct adapter *adapter)
1843{
1844	struct ifnet	*ifp = adapter->ifp;
1845	struct ifmultiaddr *ifma;
1846	u32 reg_rctl = 0;
1847	u8  mta[MAX_NUM_MULTICAST_ADDRESSES * ETH_ADDR_LEN];
1848
1849	int mcnt = 0;
1850
1851	IOCTL_DEBUGOUT("igb_set_multi: begin");
1852
1853	IF_ADDR_LOCK(ifp);
1854	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1855		if (ifma->ifma_addr->sa_family != AF_LINK)
1856			continue;
1857
1858		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1859			break;
1860
1861		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
1862		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
1863		mcnt++;
1864	}
1865	IF_ADDR_UNLOCK(ifp);
1866
1867	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
1868		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1869		reg_rctl |= E1000_RCTL_MPE;
1870		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
1871	} else
1872		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
1873}
1874
1875
1876/*********************************************************************
1877 *  Timer routine
1878 *
1879 *  This routine checks for link status and updates statistics.
1880 *
1881 **********************************************************************/
1882
1883static void
1884igb_local_timer(void *arg)
1885{
1886	struct adapter	*adapter = arg;
1887	struct ifnet	*ifp = adapter->ifp;
1888
1889	IGB_CORE_LOCK_ASSERT(adapter);
1890
1891	igb_update_link_status(adapter);
1892	igb_update_stats_counters(adapter);
1893
1894	if (igb_display_debug_stats && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1895		igb_print_hw_stats(adapter);
1896
1897	/*
1898	 * Each second we check the watchdog to
1899	 * protect against hardware hangs.
1900	 */
1901	igb_watchdog(adapter);
1902
1903	/* Trigger an RX interrupt on all queues */
1904	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->rx_mask);
1905
1906	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1907
1908}
1909
1910static void
1911igb_update_link_status(struct adapter *adapter)
1912{
1913	struct e1000_hw *hw = &adapter->hw;
1914	struct ifnet *ifp = adapter->ifp;
1915	device_t dev = adapter->dev;
1916	struct tx_ring *txr = adapter->tx_rings;
1917	u32 link_check = 0;
1918
1919	/* Get the cached link value or read for real */
1920	switch (hw->phy.media_type) {
1921	case e1000_media_type_copper:
1922		if (hw->mac.get_link_status) {
1923			/* Do the work to read phy */
1924			e1000_check_for_link(hw);
1925			link_check = !hw->mac.get_link_status;
1926		} else
1927			link_check = TRUE;
1928		break;
1929	case e1000_media_type_fiber:
1930		e1000_check_for_link(hw);
1931		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
1932		    E1000_STATUS_LU);
1933		break;
1934	case e1000_media_type_internal_serdes:
1935		e1000_check_for_link(hw);
1936		link_check = adapter->hw.mac.serdes_has_link;
1937		break;
1938	default:
1939	case e1000_media_type_unknown:
1940		break;
1941	}
1942
1943	/* Now we check if a transition has happened */
1944	if (link_check && (adapter->link_active == 0)) {
1945		e1000_get_speed_and_duplex(&adapter->hw,
1946		    &adapter->link_speed, &adapter->link_duplex);
1947		if (bootverbose)
1948			device_printf(dev, "Link is up %d Mbps %s\n",
1949			    adapter->link_speed,
1950			    ((adapter->link_duplex == FULL_DUPLEX) ?
1951			    "Full Duplex" : "Half Duplex"));
1952		adapter->link_active = 1;
1953		ifp->if_baudrate = adapter->link_speed * 1000000;
1954		if_link_state_change(ifp, LINK_STATE_UP);
1955	} else if (!link_check && (adapter->link_active == 1)) {
1956		ifp->if_baudrate = adapter->link_speed = 0;
1957		adapter->link_duplex = 0;
1958		if (bootverbose)
1959			device_printf(dev, "Link is Down\n");
1960		adapter->link_active = 0;
1961		if_link_state_change(ifp, LINK_STATE_DOWN);
1962		/* Turn off watchdogs */
1963		for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
1964			txr->watchdog_timer = FALSE;
1965	}
1966}
1967
1968/*********************************************************************
1969 *
1970 *  This routine disables all traffic on the adapter by issuing a
1971 *  global reset on the MAC and deallocates TX/RX buffers.
1972 *
1973 **********************************************************************/
1974
1975static void
1976igb_stop(void *arg)
1977{
1978	struct adapter	*adapter = arg;
1979	struct ifnet	*ifp = adapter->ifp;
1980
1981	IGB_CORE_LOCK_ASSERT(adapter);
1982
1983	INIT_DEBUGOUT("igb_stop: begin");
1984
1985	igb_disable_intr(adapter);
1986
1987	callout_stop(&adapter->timer);
1988
1989	/* Tell the stack that the interface is no longer active */
1990	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1991
1992#ifdef IGB_TIMESYNC
1993	/* Disable IEEE 1588 Time sync */
1994	if (adapter->hw.mac.type == e1000_82576)
1995		igb_tsync_disable(adapter);
1996#endif
1997
1998	e1000_reset_hw(&adapter->hw);
1999	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2000}
2001
2002
2003/*********************************************************************
2004 *
2005 *  Determine hardware revision.
2006 *
2007 **********************************************************************/
2008static void
2009igb_identify_hardware(struct adapter *adapter)
2010{
2011	device_t dev = adapter->dev;
2012
2013	/* Make sure our PCI config space has the necessary bits set */
2014	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2015	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2016	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2017		device_printf(dev, "Memory Access and/or Bus Master bits "
2018		    "were not set!\n");
2019		adapter->hw.bus.pci_cmd_word |=
2020		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2021		pci_write_config(dev, PCIR_COMMAND,
2022		    adapter->hw.bus.pci_cmd_word, 2);
2023	}
2024
2025	/* Save off the information about this board */
2026	adapter->hw.vendor_id = pci_get_vendor(dev);
2027	adapter->hw.device_id = pci_get_device(dev);
2028	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2029	adapter->hw.subsystem_vendor_id =
2030	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2031	adapter->hw.subsystem_device_id =
2032	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2033
2034	/* Do Shared Code Init and Setup */
2035	if (e1000_set_mac_type(&adapter->hw)) {
2036		device_printf(dev, "Setup init failure\n");
2037		return;
2038	}
2039}
2040
2041static int
2042igb_allocate_pci_resources(struct adapter *adapter)
2043{
2044	device_t	dev = adapter->dev;
2045	int		rid, error = 0;
2046
2047	rid = PCIR_BAR(0);
2048	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2049	    &rid, RF_ACTIVE);
2050	if (adapter->pci_mem == NULL) {
2051		device_printf(dev, "Unable to allocate bus resource: memory\n");
2052		return (ENXIO);
2053	}
2054	adapter->osdep.mem_bus_space_tag =
2055	    rman_get_bustag(adapter->pci_mem);
2056	adapter->osdep.mem_bus_space_handle =
2057	    rman_get_bushandle(adapter->pci_mem);
2058	adapter->hw.hw_addr = (uint8_t *)&adapter->osdep.mem_bus_space_handle;
2059
2060	/*
2061	** Init the resource arrays
2062	*/
2063	for (int i = 0; i < IGB_MSIX_VEC; i++) {
2064		adapter->rid[i] = i + 1; /* MSI/X RID starts at 1 */
2065		adapter->tag[i] = NULL;
2066		adapter->res[i] = NULL;
2067	}
2068
2069	adapter->num_tx_queues = 1; /* Defaults for Legacy or MSI */
2070	adapter->num_rx_queues = 1;
2071
2072#if __FreeBSD_version >= 602105
2073	/* This will setup either MSI/X or MSI */
2074	adapter->msix = igb_setup_msix(adapter);
2075#endif
2076	adapter->hw.back = &adapter->osdep;
2077
2078	return (error);
2079}
2080
2081/*********************************************************************
2082 *
2083 *  Setup the Legacy or MSI Interrupt handler
2084 *
2085 **********************************************************************/
2086static int
2087igb_allocate_legacy(struct adapter *adapter)
2088{
2089	device_t dev = adapter->dev;
2090	int error;
2091
2092	/* Turn off all interrupts */
2093	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2094
2095	/* Legacy RID at 0 */
2096	if (adapter->msix == 0)
2097		adapter->rid[0] = 0;
2098
2099	/* We allocate a single interrupt resource */
2100	adapter->res[0] = bus_alloc_resource_any(dev,
2101	    SYS_RES_IRQ, &adapter->rid[0], RF_SHAREABLE | RF_ACTIVE);
2102	if (adapter->res[0] == NULL) {
2103		device_printf(dev, "Unable to allocate bus resource: "
2104		    "interrupt\n");
2105		return (ENXIO);
2106	}
2107
2108	/*
2109	 * Try allocating a fast interrupt and the associated deferred
2110	 * processing contexts.
2111	 */
2112	TASK_INIT(&adapter->rxtx_task, 0, igb_handle_rxtx, adapter);
2113	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2114	    taskqueue_thread_enqueue, &adapter->tq);
2115	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2116	    device_get_nameunit(adapter->dev));
2117	if ((error = bus_setup_intr(dev, adapter->res[0],
2118	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast,
2119#if __FreeBSD_version >= 700000
2120		    NULL,
2121#endif
2122	    adapter, &adapter->tag[0])) != 0) {
2123		device_printf(dev, "Failed to register fast interrupt "
2124			    "handler: %d\n", error);
2125		taskqueue_free(adapter->tq);
2126		adapter->tq = NULL;
2127		return (error);
2128	}
2129
2130	return (0);
2131}
2132
2133
2134#if __FreeBSD_version >= 602105
2135/*********************************************************************
2136 *
2137 *  Setup the MSIX Interrupt handlers:
2138 *
2139 **********************************************************************/
2140static int
2141igb_allocate_msix(struct adapter *adapter)
2142{
2143	device_t dev = adapter->dev;
2144	struct tx_ring *txr = adapter->tx_rings;
2145	struct rx_ring *rxr = adapter->rx_rings;
2146	int error, vector = 0;
2147
2148	/*
2149	 * Setup the interrupt handlers
2150	 */
2151
2152	/* TX Setup */
2153	for (int i = 0; i < adapter->num_tx_queues; i++, vector++, txr++) {
2154		adapter->res[vector] = bus_alloc_resource_any(dev,
2155		    SYS_RES_IRQ, &adapter->rid[vector],
2156		    RF_SHAREABLE | RF_ACTIVE);
2157		if (adapter->res[vector] == NULL) {
2158			device_printf(dev,
2159			    "Unable to allocate bus resource: "
2160			    "MSIX TX Interrupt\n");
2161			return (ENXIO);
2162		}
2163		error = bus_setup_intr(dev, adapter->res[vector],
2164	    	    INTR_TYPE_NET | INTR_MPSAFE,
2165#if __FreeBSD_version >= 700000
2166		    NULL,
2167#endif
2168		    igb_msix_tx, txr, &adapter->tag[vector]);
2169		if (error) {
2170			adapter->res[vector] = NULL;
2171			device_printf(dev, "Failed to register TX handler");
2172			return (error);
2173		}
2174		/* Make tasklet for deferred handling - one per queue */
2175		TASK_INIT(&txr->tx_task, 0, igb_handle_tx, txr);
2176		if (adapter->hw.mac.type == e1000_82575) {
2177			txr->eims = E1000_EICR_TX_QUEUE0 << i;
2178			/* MSIXBM registers start at 0 */
2179			txr->msix = adapter->rid[vector] - 1;
2180		} else {
2181			txr->eims = 1 << vector;
2182			txr->msix = vector;
2183		}
2184	}
2185
2186	/* RX Setup */
2187	for (int i = 0; i < adapter->num_rx_queues; i++, vector++, rxr++) {
2188		adapter->res[vector] = bus_alloc_resource_any(dev,
2189		    SYS_RES_IRQ, &adapter->rid[vector],
2190		    RF_SHAREABLE | RF_ACTIVE);
2191		if (adapter->res[vector] == NULL) {
2192			device_printf(dev,
2193			    "Unable to allocate bus resource: "
2194			    "MSIX RX Interrupt\n");
2195			return (ENXIO);
2196		}
2197		error = bus_setup_intr(dev, adapter->res[vector],
2198	    	    INTR_TYPE_NET | INTR_MPSAFE,
2199#if __FreeBSD_version >= 700000
2200		    NULL,
2201#endif
2202		    igb_msix_rx, rxr, &adapter->tag[vector]);
2203		if (error) {
2204			adapter->res[vector] = NULL;
2205			device_printf(dev, "Failed to register RX handler");
2206			return (error);
2207		}
2208		/* Make tasklet for deferred handling - one per queue */
2209		TASK_INIT(&rxr->rx_task, 0, igb_handle_rx, rxr);
2210		if (adapter->hw.mac.type == e1000_82575) {
2211			rxr->eims = E1000_EICR_RX_QUEUE0 << i;
2212			rxr->msix = adapter->rid[vector] - 1;
2213		} else {
2214			rxr->eims = 1 << vector;
2215			rxr->msix = vector;
2216		}
2217		/* Get a mask for local timer */
2218		adapter->rx_mask |= rxr->eims;
2219	}
2220
2221	/* And Link */
2222	adapter->res[vector] = bus_alloc_resource_any(dev,
2223	    SYS_RES_IRQ, &adapter->rid[vector],
2224	    RF_SHAREABLE | RF_ACTIVE);
2225	if (adapter->res[vector] == NULL) {
2226		device_printf(dev,
2227		    "Unable to allocate bus resource: "
2228		    "MSIX Link Interrupt\n");
2229		return (ENXIO);
2230	}
2231	if ((error = bus_setup_intr(dev, adapter->res[vector],
2232	    INTR_TYPE_NET | INTR_MPSAFE,
2233#if __FreeBSD_version >= 700000
2234		    NULL,
2235#endif
2236	    igb_msix_link, adapter, &adapter->tag[vector])) != 0) {
2237		device_printf(dev, "Failed to register Link handler");
2238		return (error);
2239	}
2240	if (adapter->hw.mac.type == e1000_82575)
2241		adapter->linkvec = adapter->rid[vector] - 1;
2242	else
2243		adapter->linkvec = vector;
2244
2245	adapter->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2246	    taskqueue_thread_enqueue, &adapter->tq);
2247	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
2248	    device_get_nameunit(adapter->dev));
2249
2250	return (0);
2251}
2252#else /* FreeBSD 6.1/2 */
2253static int
2254igb_allocate_msix(struct adapter *adapter)
2255{
2256	return (1);
2257}
2258#endif
2259
2260
2261static void
2262igb_configure_queues(struct adapter *adapter)
2263{
2264	struct	e1000_hw *hw = &adapter->hw;
2265	struct	tx_ring	*txr;
2266	struct	rx_ring	*rxr;
2267
2268	/* Turn on MSIX */
2269	/*
2270	** The 82576 uses IVARs to route MSI/X
2271	** interrupts; it's not very intuitive,
2272	** so study the code carefully :)
2273	*/
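	/*
	 * Layout note: each IVAR register (index = queue & 0x7) holds
	 * four 8-bit entries: RX queue i in byte 0, TX queue i in
	 * byte 1, and queues i+8 in bytes 2 and 3, hence the byte
	 * masks and shifts below.
	 */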
2274	if (adapter->hw.mac.type == e1000_82576) {
2275		u32	ivar = 0;
2276		/* First turn on the capability */
2277		E1000_WRITE_REG(hw, E1000_GPIE,
2278		    E1000_GPIE_MSIX_MODE |
2279		    E1000_GPIE_EIAME |
2280		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2281		/* RX */
2282		for (int i = 0; i < adapter->num_rx_queues; i++) {
2283			u32 index = i & 0x7; /* Each IVAR has two entries */
2284			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2285			rxr = &adapter->rx_rings[i];
2286			if (i < 8) {
2287				ivar &= 0xFFFFFF00;
2288				ivar |= rxr->msix | E1000_IVAR_VALID;
2289			} else {
2290				ivar &= 0xFF00FFFF;
2291				ivar |= (rxr->msix | E1000_IVAR_VALID) << 16;
2292			}
2293			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2294			adapter->eims_mask |= rxr->eims;
2295		}
2296		/* TX */
2297		for (int i = 0; i < adapter->num_tx_queues; i++) {
2298			u32 index = i & 0x7; /* Each IVAR has two entries */
2299			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2300			txr = &adapter->tx_rings[i];
2301			if (i < 8) {
2302				ivar &= 0xFFFF00FF;
2303				ivar |= (txr->msix | E1000_IVAR_VALID) << 8;
2304			} else {
2305				ivar &= 0x00FFFFFF;
2306				ivar |= (txr->msix | E1000_IVAR_VALID) << 24;
2307			}
2308			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2309			adapter->eims_mask |= txr->eims;
2310		}
2311
2312		/* And for the link interrupt */
2313		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2314		adapter->link_mask = 1 << adapter->linkvec;
2315		adapter->eims_mask |= adapter->link_mask;
2316		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2317	} else
2318	{ /* 82575 */
2319		int tmp;
2320
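		/*
		 * The 82575 has no IVARs; each vector is instead
		 * bound with a per-vector MSIXBM bitmap register.
		 */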
2321		/* Enable MSI-X PBA support */
2322		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2323		tmp |= E1000_CTRL_EXT_PBA_CLR;
2324		/* Auto-Mask interrupts upon ICR read. */
2325		tmp |= E1000_CTRL_EXT_EIAME;
2326		tmp |= E1000_CTRL_EXT_IRCA;
2327		E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2328
2329		/* TX */
2330		for (int i = 0; i < adapter->num_tx_queues; i++) {
2331			txr = &adapter->tx_rings[i];
2332			E1000_WRITE_REG(hw, E1000_MSIXBM(txr->msix),
2333			    txr->eims);
2334			adapter->eims_mask |= txr->eims;
2335		}
2336
2337		/* RX */
2338		for (int i = 0; i < adapter->num_rx_queues; i++) {
2339			rxr = &adapter->rx_rings[i];
2340			E1000_WRITE_REG(hw, E1000_MSIXBM(rxr->msix),
2341			    rxr->eims);
2342			adapter->eims_mask |= rxr->eims;
2343		}
2344
2345		/* Link */
2346		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2347		    E1000_EIMS_OTHER);
2348		adapter->link_mask |= E1000_EIMS_OTHER;
2349		adapter->eims_mask |= adapter->link_mask;
2350	}
2351	return;
2352}
2353
2354
2355static void
2356igb_free_pci_resources(struct adapter *adapter)
2357{
2358	device_t dev = adapter->dev;
2359
2360	/* Make sure the for loop below runs once */
2361	if (adapter->msix == 0)
2362		adapter->msix = 1;
2363
2364	/*
2365	 * First release all the interrupt resources:
2366	 *      notice that since these are just kept
2367	 *      in an array we can do the same logic
2368	 *      whether it's MSI-X or just legacy.
2369	 */
2370	for (int i = 0; i < adapter->msix; i++) {
2371		if (adapter->tag[i] != NULL) {
2372			bus_teardown_intr(dev, adapter->res[i],
2373			    adapter->tag[i]);
2374			adapter->tag[i] = NULL;
2375		}
2376		if (adapter->res[i] != NULL) {
2377			bus_release_resource(dev, SYS_RES_IRQ,
2378			    adapter->rid[i], adapter->res[i]);
2379		}
2380	}
2381
2382#if __FreeBSD_version >= 602105
2383	if (adapter->msix)
2384		pci_release_msi(dev);
2385
2386	if (adapter->msix_mem != NULL)
2387		bus_release_resource(dev, SYS_RES_MEMORY,
2388		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2389#endif
2390
2391	if (adapter->pci_mem != NULL)
2392		bus_release_resource(dev, SYS_RES_MEMORY,
2393		    PCIR_BAR(0), adapter->pci_mem);
2394
2395}
2396
2397#if __FreeBSD_version >= 602105
2398/*
2399 * Setup Either MSI/X or MSI
2400 */
2401static int
2402igb_setup_msix(struct adapter *adapter)
2403{
2404	device_t dev = adapter->dev;
2405	int rid, want, queues, msgs;
2406
2407	/* First try MSI/X */
2408	rid = PCIR_BAR(IGB_MSIX_BAR);
2409	adapter->msix_mem = bus_alloc_resource_any(dev,
2410	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2411	if (!adapter->msix_mem) {
2412		/* May not be enabled */
2413		device_printf(adapter->dev,
2414		    "Unable to map MSIX table\n");
2415		goto msi;
2416	}
2417
2418	msgs = pci_msix_count(dev);
2419	if (msgs == 0) { /* system has msix disabled */
2420		bus_release_resource(dev, SYS_RES_MEMORY,
2421		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2422		adapter->msix_mem = NULL;
2423		goto msi;
2424	}
2425
2426	/* Limit by the number set in header */
2427	if (msgs > IGB_MSIX_VEC)
2428		msgs = IGB_MSIX_VEC;
2429
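	/*
	 * One vector is reserved for link events, so the remaining
	 * (msgs - 1) vectors are split evenly between TX and RX
	 * queues, capped at the number of CPUs.
	 */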
2430	/* Figure out a reasonable auto config value */
2431	queues = (mp_ncpus > ((msgs-1)/2)) ? (msgs-1)/2 : mp_ncpus;
2432
2433	if (igb_tx_queues == 0)
2434		igb_tx_queues = queues;
2435	if (igb_rx_queues == 0)
2436		igb_rx_queues = queues;
2437	want = igb_tx_queues + igb_rx_queues + 1;
2438	if (msgs >= want)
2439		msgs = want;
2440	else {
2441		device_printf(adapter->dev,
2442		    "MSIX Configuration Problem: "
2443		    "%d vectors available, but %d wanted!\n",
2444		    msgs, want);
2445		return (ENXIO);
2446	}
2447	if (msgs && pci_alloc_msix(dev, &msgs) == 0) {
2448		device_printf(adapter->dev,
2449		    "Using MSIX interrupts with %d vectors\n", msgs);
2450		adapter->num_tx_queues = igb_tx_queues;
2451		adapter->num_rx_queues = igb_rx_queues;
2452		return (msgs);
2453	}
2454msi:
2455	msgs = pci_msi_count(dev);
2456	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2457		device_printf(adapter->dev, "Using MSI interrupt\n");
2458	return (msgs);
2459}
2460#endif /*  __FreeBSD_version >= 602105 */
2461
2462/*********************************************************************
2463 *
2464 *  Initialize the hardware to a configuration
2465 *  as specified by the adapter structure.
2466 *
2467 **********************************************************************/
2468static int
2469igb_hardware_init(struct adapter *adapter)
2470{
2471	device_t	dev = adapter->dev;
2472	u32		rx_buffer_size;
2473
2474	INIT_DEBUGOUT("igb_hardware_init: begin");
2475
2476	/* Issue a global reset */
2477	e1000_reset_hw(&adapter->hw);
2478
2479	/* Let the firmware know the OS is in control */
2480	igb_get_hw_control(adapter);
2481
2482	/*
2483	 * These parameters control the automatic generation (Tx) and
2484	 * response (Rx) to Ethernet PAUSE frames.
2485	 * - High water mark should allow for at least two frames to be
2486	 *   received after sending an XOFF.
2487	 * - Low water mark works best when it is very near the high water mark.
2488	 *   This allows the receiver to restart by sending XON when it has
2489	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2490	 *   restart after one full frame is pulled from the buffer. There
2491	 *   could be several smaller frames in the buffer and if so they will
2492	 *   not trigger the XON until their total number reduces the buffer
2493	 *   by 1500.
2494	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2495	 */
2496	if (adapter->hw.mac.type == e1000_82576)
2497		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2498		    E1000_RXPBS) & 0xffff) << 10 );
2499	else
2500		rx_buffer_size = ((E1000_READ_REG(&adapter->hw,
2501		    E1000_PBA) & 0xffff) << 10 );
2502
2503	adapter->hw.fc.high_water = rx_buffer_size -
2504	    roundup2(adapter->max_frame_size, 1024);
2505	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2506
2507	adapter->hw.fc.pause_time = IGB_FC_PAUSE_TIME;
2508	adapter->hw.fc.send_xon = TRUE;
2509
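	/* Valid values: 0 = none, 1 = rx pause, 2 = tx pause, 3 = full */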
2510	/* Set flow control; use the tunable if it is sane */
2511	if ((igb_fc_setting >= 0) && (igb_fc_setting < 4))
2512		adapter->hw.fc.requested_mode = igb_fc_setting;
2513	else
2514		adapter->hw.fc.requested_mode = e1000_fc_none;
2515
2516	if (e1000_init_hw(&adapter->hw) < 0) {
2517		device_printf(dev, "Hardware Initialization Failed\n");
2518		return (EIO);
2519	}
2520
2521	e1000_check_for_link(&adapter->hw);
2522
2523	return (0);
2524}
2525
2526/*********************************************************************
2527 *
2528 *  Setup networking device structure and register an interface.
2529 *
2530 **********************************************************************/
2531static void
2532igb_setup_interface(device_t dev, struct adapter *adapter)
2533{
2534	struct ifnet   *ifp;
2535
2536	INIT_DEBUGOUT("igb_setup_interface: begin");
2537
2538	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2539	if (ifp == NULL)
2540		panic("%s: can not if_alloc()", device_get_nameunit(dev));
2541	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2542	ifp->if_mtu = ETHERMTU;
2543	ifp->if_init =  igb_init;
2544	ifp->if_softc = adapter;
2545	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2546	ifp->if_ioctl = igb_ioctl;
2547	ifp->if_start = igb_start;
2548	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2549	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2550	IFQ_SET_READY(&ifp->if_snd);
2551
2552	ether_ifattach(ifp, adapter->hw.mac.addr);
2553
2554	ifp->if_capabilities = ifp->if_capenable = 0;
2555
2556	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2557	ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_VLAN_MTU;
2558	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2559	ifp->if_capenable = ifp->if_capabilities;
2560
2561	/*
2562	 * Tell the upper layer(s) we support long frames.
2563	 */
2564	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2565	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER;
2566	ifp->if_capabilities |= IFCAP_VLAN_MTU;
2567	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER;
2568	ifp->if_capenable |= IFCAP_VLAN_MTU;
2569
2570	/*
2571	 * Specify the media types supported by this adapter and register
2572	 * callbacks to update media and link information
2573	 */
2574	ifmedia_init(&adapter->media, IFM_IMASK,
2575	    igb_media_change, igb_media_status);
2576	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2577	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2578		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2579			    0, NULL);
2580		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2581	} else {
2582		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2583		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2584			    0, NULL);
2585		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2586			    0, NULL);
2587		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2588			    0, NULL);
2589		if (adapter->hw.phy.type != e1000_phy_ife) {
2590			ifmedia_add(&adapter->media,
2591				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2592			ifmedia_add(&adapter->media,
2593				IFM_ETHER | IFM_1000_T, 0, NULL);
2594		}
2595	}
2596	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2597	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2598}
2599
2600
2601/*
2602 * Manage DMA'able memory.
2603 */
2604static void
2605igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2606{
2607	if (error)
2608		return;
2609	*(bus_addr_t *) arg = segs[0].ds_addr;
2610}
2611
2612static int
2613igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2614        struct igb_dma_alloc *dma, int mapflags)
2615{
2616	int error;
2617
2618	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2619				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2620				BUS_SPACE_MAXADDR,	/* lowaddr */
2621				BUS_SPACE_MAXADDR,	/* highaddr */
2622				NULL, NULL,		/* filter, filterarg */
2623				size,			/* maxsize */
2624				1,			/* nsegments */
2625				size,			/* maxsegsize */
2626				0,			/* flags */
2627				NULL,			/* lockfunc */
2628				NULL,			/* lockarg */
2629				&dma->dma_tag);
2630	if (error) {
2631		device_printf(adapter->dev,
2632		    "%s: bus_dma_tag_create failed: %d\n",
2633		    __func__, error);
2634		goto fail_0;
2635	}
2636
2637	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2638	    BUS_DMA_NOWAIT, &dma->dma_map);
2639	if (error) {
2640		device_printf(adapter->dev,
2641		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2642		    __func__, (uintmax_t)size, error);
2643		goto fail_2;
2644	}
2645
2646	dma->dma_paddr = 0;
2647	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2648	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2649	if (error || dma->dma_paddr == 0) {
2650		device_printf(adapter->dev,
2651		    "%s: bus_dmamap_load failed: %d\n",
2652		    __func__, error);
2653		goto fail_3;
2654	}
2655
2656	return (0);
2657
2658fail_3:
2659	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2660	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2661fail_2:
2662	bus_dma_tag_destroy(dma->dma_tag);
2663fail_0:
2664	dma->dma_map = NULL;
2665	dma->dma_tag = NULL;
2666
2667	return (error);
2668}
2669
2670static void
2671igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
2672{
2673	if (dma->dma_tag == NULL)
2674		return;
2675	if (dma->dma_map != NULL) {
2676		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2677		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2678		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2679		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2680		dma->dma_map = NULL;
2681	}
2682	bus_dma_tag_destroy(dma->dma_tag);
2683	dma->dma_tag = NULL;
2684}
2685
2686
2687/*********************************************************************
2688 *
2689 *  Allocate memory for the transmit and receive rings, and then
2690 *  the descriptors associated with each, called only once at attach.
2691 *
2692 **********************************************************************/
2693static int
2694igb_allocate_queues(struct adapter *adapter)
2695{
2696	device_t dev = adapter->dev;
2697	struct tx_ring *txr;
2698	struct rx_ring *rxr;
2699	int rsize, tsize, error = E1000_SUCCESS;
2700	int txconf = 0, rxconf = 0;
2701
2702	/* First allocate the TX ring struct memory */
2703	if (!(adapter->tx_rings =
2704	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2705	    adapter->num_tx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2706		device_printf(dev, "Unable to allocate TX ring memory\n");
2707		error = ENOMEM;
2708		goto fail;
2709	}
2710	txr = adapter->tx_rings;
2711
2712	/* Next allocate the RX */
2713	if (!(adapter->rx_rings =
2714	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2715	    adapter->num_rx_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2716		device_printf(dev, "Unable to allocate RX ring memory\n");
2717		error = ENOMEM;
2718		goto rx_fail;
2719	}
2720	rxr = adapter->rx_rings;
2721
2722	tsize = roundup2(adapter->num_tx_desc *
2723	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
2724	/*
2725	 * Now set up the TX queues, txconf is needed to handle the
2726	 * possibility that things fail midcourse and we need to
2727	 * undo memory gracefully
2728	 */
2729	for (int i = 0; i < adapter->num_tx_queues; i++, txconf++) {
2730		/* Set up some basics */
2731		txr = &adapter->tx_rings[i];
2732		txr->adapter = adapter;
2733		txr->me = i;
2734
2735		/* Initialize the TX lock */
2736		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2737		    device_get_nameunit(dev), txr->me);
2738		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2739
2740		if (igb_dma_malloc(adapter, tsize,
2741			&txr->txdma, BUS_DMA_NOWAIT)) {
2742			device_printf(dev,
2743			    "Unable to allocate TX Descriptor memory\n");
2744			error = ENOMEM;
2745			goto err_tx_desc;
2746		}
2747		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
2748		bzero((void *)txr->tx_base, tsize);
2749
2750		/* Now allocate transmit buffers for the ring */
2751		if (igb_allocate_transmit_buffers(txr)) {
2752			device_printf(dev,
2753			    "Critical Failure setting up transmit buffers\n");
2754			error = ENOMEM;
2755			goto err_tx_desc;
2756		}
2757
2758	}
2759
2760	/*
2761	 * Next the RX queues...
2762	 */
2763	rsize = roundup2(adapter->num_rx_desc *
2764	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
2765	for (int i = 0; i < adapter->num_rx_queues; i++, rxconf++) {
2766		rxr = &adapter->rx_rings[i];
2767		rxr->adapter = adapter;
2768		rxr->me = i;
2769
2770		/* Initialize the RX lock */
2771		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2772		    device_get_nameunit(dev), rxr->me);
2773		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2774
2775		if (igb_dma_malloc(adapter, rsize,
2776			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2777			device_printf(dev,
2778			    "Unable to allocate RxDescriptor memory\n");
2779			error = ENOMEM;
2780			goto err_rx_desc;
2781		}
2782		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2783		bzero((void *)rxr->rx_base, rsize);
2784
2785		/* Allocate receive buffers for the ring */
2786		if (igb_allocate_receive_buffers(rxr)) {
2787			device_printf(dev,
2788			    "Critical Failure setting up receive buffers\n");
2789			error = ENOMEM;
2790			goto err_rx_desc;
2791		}
2792	}
2793
2794	return (0);
2795
2796err_rx_desc:
2797	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2798		igb_dma_free(adapter, &rxr->rxdma);
2799err_tx_desc:
2800	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2801		igb_dma_free(adapter, &txr->txdma);
2802	free(adapter->rx_rings, M_DEVBUF);
2803rx_fail:
2804	free(adapter->tx_rings, M_DEVBUF);
2805fail:
2806	return (error);
2807}
2808
2809/*********************************************************************
2810 *
2811 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2812 *  the information needed to transmit a packet on the wire. This is
2813 *  called only once at attach; setup is done on every reset.
2814 *
2815 **********************************************************************/
2816static int
2817igb_allocate_transmit_buffers(struct tx_ring *txr)
2818{
2819	struct adapter *adapter = txr->adapter;
2820	device_t dev = adapter->dev;
2821	struct igb_tx_buffer *txbuf;
2822	int error, i;
2823
2824	/*
2825	 * Setup DMA descriptor areas.
2826	 */
2827	if ((error = bus_dma_tag_create(NULL,		/* parent */
2828			       PAGE_SIZE, 0,		/* alignment, bounds */
2829			       BUS_SPACE_MAXADDR,	/* lowaddr */
2830			       BUS_SPACE_MAXADDR,	/* highaddr */
2831			       NULL, NULL,		/* filter, filterarg */
2832			       IGB_TSO_SIZE,		/* maxsize */
2833			       IGB_MAX_SCATTER,		/* nsegments */
2834			       PAGE_SIZE,		/* maxsegsize */
2835			       0,			/* flags */
2836			       NULL,			/* lockfunc */
2837			       NULL,			/* lockfuncarg */
2838			       &txr->txtag))) {
2839		device_printf(dev,"Unable to allocate TX DMA tag\n");
2840		goto fail;
2841	}
2842
2843	if (!(txr->tx_buffers =
2844	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
2845	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2846		device_printf(dev, "Unable to allocate tx_buffer memory\n");
2847		error = ENOMEM;
2848		goto fail;
2849	}
2850
2851	/* Create the descriptor buffer dma maps */
2852	txbuf = txr->tx_buffers;
2853	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2854		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
2855		if (error != 0) {
2856			device_printf(dev, "Unable to create TX DMA map\n");
2857			goto fail;
2858		}
2859	}
2860
2861	return (0);
2862fail:
2863	/* We free all, it handles case where we are in the middle */
2864	igb_free_transmit_structures(adapter);
2865	return (error);
2866}
2867
2868/*********************************************************************
2869 *
2870 *  Initialize a transmit ring.
2871 *
2872 **********************************************************************/
2873static void
2874igb_setup_transmit_ring(struct tx_ring *txr)
2875{
2876	struct adapter *adapter = txr->adapter;
2877	struct igb_tx_buffer *txbuf;
2878	int i;
2879
2880	/* Clear the old ring contents */
2881	bzero((void *)txr->tx_base,
2882	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
2883	/* Reset indices */
2884	txr->next_avail_desc = 0;
2885	txr->next_to_clean = 0;
2886
2887	/* Free any existing tx buffers. */
2888	txbuf = txr->tx_buffers;
2889	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
2890		if (txbuf->m_head != NULL) {
2891			bus_dmamap_sync(txr->txtag, txbuf->map,
2892			    BUS_DMASYNC_POSTWRITE);
2893			bus_dmamap_unload(txr->txtag, txbuf->map);
2894			m_freem(txbuf->m_head);
2895			txbuf->m_head = NULL;
2896		}
2897		/* clear the watch index */
2898		txbuf->next_eop = -1;
2899	}
2900
2901	/* Set number of descriptors available */
2902	txr->tx_avail = adapter->num_tx_desc;
2903
2904	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
2905	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2906
2907}
2908
2909/*********************************************************************
2910 *
2911 *  Initialize all transmit rings.
2912 *
2913 **********************************************************************/
2914static void
2915igb_setup_transmit_structures(struct adapter *adapter)
2916{
2917	struct tx_ring *txr = adapter->tx_rings;
2918
2919	for (int i = 0; i < adapter->num_tx_queues; i++, txr++)
2920		igb_setup_transmit_ring(txr);
2921
2922	return;
2923}
2924
2925/*********************************************************************
2926 *
2927 *  Enable transmit unit.
2928 *
2929 **********************************************************************/
2930static void
2931igb_initialize_transmit_units(struct adapter *adapter)
2932{
2933	struct tx_ring	*txr = adapter->tx_rings;
2934	u32		tctl, txdctl;
2935
2936	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
2937
2938	/* Setup the Base and Length of the Tx Descriptor Rings */
2939	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2940		u64 bus_addr = txr->txdma.dma_paddr;
2941
2942		E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(i),
2943		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
2944		E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(i),
2945		    (uint32_t)(bus_addr >> 32));
2946		E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(i),
2947		    (uint32_t)bus_addr);
2948
2949		/* Setup the HW Tx Head and Tail descriptor pointers */
2950		E1000_WRITE_REG(&adapter->hw, E1000_TDT(i), 0);
2951		E1000_WRITE_REG(&adapter->hw, E1000_TDH(i), 0);
2952
2953		HW_DEBUGOUT2("Base = %x, Length = %x\n",
2954		    E1000_READ_REG(&adapter->hw, E1000_TDBAL(i)),
2955		    E1000_READ_REG(&adapter->hw, E1000_TDLEN(i)));
2956
2957		/* Setup Transmit Descriptor Base Settings */
2958		adapter->txd_cmd = E1000_TXD_CMD_IFCS;
2959
2960		txdctl = E1000_READ_REG(&adapter->hw, E1000_TXDCTL(i));
2961		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2962		E1000_WRITE_REG(&adapter->hw, E1000_TXDCTL(i), txdctl);
2963	}
2964
2965	/* Program the Transmit Control Register */
2966	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
2967	tctl &= ~E1000_TCTL_CT;
2968	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2969		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2970
2971	e1000_config_collision_dist(&adapter->hw);
2972
2973	/* This write will effectively turn on the transmit unit. */
2974	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
2975
2976}
2977
2978/*********************************************************************
2979 *
2980 *  Free all transmit rings.
2981 *
2982 **********************************************************************/
2983static void
2984igb_free_transmit_structures(struct adapter *adapter)
2985{
2986	struct tx_ring *txr = adapter->tx_rings;
2987
2988	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
2989		IGB_TX_LOCK(txr);
2990		igb_free_transmit_buffers(txr);
2991		igb_dma_free(adapter, &txr->txdma);
2992		IGB_TX_UNLOCK(txr);
2993		IGB_TX_LOCK_DESTROY(txr);
2994	}
2995	free(adapter->tx_rings, M_DEVBUF);
2996}
2997
2998/*********************************************************************
2999 *
3000 *  Free transmit ring related data structures.
3001 *
3002 **********************************************************************/
3003static void
3004igb_free_transmit_buffers(struct tx_ring *txr)
3005{
3006	struct adapter *adapter = txr->adapter;
3007	struct igb_tx_buffer *tx_buffer;
3008	int             i;
3009
3010	INIT_DEBUGOUT("free_transmit_ring: begin");
3011
3012	if (txr->tx_buffers == NULL)
3013		return;
3014
3015	tx_buffer = txr->tx_buffers;
3016	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3017		if (tx_buffer->m_head != NULL) {
3018			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3019			    BUS_DMASYNC_POSTWRITE);
3020			bus_dmamap_unload(txr->txtag,
3021			    tx_buffer->map);
3022			m_freem(tx_buffer->m_head);
3023			tx_buffer->m_head = NULL;
3024			if (tx_buffer->map != NULL) {
3025				bus_dmamap_destroy(txr->txtag,
3026				    tx_buffer->map);
3027				tx_buffer->map = NULL;
3028			}
3029		} else if (tx_buffer->map != NULL) {
3030			bus_dmamap_unload(txr->txtag,
3031			    tx_buffer->map);
3032			bus_dmamap_destroy(txr->txtag,
3033			    tx_buffer->map);
3034			tx_buffer->map = NULL;
3035		}
3036	}
3037
3038	if (txr->tx_buffers != NULL) {
3039		free(txr->tx_buffers, M_DEVBUF);
3040		txr->tx_buffers = NULL;
3041	}
3042	if (txr->txtag != NULL) {
3043		bus_dma_tag_destroy(txr->txtag);
3044		txr->txtag = NULL;
3045	}
3046	return;
3047}
3048
3049#if __FreeBSD_version >= 700000
3050/**********************************************************************
3051 *
3052 *  Setup work for hardware segmentation offload (TSO) on
3053 *  adapters using advanced tx descriptors (82575)
3054 *
3055 **********************************************************************/
3056static boolean_t
3057igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3058{
3059	struct adapter *adapter = txr->adapter;
3060	struct e1000_adv_tx_context_desc *TXD;
3061	struct igb_tx_buffer        *tx_buffer;
3062	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3063	u32 mss_l4len_idx = 0;
3064	u16 vtag = 0;
3065	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3066	struct ether_vlan_header *eh;
3067	struct ip *ip;
3068	struct tcphdr *th;
3069
3070
3071	/*
3072	 * Determine where frame payload starts.
3073	 * Jump over vlan headers if already present
3074	 */
3075	eh = mtod(mp, struct ether_vlan_header *);
3076	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
3077		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3078	else
3079		ehdrlen = ETHER_HDR_LEN;
3080
3081	/* Ensure we have at least the IP+TCP header in the first mbuf. */
3082	if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
3083		return FALSE;
3084
3085	/* Only supports IPV4 for now */
3086	ctxd = txr->next_avail_desc;
3087	tx_buffer = &txr->tx_buffers[ctxd];
3088	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3089
3090	ip = (struct ip *)(mp->m_data + ehdrlen);
3091	if (ip->ip_p != IPPROTO_TCP)
3092		return (FALSE);
3093	ip->ip_sum = 0;
3094	ip_hlen = ip->ip_hl << 2;
3095	th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
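	/*
	 * Seed the TCP checksum with the pseudo-header sum (addresses
	 * and protocol only, no length); the hardware inserts the
	 * per-segment length as it splits the payload.
	 */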
3096	th->th_sum = in_pseudo(ip->ip_src.s_addr,
3097	    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3098	tcp_hlen = th->th_off << 2;
3099	/*
3100	 * Calculate header length, this is used
3101	 * in the transmit desc in igb_xmit
3102	 */
3103	*hdrlen = ehdrlen + ip_hlen + tcp_hlen;
3104
3105	/* VLAN MACLEN IPLEN */
3106	if (mp->m_flags & M_VLANTAG) {
3107		vtag = htole16(mp->m_pkthdr.ether_vtag);
3108		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3109	}
3110
3111	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3112	vlan_macip_lens |= ip_hlen;
3113	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3114
3115	/* ADV DTYPE TUCMD */
3116	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3117	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3118	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3119	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3120
3121	/* MSS L4LEN IDX */
3122	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3123	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3124	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3125
3126	TXD->seqnum_seed = htole32(0);
3127	tx_buffer->m_head = NULL;
3128	tx_buffer->next_eop = -1;
3129
3130	if (++ctxd == adapter->num_tx_desc)
3131		ctxd = 0;
3132
3133	txr->tx_avail--;
3134	txr->next_avail_desc = ctxd;
3135	return TRUE;
3136}
3137#else	/* fake out for 6.2 */
3138static boolean_t
3139igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *hdrlen)
3140{
3141	return (FALSE);
3142}
3143#endif
3144
3145/*********************************************************************
3146 *
3147 *  Context Descriptor setup for VLAN or CSUM
3148 *
3149 **********************************************************************/
3150
3151static int
3152igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3153{
3154	struct adapter *adapter = txr->adapter;
3155	struct e1000_adv_tx_context_desc *TXD;
3156	struct igb_tx_buffer        *tx_buffer;
3157	uint32_t vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3158	struct ether_vlan_header *eh;
3159	struct ip *ip = NULL;
3160	struct ip6_hdr *ip6;
3161	int  ehdrlen, ip_hlen = 0;
3162	u16	etype;
3163	u8	ipproto = 0;
3164	bool	offload = FALSE;
3165#if __FreeBSD_version >= 700000
3166	u16 vtag = 0;
3167#else
3168	struct m_tag	*mtag;
3169#endif
3170
3171	int ctxd = txr->next_avail_desc;
3172	tx_buffer = &txr->tx_buffers[ctxd];
3173	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3174
3175	if (mp->m_pkthdr.csum_flags & CSUM_OFFLOAD)
3176		offload = TRUE;
3177
3178	/*
3179	** In advanced descriptors the vlan tag must
3180	** be placed into the descriptor itself.
3181	*/
3182#if __FreeBSD_version < 700000
3183	mtag = VLAN_OUTPUT_TAG(adapter->ifp, mp);
3184	if (mtag != NULL) {
3185		vlan_macip_lens |=
3186		    htole16(VLAN_TAG_VALUE(mtag)) << E1000_ADVTXD_VLAN_SHIFT;
3187		offload = TRUE;
3188	}
3189#else
3190	if (mp->m_flags & M_VLANTAG) {
3191		vtag = htole16(mp->m_pkthdr.ether_vtag);
3192		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3193		offload = TRUE;
3194	}
3195#endif
3196	/*
3197	 * Determine where frame payload starts.
3198	 * Jump over vlan headers if already present,
3199	 * helpful for QinQ too.
3200	 */
3201	eh = mtod(mp, struct ether_vlan_header *);
3202	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3203		etype = ntohs(eh->evl_proto);
3204		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3205	} else {
3206		etype = ntohs(eh->evl_encap_proto);
3207		ehdrlen = ETHER_HDR_LEN;
3208	}
3209
3210	/* Set the ether header length */
3211	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3212
3213	switch (etype) {
3214		case ETHERTYPE_IP:
3215			ip = (struct ip *)(mp->m_data + ehdrlen);
3216			ip_hlen = ip->ip_hl << 2;
3217			if (mp->m_len < ehdrlen + ip_hlen)
3218				return FALSE;
3219			ipproto = ip->ip_p;
3220			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3221			break;
3222		case ETHERTYPE_IPV6:
3223			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3224			ip_hlen = sizeof(struct ip6_hdr);
3225			if (mp->m_len < ehdrlen + ip_hlen)
3226				return FALSE; /* failure */
3227			ipproto = ip6->ip6_nxt;
3228			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3229			break;
3230#ifdef IGB_TIMESYNC
3231		case ETHERTYPE_IEEE1588:
3232			return (IGB_TIMESTAMP);
3233#endif
3234		default:
3235			return (FALSE);
3236	}
3237
3238	vlan_macip_lens |= ip_hlen;
3239	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3240
3241	switch (ipproto) {
3242		case IPPROTO_TCP:
3243			if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
3244				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3245				offload = TRUE;
3246			}
3247			break;
3248		case IPPROTO_UDP:
3249		{
3250#ifdef IGB_TIMESYNC
3251			void *hdr = (caddr_t) ip + ip_hlen;
3252			struct udphdr *uh = (struct udphdr *)hdr;
3253
3254			if (uh->uh_dport == htons(TSYNC_PORT))
3255				return (IGB_TIMESTAMP);
3256#endif
3257			if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
3258				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3259				offload = TRUE;
3260			}
3261			break;
3262		}
3263#if __FreeBSD_version >= 800000
3264		case IPPROTO_SCTP:
3265		{
3266			if (mp->m_pkthdr.csum_flags & CSUM_SCTP) {
3267				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3268				offload = TRUE;
3269			}
3270			break;
3271		}
3272#endif
3273		default:
3274			return (FALSE);
3275	}
3276
3277	if (offload != TRUE)
3278		return (FALSE);
3279
3280	/* Now copy bits into descriptor */
3281	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3282	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3283	TXD->seqnum_seed = htole32(0);
3284	TXD->mss_l4len_idx = htole32(0);
3285
3286	tx_buffer->m_head = NULL;
3287	tx_buffer->next_eop = -1;
3288
3289	/* We've consumed the first desc, adjust counters */
3290	if (++ctxd == adapter->num_tx_desc)
3291		ctxd = 0;
3292	txr->next_avail_desc = ctxd;
3293	--txr->tx_avail;
3294
3295	return (TRUE);
3296}
3297
3298
3299/**********************************************************************
3300 *
3301 *  Examine each tx_buffer in the used queue. If the hardware is done
3302 *  processing the packet then free associated resources. The
3303 *  tx_buffer is put back on the free queue.
3304 *
3305 *  A TRUE return means there is work in the ring to clean; FALSE means it is empty.
3306 **********************************************************************/
3307static bool
3308igb_txeof(struct tx_ring *txr)
3309{
3310	struct adapter	*adapter = txr->adapter;
3311	int first, last, done, num_avail;
3312	u32 cleaned = 0;
3313	struct igb_tx_buffer *tx_buffer;
3314	struct e1000_tx_desc *tx_desc, *eop_desc;
3315	struct ifnet   *ifp = adapter->ifp;
3316
3317	IGB_TX_LOCK_ASSERT(txr);
3318
3319	if (txr->tx_avail == adapter->num_tx_desc)
3320		return (FALSE);
3321
3322	num_avail = txr->tx_avail;
3323	first = txr->next_to_clean;
3324	tx_desc = &txr->tx_base[first];
3325	tx_buffer = &txr->tx_buffers[first];
3326	last = tx_buffer->next_eop;
3327	eop_desc = &txr->tx_base[last];
3328
3329	/*
3330	 * Get the index of the first descriptor AFTER
3331	 * the EOP of the first packet; that way the
3332	 * inner while loop can use a simple
3333	 * comparison.
3334	 */
3335	if (++last == adapter->num_tx_desc)
3336 		last = 0;
3337	done = last;
3338
3339	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3340	    BUS_DMASYNC_POSTREAD);
3341
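	/*
	 * Only EOP descriptors request Report Status, so DD is tested
	 * on each packet's EOP and the whole range is then reclaimed.
	 */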
3342	while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3343		/* We clean the range of the packet */
3344		while (first != done) {
3345			tx_desc->upper.data = 0;
3346			tx_desc->lower.data = 0;
3347			tx_desc->buffer_addr = 0;
3348			++num_avail; ++cleaned;
3349
3350			if (tx_buffer->m_head) {
3351				ifp->if_opackets++;
3352				bus_dmamap_sync(txr->txtag,
3353				    tx_buffer->map,
3354				    BUS_DMASYNC_POSTWRITE);
3355				bus_dmamap_unload(txr->txtag,
3356				    tx_buffer->map);
3357
3358				m_freem(tx_buffer->m_head);
3359				tx_buffer->m_head = NULL;
3360			}
3361			tx_buffer->next_eop = -1;
3362
3363			if (++first == adapter->num_tx_desc)
3364				first = 0;
3365
3366			tx_buffer = &txr->tx_buffers[first];
3367			tx_desc = &txr->tx_base[first];
3368		}
3369		/* See if we can continue to the next packet */
3370		last = tx_buffer->next_eop;
3371		if (last != -1) {
3372			eop_desc = &txr->tx_base[last];
3373			/* Get new done point */
3374			if (++last == adapter->num_tx_desc) last = 0;
3375			done = last;
3376		} else
3377			break;
3378	}
3379	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3380	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3381
3382	txr->next_to_clean = first;
3383
3384	/*
3385	 * If we have enough room, clear IFF_DRV_OACTIVE to
3386	 * tell the stack that it is OK to send packets.
3387	 * If there are no pending descriptors, clear the timeout.
3388	 */
3389	if (num_avail > IGB_TX_CLEANUP_THRESHOLD) {
3390		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
3391		if (num_avail == adapter->num_tx_desc) {
3392			txr->watchdog_timer = 0;
3393			txr->tx_avail = num_avail;
3394			return (FALSE);
3395		}
3396	}
3397	/* Some descriptors cleaned, reset the watchdog */
3398	if (cleaned)
3399		txr->watchdog_timer = IGB_TX_TIMEOUT;
3400	txr->tx_avail = num_avail;
3401	return (TRUE);
3402}
3403
3404
3405/*********************************************************************
3406 *
3407 *  Setup descriptor buffer(s) from system mbuf buffer pools.
3408 *  		i - designates the descriptor index
3409 *		clean - tells the function whether to update
3410 *		        the header, the packet buffer, or both.
3411 *
3412 **********************************************************************/
3413static int
3414igb_get_buf(struct rx_ring *rxr, int i, u8 clean)
3415{
3416	struct adapter		*adapter = rxr->adapter;
3417	struct mbuf		*mh, *mp;
3418	bus_dma_segment_t	seg[2];
3419	bus_dmamap_t		map;
3420	struct igb_rx_buffer	*rx_buffer;
3421	int			error, nsegs;
3422	int			merr = 0;
3423
3424
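	/*
	 * Header split: seg[0] will map the small header mbuf and
	 * seg[1] the cluster holding the payload, matching the
	 * hdr_addr/pkt_addr fields written to the descriptor below.
	 */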
3425	rx_buffer = &rxr->rx_buffers[i];
3426
3427	/* First get our header and payload mbuf */
3428	if (clean & IGB_CLEAN_HEADER) {
3429		mh = m_gethdr(M_DONTWAIT, MT_DATA);
3430		if (mh == NULL)
3431			goto remap;
3432	} else  /* reuse */
3433		mh = rxr->rx_buffers[i].m_head;
3434
3435	mh->m_len = MHLEN;
3436	mh->m_flags |= M_PKTHDR;
3437
3438	if (clean & IGB_CLEAN_PAYLOAD) {
3439		mp = m_getjcl(M_DONTWAIT, MT_DATA,
3440		    M_PKTHDR, adapter->rx_mbuf_sz);
3441		if (mp == NULL)
3442			goto remap;
3443		mp->m_len = adapter->rx_mbuf_sz;
3444		mp->m_flags &= ~M_PKTHDR;
3445	} else {	/* reusing */
3446		mp = rxr->rx_buffers[i].m_pack;
3447		mp->m_len = adapter->rx_mbuf_sz;
3448		mp->m_flags &= ~M_PKTHDR;
3449	}
3450	/*
3451	** Need to create a chain for the following
3452	** dmamap call at this point.
3453	*/
3454	mh->m_next = mp;
3455	mh->m_pkthdr.len = mh->m_len + mp->m_len;
3456
3457	/* Get the memory mapping */
3458	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3459	    rxr->rx_spare_map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3460	if (error != 0) {
3461		printf("GET BUF: dmamap load failure - %d\n", error);
3462		m_free(mh);
3463		return (error);
3464	}
3465
3466	/* Unload old mapping and update buffer struct */
3467	if (rx_buffer->m_head != NULL)
3468		bus_dmamap_unload(rxr->rxtag, rx_buffer->map);
3469	map = rx_buffer->map;
3470	rx_buffer->map = rxr->rx_spare_map;
3471	rxr->rx_spare_map = map;
3472	rx_buffer->m_head = mh;
3473	rx_buffer->m_pack = mp;
3474	bus_dmamap_sync(rxr->rxtag,
3475	    rx_buffer->map, BUS_DMASYNC_PREREAD);
3476
3477	/* Update descriptor */
3478	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3479	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3480
3481	return (0);
3482
3483	/*
3484	** If we get here, we have an mbuf resource
3485	** issue: discard the incoming packet and try to
3486	** reuse the existing mbufs on the next pass
3487	** through the ring.  To do so we must fix up
3488	** the descriptor whose address was clobbered
3489	** by writeback info.
3490	*/
3491remap:
3492	adapter->mbuf_header_failed++;
3493	merr = ENOBUFS;
3494	/* Is there a reusable buffer? */
3495	mh = rxr->rx_buffers[i].m_head;
3496	if (mh == NULL) /* Nope, init error */
3497		return (merr);
3498	mp = rxr->rx_buffers[i].m_pack;
3499	if (mp == NULL) /* Nope, init error */
3500		return (merr);
3501	/* Get our old mapping */
3502	rx_buffer = &rxr->rx_buffers[i];
3503	error = bus_dmamap_load_mbuf_sg(rxr->rxtag,
3504	    rx_buffer->map, mh, seg, &nsegs, BUS_DMA_NOWAIT);
3505	if (error != 0) {
3506		/* We really have a problem */
3507		m_free(mh);
3508		return (error);
3509	}
3510	/* Now fix the descriptor as needed */
3511	rxr->rx_base[i].read.hdr_addr = htole64(seg[0].ds_addr);
3512	rxr->rx_base[i].read.pkt_addr = htole64(seg[1].ds_addr);
3513	return (merr);
3514}
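
/*
** Illustrative sketch (not part of the driver logic): how a
** caller chooses the 'clean' argument to igb_get_buf(). It is
** assumed here that IGB_CLEAN_BOTH is the OR of the header and
** payload flags, matching how igb_rxeof() builds 'dopayload':
**
**	u8 clean = 0;
**	if (header_consumed)		<- hypothetical condition
**		clean |= IGB_CLEAN_HEADER;
**	if (payload_consumed)		<- hypothetical condition
**		clean |= IGB_CLEAN_PAYLOAD;
**	if (igb_get_buf(rxr, i, clean) == ENOBUFS)
**		the frame is dropped and the old mbufs are reused
*/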
3515
3516
3517/*********************************************************************
3518 *
3519 *  Allocate memory for rx_buffer structures. Since we use one
3520 *  rx_buffer per received packet, the maximum number of rx_buffers
3521 *  that we'll need is equal to the number of receive descriptors
3522 *  that we've allocated.
3523 *
3524 **********************************************************************/
3525static int
3526igb_allocate_receive_buffers(struct rx_ring *rxr)
3527{
3528	struct	adapter 	*adapter = rxr->adapter;
3529	device_t 		dev = adapter->dev;
3530	struct igb_rx_buffer 	*rxbuf;
3531	int             	i, bsize, error;
3532
3533	bsize = sizeof(struct igb_rx_buffer) * adapter->num_rx_desc;
3534	if (!(rxr->rx_buffers =
3535	    (struct igb_rx_buffer *) malloc(bsize,
3536	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3537		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3538		error = ENOMEM;
3539		goto fail;
3540	}
3541
3542	/*
3543	** The tag is made to accommodate the largest buffer size
3544	** with packet split (hence the two segments), even though
3545	** it may not always use both.
3546	*/
3547	if ((error = bus_dma_tag_create(NULL,		/* parent */
3548				   PAGE_SIZE, 0,	/* alignment, bounds */
3549				   BUS_SPACE_MAXADDR,	/* lowaddr */
3550				   BUS_SPACE_MAXADDR,	/* highaddr */
3551				   NULL, NULL,		/* filter, filterarg */
3552				   MJUM16BYTES,		/* maxsize */
3553				   2,			/* nsegments */
3554				   MJUMPAGESIZE,	/* maxsegsize */
3555				   0,			/* flags */
3556				   NULL,		/* lockfunc */
3557				   NULL,		/* lockfuncarg */
3558				   &rxr->rxtag))) {
3559		device_printf(dev, "Unable to create RX DMA tag\n");
3560		goto fail;
3561	}
3562
3563	/* Create the spare map (used by getbuf) */
3564	error = bus_dmamap_create(rxr->rxtag, BUS_DMA_NOWAIT,
3565	    &rxr->rx_spare_map);
3566	if (error) {
3567		device_printf(dev,
3568		    "%s: bus_dmamap_create header spare failed: %d\n",
3569		    __func__, error);
3570		goto fail;
3571	}
3572
3573	for (i = 0; i < adapter->num_rx_desc; i++) {
3574		rxbuf = &rxr->rx_buffers[i];
3575		error = bus_dmamap_create(rxr->rxtag,
3576		    BUS_DMA_NOWAIT, &rxbuf->map);
3577		if (error) {
3578			device_printf(dev, "Unable to create RX DMA maps\n");
3579			goto fail;
3580		}
3581	}
3582
3583	return (0);
3584
3585fail:
3586	/* Frees all, but can handle partial completion */
3587	igb_free_receive_structures(adapter);
3588	return (error);
3589}
3590
3591/*********************************************************************
3592 *
3593 *  Initialize a receive ring and its buffers.
3594 *
3595 **********************************************************************/
3596static int
3597igb_setup_receive_ring(struct rx_ring *rxr)
3598{
3599	struct	adapter		*adapter;
3600	device_t		dev;
3601	struct igb_rx_buffer	*rxbuf;
3602	struct lro_ctrl		*lro = &rxr->lro;
3603	int			j, rsize;
3604
3605	adapter = rxr->adapter;
3606	dev = adapter->dev;
3607
3608	/* Clear the ring contents */
3609	rsize = roundup2(adapter->num_rx_desc *
3610	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3611	bzero((void *)rxr->rx_base, rsize);
3612
3613	/*
3614	** Free current RX buffer structures and their mbufs
3615	*/
3616	for (int i = 0; i < adapter->num_rx_desc; i++) {
3617		rxbuf = &rxr->rx_buffers[i];
3618		bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3619		    BUS_DMASYNC_POSTREAD);
3620		bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3621		if (rxbuf->m_head) {
3622			rxbuf->m_head->m_next = rxbuf->m_pack;
3623			m_freem(rxbuf->m_head);
3624		}
3625		rxbuf->m_head = NULL;
3626		rxbuf->m_pack = NULL;
3627	}
3628
3629	/* Next replenish the ring */
3630	for (j = 0; j < adapter->num_rx_desc; j++) {
3631		if (igb_get_buf(rxr, j, IGB_CLEAN_BOTH) == ENOBUFS) {
3632			rxr->rx_buffers[j].m_head = NULL;
3633			rxr->rx_buffers[j].m_pack = NULL;
3634			rxr->rx_base[j].read.hdr_addr = 0;
3635			rxr->rx_base[j].read.pkt_addr = 0;
3636			goto fail;
3637		}
3638	}
3639
3640	/* Setup our descriptor indices */
3641	rxr->next_to_check = 0;
3642	rxr->last_cleaned = 0;
3643
3644	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3645	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3646
3647	/* Now set up the LRO interface */
3648	if (igb_enable_lro) {
3649		int err = tcp_lro_init(lro);
3650		if (err) {
3651			device_printf(dev, "LRO Initialization failed!\n");
3652			goto fail;
3653		}
3654		INIT_DEBUGOUT("RX LRO Initialized\n");
3655		lro->ifp = adapter->ifp;
3656	}
3657
3658	return (0);
3659fail:
3660	/*
3661	 * We need to clean up any buffers allocated
3662	 * so far, 'j' is the failing index.
3663	 */
3664	for (int i = 0; i < j; i++) {
3665		rxbuf = &rxr->rx_buffers[i];
3666		if (rxbuf->m_head != NULL) {
3667			bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3668			    BUS_DMASYNC_POSTREAD);
3669			bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3670			m_freem(rxbuf->m_head);
3671			rxbuf->m_head = NULL;
3672		}
3673	}
3674	return (ENOBUFS);
3675}
3676
3677/*********************************************************************
3678 *
3679 *  Initialize all receive rings.
3680 *
3681 **********************************************************************/
3682static int
3683igb_setup_receive_structures(struct adapter *adapter)
3684{
3685	struct rx_ring *rxr = adapter->rx_rings;
3686	int i, j;
3687
3688	for (i = 0; i < adapter->num_rx_queues; i++, rxr++)
3689		if (igb_setup_receive_ring(rxr))
3690			goto fail;
3691
3692	return (0);
3693fail:
3694	 * Free RX buffers allocated so far; we handle only the
3695	 * rings that completed, since the failing ring cleaned
3696	 * up after itself. The value of 'i' is the index of the
3697	 * failed ring, so we pre-decrement it: e.g. if ring 2
3697	 * failed, rings 0 and 1 are cleaned up here.
3698	 * failed ring so we must pre-decrement it.
3699	 */
3700	rxr = adapter->rx_rings;
3701	for (--i; i >= 0; i--, rxr++) {
3702		for (j = 0; j < adapter->num_rx_desc; j++) {
3703			struct igb_rx_buffer *rxbuf;
3704			rxbuf = &rxr->rx_buffers[j];
3705			if (rxbuf->m_head != NULL) {
3706				bus_dmamap_sync(rxr->rxtag, rxbuf->map,
3707			  	  BUS_DMASYNC_POSTREAD);
3708				bus_dmamap_unload(rxr->rxtag, rxbuf->map);
3709				m_freem(rxbuf->m_head);
3710				rxbuf->m_head = NULL;
3711			}
3712		}
3713	}
3714
3715	return (ENOBUFS);
3716}
3717
3718/*********************************************************************
3719 *
3720 *  Enable receive unit.
3721 *
3722 **********************************************************************/
3723static void
3724igb_initialize_receive_units(struct adapter *adapter)
3725{
3726	struct rx_ring	*rxr = adapter->rx_rings;
3727	struct ifnet	*ifp = adapter->ifp;
3728	u32		rctl, rxcsum, psize, srrctl = 0;
3729
3730	INIT_DEBUGOUT("igb_initialize_receive_units: begin");
3731
3732	/*
3733	 * Make sure receives are disabled while setting
3734	 * up the descriptor ring
3735	 */
3736	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
3737	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
3738
3739	/*
3740	** Set up for header split
3741	*/
3742	if (igb_rx_hdr_split) {
3743		/* Use a standard mbuf for the header */
3744		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3745		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3746	} else
3747		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
3748
3749	/*
3750	** Set up for jumbo frames
3751	*/
3752	if (ifp->if_mtu > ETHERMTU) {
3753		rctl |= E1000_RCTL_LPE;
3754		srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3755		rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
3756
3757		/* Set maximum packet len */
3758		psize = adapter->max_frame_size;
3759		/* are we on a vlan? */
3760#if __FreeBSD_version >= 700000
3761		if (adapter->ifp->if_vlantrunk != NULL)
3762#else
3763		if (adapter->ifp->if_nvlans != 0)
3764#endif
3765			psize += VLAN_TAG_SIZE;
3766		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
3767	} else {
3768		rctl &= ~E1000_RCTL_LPE;
3769		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3770		rctl |= E1000_RCTL_SZ_2048;
3771	}
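	/*
	** Worked example of the SRRCTL sizing above, assuming the
	** conventional encoding (BSIZEPKT is in 1 KB units, with
	** E1000_SRRCTL_BSIZEPKT_SHIFT == 10):
	**
	**	4096 >> 10 == 4	 -> 4 KB packet buffer (jumbo case)
	**	2048 >> 10 == 2	 -> 2 KB packet buffer (standard case)
	**
	** The RCTL_SZ_* bits must agree with SRRCTL, which is why
	** both are set together in each branch above.
	*/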
3772
3773	/* Setup the Base and Length of the Rx Descriptor Rings */
3774	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3775		u64 bus_addr = rxr->rxdma.dma_paddr;
3776		u32 rxdctl;
3777
3778		E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(i),
3779		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
3780		E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(i),
3781		    (uint32_t)(bus_addr >> 32));
3782		E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(i),
3783		    (uint32_t)bus_addr);
3784		E1000_WRITE_REG(&adapter->hw, E1000_SRRCTL(i), srrctl);
3785		/* Enable this Queue */
3786		rxdctl = E1000_READ_REG(&adapter->hw, E1000_RXDCTL(i));
3787		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3788		rxdctl &= 0xFFF00000;
3789		rxdctl |= IGB_RX_PTHRESH;
3790		rxdctl |= IGB_RX_HTHRESH << 8;
3791		rxdctl |= IGB_RX_WTHRESH << 16;
3792		E1000_WRITE_REG(&adapter->hw, E1000_RXDCTL(i), rxdctl);
3793	}
3794
3795	/*
3796	** Setup for RX MultiQueue
3797	*/
3798	rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
3799	if (adapter->num_rx_queues > 1) {
3800		u32 random[10], mrqc, shift = 0;
3801		union igb_reta {
3802			u32 dword;
3803			u8  bytes[4];
3804		} reta;
3805
3806		arc4rand(&random, sizeof(random), 0);
3807		if (adapter->hw.mac.type == e1000_82575)
3808			shift = 6;
3809		/* Warning FM follows */
3810		for (int i = 0; i < 128; i++) {
3811			reta.bytes[i & 3] =
3812			    (i % adapter->num_rx_queues) << shift;
3813			if ((i & 3) == 3)
3814				E1000_WRITE_REG(&adapter->hw,
3815				    E1000_RETA(i >> 2), reta.dword);
3816		}
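		/*
		** Worked example of the RETA packing above: with two
		** queues and a non-82575 MAC (shift == 0) the loop
		** generates 0,1,0,1,... and writes four entries per
		** dword, so for i == 0..3:
		**
		**	reta.bytes[0..3] = { 0, 1, 0, 1 };
		**	E1000_WRITE_REG(hw, E1000_RETA(0), reta.dword);
		**
		** On the 82575 each index is shifted left by 6 to
		** match that MAC's RETA entry format.
		*/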
3817		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
3818		/* Now fill in the RSS random key */
3819		for (int i = 0; i < 10; i++)
3820			E1000_WRITE_REG_ARRAY(&adapter->hw,
3821			    E1000_RSSRK(0), i, random[i]);
3822
3823		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
3824		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
3825		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
3826		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
3827		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
3828		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
3829		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
3830		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
3831
3832		E1000_WRITE_REG(&adapter->hw, E1000_MRQC, mrqc);
3833
3834		/*
3835		** NOTE: Receive Full-Packet Checksum Offload
3836		** is mutually exclusive with Multiqueue; however,
3837		** this is not the same as the TCP/IP checksums,
3838		** which still work.
3839		*/
3840		rxcsum |= E1000_RXCSUM_PCSD;
3841#if __FreeBSD_version >= 800000
3842		/* For SCTP Offload */
3843		if ((adapter->hw.mac.type == e1000_82576)
3844		    && (ifp->if_capenable & IFCAP_RXCSUM))
3845			rxcsum |= E1000_RXCSUM_CRCOFL;
3846#endif
3847	} else {
3848		/* Non RSS setup */
3849		if (ifp->if_capenable & IFCAP_RXCSUM) {
3850			rxcsum |= E1000_RXCSUM_IPPCSE;
3851#if __FreeBSD_version >= 800000
3852			if (adapter->hw.mac.type == e1000_82576)
3853				rxcsum |= E1000_RXCSUM_CRCOFL;
3854#endif
3855		} else
3856			rxcsum &= ~E1000_RXCSUM_TUOFL;
3857	}
3858	E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
3859
3860	/* Setup the Receive Control Register */
3861	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3862	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
3863		   E1000_RCTL_RDMTS_HALF |
3864		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3865
3866	/* Make sure VLAN Filters are off */
3867	rctl &= ~E1000_RCTL_VFE;
3868	/* Don't store bad packets */
3869	rctl &= ~E1000_RCTL_SBP;
3870
3871	/* Enable Receives */
3872	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
3873
3874	/*
3875	 * Setup the HW Rx Head and Tail Descriptor Pointers
3876	 *   - needs to be after enable
3877	 */
3878	for (int i = 0; i < adapter->num_rx_queues; i++) {
3879		E1000_WRITE_REG(&adapter->hw, E1000_RDH(i), 0);
3880		E1000_WRITE_REG(&adapter->hw, E1000_RDT(i),
3881		     adapter->num_rx_desc - 1);
3882	}
3883	return;
3884}
3885
3886/*********************************************************************
3887 *
3888 *  Free receive rings.
3889 *
3890 **********************************************************************/
3891static void
3892igb_free_receive_structures(struct adapter *adapter)
3893{
3894	struct rx_ring *rxr = adapter->rx_rings;
3895
3896	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
3897		struct lro_ctrl	*lro = &rxr->lro;
3898		igb_free_receive_buffers(rxr);
3899		tcp_lro_free(lro);
3900		igb_dma_free(adapter, &rxr->rxdma);
3901	}
3902
3903	free(adapter->rx_rings, M_DEVBUF);
3904}
3905
3906/*********************************************************************
3907 *
3908 *  Free receive ring data structures.
3909 *
3910 **********************************************************************/
3911static void
3912igb_free_receive_buffers(struct rx_ring *rxr)
3913{
3914	struct adapter	*adapter = rxr->adapter;
3915	struct igb_rx_buffer *rx_buffer;
3916
3917	INIT_DEBUGOUT("free_receive_structures: begin");
3918
3919	if (rxr->rx_spare_map) {
3920		bus_dmamap_destroy(rxr->rxtag, rxr->rx_spare_map);
3921		rxr->rx_spare_map = NULL;
3922	}
3923
3924	/* Cleanup any existing buffers */
3925	if (rxr->rx_buffers != NULL) {
3926		rx_buffer = &rxr->rx_buffers[0];
3927		for (int i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3928			if (rx_buffer->m_head != NULL) {
3929				bus_dmamap_sync(rxr->rxtag, rx_buffer->map,
3930				    BUS_DMASYNC_POSTREAD);
3931				bus_dmamap_unload(rxr->rxtag,
3932				    rx_buffer->map);
3933				m_freem(rx_buffer->m_head);
3934				rx_buffer->m_head = NULL;
3935			} else if (rx_buffer->map != NULL)
3936				bus_dmamap_unload(rxr->rxtag,
3937				    rx_buffer->map);
3938			if (rx_buffer->map != NULL) {
3939				bus_dmamap_destroy(rxr->rxtag,
3940				    rx_buffer->map);
3941				rx_buffer->map = NULL;
3942			}
3943		}
3944	}
3945
3946	if (rxr->rx_buffers != NULL) {
3947		free(rxr->rx_buffers, M_DEVBUF);
3948		rxr->rx_buffers = NULL;
3949	}
3950
3951	if (rxr->rxtag != NULL) {
3952		bus_dma_tag_destroy(rxr->rxtag);
3953		rxr->rxtag = NULL;
3954	}
3955}
3956/*********************************************************************
3957 *
3958 *  This routine executes in interrupt context. It replenishes
3959 *  the mbufs in the descriptor ring and sends data which has
3960 *  been DMA'd into host memory to the upper layer.
3961 *
3962 *  We loop at most count times if count is > 0, or until done if
3963 *  count < 0.
3964 *
3965 *  Return TRUE if more to clean, FALSE otherwise
3966 *********************************************************************/
3967static bool
3968igb_rxeof(struct rx_ring *rxr, int count)
3969{
3970	struct adapter		*adapter = rxr->adapter;
3971	struct ifnet		*ifp;
3972	struct lro_ctrl		*lro = &rxr->lro;
3973	struct lro_entry	*queued;
3974	int			i;
3975	u32			staterr;
3976	union e1000_adv_rx_desc	*cur;
3977
3978
3979	IGB_RX_LOCK(rxr);
3980	ifp = adapter->ifp;
3981	i = rxr->next_to_check;
3982	cur = &rxr->rx_base[i];
3983	staterr = cur->wb.upper.status_error;
3984
3985	if (!(staterr & E1000_RXD_STAT_DD)) {
3986		IGB_RX_UNLOCK(rxr);
3987		return FALSE;
3988	}
3989
3990	/* Sync the ring */
3991	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
3992	    BUS_DMASYNC_POSTREAD);
3993
3994	/* Main clean loop */
3995	while ((staterr & E1000_RXD_STAT_DD) &&
3996	    (count != 0) &&
3997	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3998		struct mbuf *sendmp, *mh, *mp;
3999		u16 hlen, plen, hdr, ptype, len_adj;
4000		u8 dopayload, accept_frame, eop;
4001
4002		accept_frame = 1;
4003		hlen = plen = len_adj = 0;
4004		sendmp = mh = mp = NULL;
4005		ptype = (u16)(cur->wb.lower.lo_dword.data >> 4);
4006
4007		/* Sync the buffers */
4008		bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[i].map,
4009			    BUS_DMASYNC_POSTREAD);
4010
4011		/*
4012		** The way the hardware is configured to
4013		** split, it will ONLY use the header buffer
4014		** when header split is enabled, otherwise we
4015		** get normal behavior, i.e., both header and
4016		** payload are DMA'd into the payload buffer.
4017		**
4018		** The fmp test is to catch the case where a
4019		** packet spans multiple descriptors, in that
4020		** case only the first header is valid.
4021		*/
4022		if ((igb_rx_hdr_split) && (rxr->fmp == NULL)) {
4023			hdr = le16toh(cur->
4024			    wb.lower.lo_dword.hs_rss.hdr_info);
4025			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4026			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4027			if (hlen > IGB_HDR_BUF)
4028				hlen = IGB_HDR_BUF;
4029			plen = le16toh(cur->wb.upper.length);
4030			/* Handle the header mbuf */
4031			mh = rxr->rx_buffers[i].m_head;
4032			mh->m_len = hlen;
4033			dopayload = IGB_CLEAN_HEADER;
4034			/*
4035			** Get the payload length, this
4036			** could be zero if it's a small
4037			** packet.
4038			*/
4039			if (plen) {
4040				mp = rxr->rx_buffers[i].m_pack;
4041				mp->m_len = plen;
4042				mp->m_next = NULL;
4043				mp->m_flags &= ~M_PKTHDR;
4044				mh->m_next = mp;
4045				mh->m_flags |= M_PKTHDR;
4046				dopayload = IGB_CLEAN_BOTH;
4047				rxr->rx_split_packets++;
4048			} else {  /* small packets */
4049				mh->m_flags &= ~M_PKTHDR;
4050				mh->m_next = NULL;
4051			}
4052		} else {
4053			/*
4054			** Either no header split, or a
4055			** secondary piece of a fragmented
4056			** split packet.
4057			*/
4058			mh = rxr->rx_buffers[i].m_pack;
4059			mh->m_flags |= M_PKTHDR;
4060			mh->m_len = le16toh(cur->wb.upper.length);
4061			dopayload = IGB_CLEAN_PAYLOAD;
4062		}
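		/*
		** Worked example of the hdr_info decode above,
		** assuming the usual advanced descriptor layout
		** (HDRBUFLEN in bits 14:5, mask 0x7FE0, shift 5):
		**
		**	hdr_info == 0x01C0
		**	hlen == (0x01C0 & 0x7FE0) >> 5 == 14
		**
		** i.e. exactly an Ethernet header lands in the small
		** header mbuf; the payload, if any, is DMA'd into
		** the jumbo cluster.
		*/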
4063
4064		if (staterr & E1000_RXD_STAT_EOP) {
4065			count--;
4066			eop = 1;
4067			/*
4068			** Strip CRC and account for frag
4069			*/
4070			if (mp) {
4071				if (mp->m_len < ETHER_CRC_LEN) {
4072					/* a frag, how much is left? */
4073					len_adj = ETHER_CRC_LEN - mp->m_len;
4074					mp->m_len = 0;
4075				} else
4076					mp->m_len -= ETHER_CRC_LEN;
4077			} else { /* not split */
4078				if (mh->m_len < ETHER_CRC_LEN) {
4079					len_adj = ETHER_CRC_LEN - mh->m_len;
4080					mh->m_len = 0;
4081				} else
4082					mh->m_len -= ETHER_CRC_LEN;
4083			}
4084		} else
4085			eop = 0;
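		/*
		** Worked example of the CRC adjustment above: if the
		** final descriptor of a chain carries only 2 bytes,
		** they are all CRC, and 2 of the 4 CRC bytes sit at
		** the tail of the previous mbuf:
		**
		**	len_adj == ETHER_CRC_LEN - 2 == 2;
		**	mp->m_len = 0;
		**
		** The chaining code below then trims those 2 bytes
		** (len_adj) from rxr->lmp and rxr->fmp.
		*/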
4086
4087		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)
4088			accept_frame = 0;
4089
4090		if (accept_frame) {
4091			if (igb_get_buf(rxr, i, dopayload) != 0) {
4092				ifp->if_iqdrops++;
4093				goto discard;
4094			}
4095			/* Initial frame - setup */
4096			if (rxr->fmp == NULL) {
4097				mh->m_flags |= M_PKTHDR;
4098				mh->m_pkthdr.len = mh->m_len;
4099				rxr->fmp = mh; /* Store the first mbuf */
4100				rxr->lmp = mh;
4101				if (mp) { /* Add payload if split */
4102					mh->m_pkthdr.len += mp->m_len;
4103					rxr->lmp = mh->m_next;
4104				}
4105			} else {
4106				/* Adjust for CRC frag */
4107				if (len_adj) {
4108					rxr->lmp->m_len -= len_adj;
4109					rxr->fmp->m_pkthdr.len -= len_adj;
4110				}
4111				/* Chain mbufs together */
4112				mh->m_flags &= ~M_PKTHDR;
4113				rxr->lmp->m_next = mh;
4114				rxr->lmp = rxr->lmp->m_next;
4115				rxr->fmp->m_pkthdr.len += mh->m_len;
4116			}
4117
4118			if (eop) {
4119				bool sctp = ((ptype & 0x40) != 0);
4120				rxr->fmp->m_pkthdr.rcvif = ifp;
4121				ifp->if_ipackets++;
4122				rxr->rx_packets++;
4123				/* capture data for AIM */
4124				rxr->bytes += rxr->fmp->m_pkthdr.len;
4125				rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4126
4127				igb_rx_checksum(staterr, rxr->fmp, sctp);
4128				if (staterr & E1000_RXD_STAT_VP) {
4129#if __FreeBSD_version >= 700000
4130					rxr->fmp->m_pkthdr.ether_vtag =
4131					    le16toh(cur->wb.upper.vlan);
4132					rxr->fmp->m_flags |= M_VLANTAG;
4133#else
4134					VLAN_INPUT_TAG_NEW(ifp, rxr->fmp,
4135					    (le16toh(cur->wb.upper.vlan) &
4136					    E1000_RXD_SPC_VLAN_MASK));
4137#endif
4138				}
4139				sendmp = rxr->fmp;
4140				rxr->fmp = NULL;
4141				rxr->lmp = NULL;
4142			}
4143		} else {
4144			ifp->if_ierrors++;
4145discard:
4146			/* Reuse loaded DMA map and just update mbuf chain */
4147			if (hlen) {
4148				mh = rxr->rx_buffers[i].m_head;
4149				mh->m_len = MHLEN;
4150				mh->m_next = NULL;
4151			}
4152			mp = rxr->rx_buffers[i].m_pack;
4153			mp->m_len = mp->m_pkthdr.len = adapter->rx_mbuf_sz;
4154			mp->m_data = mp->m_ext.ext_buf;
4155			mp->m_next = NULL;
4156			if (adapter->max_frame_size <=
4157			    (MCLBYTES - ETHER_ALIGN))
4158				m_adj(mp, ETHER_ALIGN);
4159			if (rxr->fmp != NULL) {
4160				/* handles the whole chain */
4161				m_freem(rxr->fmp);
4162				rxr->fmp = NULL;
4163				rxr->lmp = NULL;
4164			}
4165			sendmp = NULL;
4166		}
4167
4168		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4169		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4170
4171		rxr->last_cleaned = i; /* For updating tail */
4172
4173		/* Advance our pointers to the next descriptor. */
4174		if (++i == adapter->num_rx_desc)
4175			i = 0;
4176
4177		/*
4178		** Note that we hold the RX lock through
4179		** the following call, so this ring's
4180		** next_to_check is not going to change.
4181		*/
4182		if (sendmp != NULL) {
4183			/* Use LRO if possible */
4184			if ((!lro->lro_cnt) || (tcp_lro_rx(lro, sendmp, 0)))
4185				/* Pass up to the stack */
4186				(*ifp->if_input)(ifp, sendmp);
4187		}
4188
4189		/* Get the next descriptor */
4190		cur = &rxr->rx_base[i];
4191		staterr = cur->wb.upper.status_error;
4192	}
4193	rxr->next_to_check = i;
4194
4195	/* Advance this ring's "Tail Pointer" on the E1000 */
4196	E1000_WRITE_REG(&adapter->hw, E1000_RDT(rxr->me), rxr->last_cleaned);
4197
4198	/*
4199	 * Flush any outstanding LRO work
4200	 */
4201	while (!SLIST_EMPTY(&lro->lro_active)) {
4202		queued = SLIST_FIRST(&lro->lro_active);
4203		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4204		tcp_lro_flush(lro, queued);
4205	}
4206
4207	IGB_RX_UNLOCK(rxr);
4208
4209	/*
4210	** Do we still have cleaning to do?
4211	** If so, schedule another interrupt.
4212	*/
4213	if (staterr & E1000_RXD_STAT_DD) {
4214		E1000_WRITE_REG(&adapter->hw, E1000_EICS, rxr->eims);
4215		return TRUE;
4216	}
4217
4218	return FALSE;
4219}
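
/*
** Sketch of typical igb_rxeof() usage implied by the count
** semantics documented above (the callers shown are
** illustrative, not a new code path):
**
**	more = igb_rxeof(rxr, limit);	<- bounded clean, e.g.
**					   with a process limit
**	(void) igb_rxeof(rxr, -1);	<- clean until ring empty
**
** A TRUE return means descriptors remain, and the EICS write
** above will have requested another interrupt for this ring.
*/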
4220
4221
4222/*********************************************************************
4223 *
4224 *  Verify that the hardware indicated that the checksum is valid.
4225 *  Inform the stack about the status of the checksum so that
4226 *  the stack doesn't spend time verifying it.
4227 *
4228 *********************************************************************/
4229static void
4230igb_rx_checksum(u32 staterr, struct mbuf *mp, bool sctp)
4231{
4232	u16 status = (u16)staterr;
4233	u8  errors = (u8) (staterr >> 24);
4234
4235	/* Ignore Checksum bit is set */
4236	if (status & E1000_RXD_STAT_IXSM) {
4237		mp->m_pkthdr.csum_flags = 0;
4238		return;
4239	}
4240
4241	if (status & E1000_RXD_STAT_IPCS) {
4242		/* Did it pass? */
4243		if (!(errors & E1000_RXD_ERR_IPE)) {
4244			/* IP Checksum Good */
4245			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4246			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4247		} else
4248			mp->m_pkthdr.csum_flags = 0;
4249	}
4250
4251	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4252		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4253#if __FreeBSD_version >= 800000
4254		if (sctp) /* reassign */
4255			type = CSUM_SCTP_VALID;
4256#endif
4257		/* Did it pass? */
4258		if (!(errors & E1000_RXD_ERR_TCPE)) {
4259			mp->m_pkthdr.csum_flags |= type;
4260			if (!sctp)
4261				mp->m_pkthdr.csum_data = htons(0xffff);
4262		}
4263	}
4264	return;
4265}
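
/*
** Minimal sketch of what the stack sees after the routine above
** for a frame whose IPv4 and TCP checksums both passed (values
** taken directly from the code, nothing new):
**
**	mp->m_pkthdr.csum_flags == CSUM_IP_CHECKED | CSUM_IP_VALID
**	    | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
**	mp->m_pkthdr.csum_data  == 0xffff;
**
** so neither the IP nor the TCP input path recomputes anything.
*/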
4266
4267/*
4268 * This routine is run via a vlan
4269 * config EVENT
4270 */
4271static void
4272igb_register_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4273{
4274	struct adapter	*adapter = ifp->if_softc;
4275	u32		ctrl, rctl, index, vfta;
4276
4277	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
4278	ctrl |= E1000_CTRL_VME;
4279	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
4280
4281	/* Setup for Hardware Filter */
4282	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4283	rctl |= E1000_RCTL_VFE;
4284	rctl &= ~E1000_RCTL_CFIEN;
4285	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4286
4287	/* Make entry in the hardware filter table */
4288	index = ((vtag >> 5) & 0x7F);
4289	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
4290	vfta |= (1 << (vtag & 0x1F));
4291	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
4292
4293	/* Update the frame size */
4294	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4295	    adapter->max_frame_size + VLAN_TAG_SIZE);
4296
4297}
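
/*
** Worked example of the VFTA indexing above: for vtag 100,
**
**	index = (100 >> 5) & 0x7F == 3;
**	bit   =  100 & 0x1F      == 4;
**
** so bit 4 of VFTA[3] is set here, and cleared again by
** igb_unregister_vlan() below.
*/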
4298
4299/*
4300 * This routine is run via a vlan
4301 * unconfig EVENT
4302 */
4303static void
4304igb_unregister_vlan(void *unused, struct ifnet *ifp, u16 vtag)
4305{
4306	struct adapter	*adapter = ifp->if_softc;
4307	u32		index, vfta;
4308
4309	/* Remove entry in the hardware filter table */
4310	index = ((vtag >> 5) & 0x7F);
4311	vfta = E1000_READ_REG_ARRAY(&adapter->hw, E1000_VFTA, index);
4312	vfta &= ~(1 << (vtag & 0x1F));
4313	E1000_WRITE_REG_ARRAY(&adapter->hw, E1000_VFTA, index, vfta);
4314	/* Have all vlans unregistered? */
4315	if (adapter->ifp->if_vlantrunk == NULL) {
4316		u32 rctl;
4317		/* Turn off the filter table */
4318		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
4319		rctl &= ~E1000_RCTL_VFE;
4320		rctl |= E1000_RCTL_CFIEN;
4321		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
4322		/* Reset the frame size */
4323		E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4324		    adapter->max_frame_size);
4325	}
4326}
4327
4328static void
4329igb_enable_intr(struct adapter *adapter)
4330{
4331	/* With RSS set up what to auto clear */
4332	if (adapter->msix_mem) {
4333		E1000_WRITE_REG(&adapter->hw, E1000_EIAC,
4334		    adapter->eims_mask);
4335		E1000_WRITE_REG(&adapter->hw, E1000_EIAM,
4336		    adapter->eims_mask);
4337		E1000_WRITE_REG(&adapter->hw, E1000_EIMS,
4338		    adapter->eims_mask);
4339		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4340		    E1000_IMS_LSC);
4341	} else {
4342		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4343		    IMS_ENABLE_MASK);
4344	}
4345	E1000_WRITE_FLUSH(&adapter->hw);
4346
4347	return;
4348}
4349
4350static void
4351igb_disable_intr(struct adapter *adapter)
4352{
4353	if (adapter->msix_mem) {
4354		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4355		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4356	}
4357	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4358	E1000_WRITE_FLUSH(&adapter->hw);
4359	return;
4360}
4361
4362/*
4363 * Bit of a misnomer, what this really means is
4364 * to enable OS management of the system... aka
4365 * to disable special hardware management features
4366 */
4367static void
4368igb_init_manageability(struct adapter *adapter)
4369{
4370	if (adapter->has_manage) {
4371		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4372		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4373
4374		/* disable hardware interception of ARP */
4375		manc &= ~(E1000_MANC_ARP_EN);
4376
4377		/* enable receiving management packets to the host */
4378		manc |= E1000_MANC_EN_MNG2HOST;
4379		manc2h |= 1 << 5;  /* Mng Port 623 */
4380		manc2h |= 1 << 6;  /* Mng Port 664 */
4381		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4382		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4383	}
4384}
4385
4386/*
4387 * Give control back to hardware management
4388 * controller if there is one.
4389 */
4390static void
4391igb_release_manageability(struct adapter *adapter)
4392{
4393	if (adapter->has_manage) {
4394		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4395
4396		/* re-enable hardware interception of ARP */
4397		manc |= E1000_MANC_ARP_EN;
4398		manc &= ~E1000_MANC_EN_MNG2HOST;
4399
4400		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4401	}
4402}
4403
4404/*
4405 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4406 * For ASF and Pass Through versions of f/w this means that
4407 * the driver is loaded.
4408 *
4409 */
4410static void
4411igb_get_hw_control(struct adapter *adapter)
4412{
4413	u32 ctrl_ext;
4414
4415	/* Let firmware know the driver has taken over */
4416	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4417	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4418	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4419}
4420
4421/*
4422 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4423 * For ASF and Pass Through versions of f/w this means that the
4424 * driver is no longer loaded.
4425 *
4426 */
4427static void
4428igb_release_hw_control(struct adapter *adapter)
4429{
4430	u32 ctrl_ext;
4431
4432	/* Let firmware take over control of h/w */
4433	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4434	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4435	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4436}
4437
4438static int
4439igb_is_valid_ether_addr(uint8_t *addr)
4440{
4441	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4442
4443	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4444		return (FALSE);
4445	}
4446
4447	return (TRUE);
4448}
4449
4450
4451/*
4452 * Enable PCI Wake On Lan capability
4453 */
4454void
4455igb_enable_wakeup(device_t dev)
4456{
4457	u16     cap, status;
4458	u8      id;
4459
4460	/* First find the capabilities pointer */
4461	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4462	/* Read the PM Capabilities */
4463	id = pci_read_config(dev, cap, 1);
4464	if (id != PCIY_PMG)     /* Something wrong */
4465		return;
4466	/* OK, we have the power capabilities, so
4467	   now get the status register */
4468	cap += PCIR_POWER_STATUS;
4469	status = pci_read_config(dev, cap, 2);
4470	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4471	pci_write_config(dev, cap, status, 2);
4472	return;
4473}
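
/*
** Note: the routine above assumes the PM capability is the first
** entry in the PCI capability list. A more general lookup (just
** a sketch, using the standard config-space accessors) would
** follow the next pointers:
**
**	u8 ptr = pci_read_config(dev, PCIR_CAP_PTR, 1);
**	while (ptr != 0) {
**		if (pci_read_config(dev, ptr, 1) == PCIY_PMG)
**			break;	<- found the PM capability
**		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
**	}
*/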
4474
4475
4476/**********************************************************************
4477 *
4478 *  Update the board statistics counters.
4479 *
4480 **********************************************************************/
4481static void
4482igb_update_stats_counters(struct adapter *adapter)
4483{
4484	struct ifnet   *ifp;
4485
4486	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4487	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
4488		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
4489		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
4490	}
4491	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
4492	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
4493	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
4494	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);
4495
4496	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
4497	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
4498	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
4499	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
4500	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
4501	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
4502	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
4503	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
4504	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
4505	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
4506	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
4507	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
4508	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
4509	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
4510	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
4511	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
4512	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
4513	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
4514	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
4515	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);
4516
4517	/* For the 64-bit byte counters the low dword must be read */
4518	/* first; both registers clear on the read of the high dword. */
4519
4520	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL);
4520	adapter->stats.gorc += (u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32;
4521	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL);
4521	adapter->stats.gotc += (u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32;
4522
4523	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
4524	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
4525	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
4526	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
4527	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);
4528
4529	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORL);
4529	adapter->stats.tor += (u64)E1000_READ_REG(&adapter->hw, E1000_TORH) << 32;
4530	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTL);
4530	adapter->stats.tot += (u64)E1000_READ_REG(&adapter->hw, E1000_TOTH) << 32;
4531
4532	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
4533	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
4534	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
4535	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
4536	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
4537	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
4538	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
4539	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
4540	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
4541	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);
4542
4543	adapter->stats.algnerrc +=
4544		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
4545	adapter->stats.rxerrc +=
4546		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
4547	adapter->stats.tncrs +=
4548		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
4549	adapter->stats.cexterr +=
4550		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
4551	adapter->stats.tsctc +=
4552		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
4553	adapter->stats.tsctfc +=
4554		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
4555	ifp = adapter->ifp;
4556
4557	ifp->if_collisions = adapter->stats.colc;
4558
4559	/* Rx Errors */
4560	ifp->if_ierrors = adapter->dropped_pkts + adapter->stats.rxerrc +
4561	    adapter->stats.crcerrs + adapter->stats.algnerrc +
4562	    adapter->stats.ruc + adapter->stats.roc +
4563	    adapter->stats.mpc + adapter->stats.cexterr;
4564
4565	/* Tx Errors */
4566	ifp->if_oerrors = adapter->stats.ecol +
4567	    adapter->stats.latecol + adapter->watchdog_events;
4568}
4569
4570
4571/**********************************************************************
4572 *
4573 *  This routine is called only when igb_display_debug_stats is enabled.
4574 *  This routine provides a way to take a look at important statistics
4575 *  maintained by the driver and hardware.
4576 *
4577 **********************************************************************/
4578static void
4579igb_print_debug_info(struct adapter *adapter)
4580{
4581	device_t dev = adapter->dev;
4582	struct rx_ring *rxr = adapter->rx_rings;
4583	struct tx_ring *txr = adapter->tx_rings;
4584	uint8_t *hw_addr = adapter->hw.hw_addr;
4585
4586	device_printf(dev, "Adapter hardware address = %p \n", hw_addr);
4587	device_printf(dev, "CTRL = 0x%x RCTL = 0x%x \n",
4588	    E1000_READ_REG(&adapter->hw, E1000_CTRL),
4589	    E1000_READ_REG(&adapter->hw, E1000_RCTL));
4590
4591#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4592	device_printf(dev, "IMS = 0x%x EIMS = 0x%x \n",
4593	    E1000_READ_REG(&adapter->hw, E1000_IMS),
4594	    E1000_READ_REG(&adapter->hw, E1000_EIMS));
4595#endif
4596
4597	device_printf(dev, "Packet buffer = Tx=%dk Rx=%dk \n",
4598	    ((E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff0000) >> 16),
4599	    (E1000_READ_REG(&adapter->hw, E1000_PBA) & 0xffff));
4600	device_printf(dev, "Flow control watermarks high = %d low = %d\n",
4601	    adapter->hw.fc.high_water,
4602	    adapter->hw.fc.low_water);
4603
4604	for (int i = 0; i < adapter->num_tx_queues; i++, txr++) {
4605		device_printf(dev, "Queue(%d) tdh = %d, tdt = %d\n", i,
4606		    E1000_READ_REG(&adapter->hw, E1000_TDH(i)),
4607		    E1000_READ_REG(&adapter->hw, E1000_TDT(i)));
4608		device_printf(dev, "no descriptors avail event = %lld\n",
4609		    (long long)txr->no_desc_avail);
4610		device_printf(dev, "TX(%d) MSIX IRQ Handled = %lld\n", txr->me,
4611		    (long long)txr->tx_irq);
4612		device_printf(dev, "TX(%d) Packets sent = %lld\n", txr->me,
4613		    (long long)txr->tx_packets);
4614	}
4615
4616	for (int i = 0; i < adapter->num_rx_queues; i++, rxr++) {
4617		struct lro_ctrl *lro = &rxr->lro;
4618		device_printf(dev, "Queue(%d) rdh = %d, rdt = %d\n", i,
4619		    E1000_READ_REG(&adapter->hw, E1000_RDH(i)),
4620		    E1000_READ_REG(&adapter->hw, E1000_RDT(i)));
4621		device_printf(dev, "RX(%d) Packets received = %lld\n", rxr->me,
4622		    (long long)rxr->rx_packets);
4623		device_printf(dev, "RX(%d) Split Packets = %lld\n", rxr->me,
4624		    (long long)rxr->rx_split_packets);
4625		device_printf(dev, "RX(%d) Byte count = %lld\n", rxr->me,
4626		    (long long)rxr->rx_bytes);
4627		device_printf(dev, "RX(%d) MSIX IRQ Handled = %lld\n", rxr->me,
4628		    (long long)rxr->rx_irq);
4629		device_printf(dev, "RX(%d) LRO Queued = %d\n",
4630		    rxr->me, lro->lro_queued);
4631		device_printf(dev, "RX(%d) LRO Flushed = %d\n",
4632		    rxr->me, lro->lro_flushed);
4633	}
4634
4635	device_printf(dev, "LINK MSIX IRQ Handled = %u\n", adapter->link_irq);
4636
4637	device_printf(dev, "Mbuf defrag failed = %ld\n",
4638	    adapter->mbuf_defrag_failed);
4639	device_printf(dev, "Std mbuf header failed = %ld\n",
4640	    adapter->mbuf_header_failed);
4641	device_printf(dev, "Std mbuf packet failed = %ld\n",
4642	    adapter->mbuf_packet_failed);
4643	device_printf(dev, "Driver dropped packets = %ld\n",
4644	    adapter->dropped_pkts);
4645	device_printf(dev, "Driver tx dma failure in xmit = %ld\n",
4646		adapter->no_tx_dma_setup);
4647}
4648
4649static void
4650igb_print_hw_stats(struct adapter *adapter)
4651{
4652	device_t dev = adapter->dev;
4653
4654	device_printf(dev, "Excessive collisions = %lld\n",
4655	    (long long)adapter->stats.ecol);
4656#if	(DEBUG_HW > 0)  /* Don't output these errors normally */
4657	device_printf(dev, "Symbol errors = %lld\n",
4658	    (long long)adapter->stats.symerrs);
4659#endif
4660	device_printf(dev, "Sequence errors = %lld\n",
4661	    (long long)adapter->stats.sec);
4662	device_printf(dev, "Defer count = %lld\n",
4663	    (long long)adapter->stats.dc);
4664	device_printf(dev, "Missed Packets = %lld\n",
4665	    (long long)adapter->stats.mpc);
4666	device_printf(dev, "Receive No Buffers = %lld\n",
4667	    (long long)adapter->stats.rnbc);
4668	/* RLEC is inaccurate on some hardware, calculate our own. */
4669	device_printf(dev, "Receive Length Errors = %lld\n",
4670	    ((long long)adapter->stats.roc + (long long)adapter->stats.ruc));
4671	device_printf(dev, "Receive errors = %lld\n",
4672	    (long long)adapter->stats.rxerrc);
4673	device_printf(dev, "Crc errors = %lld\n",
4674	    (long long)adapter->stats.crcerrs);
4675	device_printf(dev, "Alignment errors = %lld\n",
4676	    (long long)adapter->stats.algnerrc);
4677	/* On 82575 these are collision counts */
4678	device_printf(dev, "Collision/Carrier extension errors = %lld\n",
4679	    (long long)adapter->stats.cexterr);
4680	device_printf(dev, "RX overruns = %ld\n", adapter->rx_overruns);
4681	device_printf(dev, "watchdog timeouts = %ld\n",
4682	    adapter->watchdog_events);
4683	device_printf(dev, "XON Rcvd = %lld\n",
4684	    (long long)adapter->stats.xonrxc);
4685	device_printf(dev, "XON Xmtd = %lld\n",
4686	    (long long)adapter->stats.xontxc);
4687	device_printf(dev, "XOFF Rcvd = %lld\n",
4688	    (long long)adapter->stats.xoffrxc);
4689	device_printf(dev, "XOFF Xmtd = %lld\n",
4690	    (long long)adapter->stats.xofftxc);
4691	device_printf(dev, "Good Packets Rcvd = %lld\n",
4692	    (long long)adapter->stats.gprc);
4693	device_printf(dev, "Good Packets Xmtd = %lld\n",
4694	    (long long)adapter->stats.gptc);
4695	device_printf(dev, "TSO Contexts Xmtd = %lld\n",
4696	    (long long)adapter->stats.tsctc);
4697	device_printf(dev, "TSO Contexts Failed = %lld\n",
4698	    (long long)adapter->stats.tsctfc);
4699}
4700
4701/**********************************************************************
4702 *
4703 *  This routine provides a way to dump out the adapter eeprom,
4704 *  often a useful debug/service tool. This only dumps the first
4705 *  32 words; the stuff that matters is within that extent.
4706 *
4707 **********************************************************************/
4708static void
4709igb_print_nvm_info(struct adapter *adapter)
4710{
4711	u16	eeprom_data;
4712	int	i, j, row = 0;
4713
4714	/* It's a bit crude, but it gets the job done */
4715	printf("\nInterface EEPROM Dump:\n");
4716	printf("Offset\n0x0000  ");
4717	for (i = 0, j = 0; i < 32; i++, j++) {
4718		if (j == 8) { /* Make the offset block */
4719			j = 0; ++row;
4720			printf("\n0x00%x0  ", row);
4721		}
4722		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4723		printf("%04x ", eeprom_data);
4724	}
4725	printf("\n");
4726}
4727
4728static int
4729igb_sysctl_debug_info(SYSCTL_HANDLER_ARGS)
4730{
4731	struct adapter *adapter;
4732	int error;
4733	int result;
4734
4735	result = -1;
4736	error = sysctl_handle_int(oidp, &result, 0, req);
4737
4738	if (error || !req->newptr)
4739		return (error);
4740
4741	if (result == 1) {
4742		adapter = (struct adapter *)arg1;
4743		igb_print_debug_info(adapter);
4744	}
4745	/*
4746	 * This value will cause a hex dump of the
4747	 * first 32 16-bit words of the EEPROM to
4748	 * the screen.
4749	 */
4750	if (result == 2) {
4751		adapter = (struct adapter *)arg1;
4752		igb_print_nvm_info(adapter);
4753	}
4754
4755	return (error);
4756}
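
/*
** Usage sketch, assuming this handler is registered at attach
** time under the device's sysctl tree as a node named "debug"
** (the actual name is set where the sysctl is created):
**
**	sysctl dev.igb.0.debug=1   -> dump driver debug info
**	sysctl dev.igb.0.debug=2   -> hex dump the EEPROM
*/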
4757
4758
4759static int
4760igb_sysctl_stats(SYSCTL_HANDLER_ARGS)
4761{
4762	struct adapter *adapter;
4763	int error;
4764	int result;
4765
4766	result = -1;
4767	error = sysctl_handle_int(oidp, &result, 0, req);
4768
4769	if (error || !req->newptr)
4770		return (error);
4771
4772	if (result == 1) {
4773		adapter = (struct adapter *)arg1;
4774		igb_print_hw_stats(adapter);
4775	}
4776
4777	return (error);
4778}
4779
4780static void
4781igb_add_rx_process_limit(struct adapter *adapter, const char *name,
4782	const char *description, int *limit, int value)
4783{
4784	*limit = value;
4785	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4786	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4787	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
4788}
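
/*
** Typical call (a sketch; the exact strings and default value
** live at the attach-time call site, not here):
**
**	igb_add_rx_process_limit(adapter, "rx_processing_limit",
**	    "max number of rx packets to process",
**	    &adapter->rx_process_limit, 100);
**
** after which the limit is tunable at runtime via
** sysctl dev.igb.<unit>.rx_processing_limit.
*/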
4789
4790#ifdef IGB_TIMESYNC
4791/*
4792 * Initialize the Time Sync Feature
4793 */
4794static int
4795igb_tsync_init(struct adapter *adapter)
4796{
4797	device_t	dev = adapter->dev;
4798	u32		tx_ctl, rx_ctl, val;
4799
4800
4801	E1000_WRITE_REG(&adapter->hw, E1000_TIMINCA, (1<<24) |
4802	    20833/PICOSECS_PER_TICK);
4803
4804	adapter->last_stamp =  E1000_READ_REG(&adapter->hw, E1000_SYSTIML);
4805	adapter->last_stamp |= (u64)E1000_READ_REG(&adapter->hw,
4806	    E1000_SYSTIMH) << 32ULL;
4807
4808	/* Enable the TX side */
4809	tx_ctl =  E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4810	tx_ctl |= 0x10;
4811	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4812	E1000_WRITE_FLUSH(&adapter->hw);
4813
4814	tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4815	if ((tx_ctl & 0x10) == 0) {
4816		device_printf(dev, "Failed to enable TX timestamping\n");
4817		return (ENXIO);
4818	}
4819
4820	/* Enable RX */
4821	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4822	rx_ctl |= 0x10; /* Enable the feature */
4823	rx_ctl |= 0x04; /* This value turns on Ver 1 and 2 */
4824	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4825
4826	/*
4827	 * Ethertype Filter Queue Filter[0][15:0] = 0x88F7 (Ethertype)
4828	 * Ethertype Filter Queue Filter[0][26] = 0x1 (Enable filter)
4829	 * Ethertype Filter Queue Filter[0][31] = 0x1 (Enable Timestamping)
4830	 * Ethertype Filter Queue Filter[0][30] = 0x1 (Enable Timestamping)
4831	E1000_WRITE_REG(&adapter->hw, E1000_ETQF(0), 0x440088f7);
4832	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCFG, 0x0);
4833
4834	/*
4835	 * Source Port Queue Filter Setup:
4836	 *  this is for UDP port filtering
4837	 */
4838	E1000_WRITE_REG(&adapter->hw, E1000_SPQF(0), TSYNC_PORT);
4839	/* Protocol = UDP, enable Timestamp, and filter on source/protocol */
4840	val = (0x11 | (1 << 27) | (6 << 28));
4841	E1000_WRITE_REG(&adapter->hw, E1000_FTQF(0), val);
4842
4843	E1000_WRITE_FLUSH(&adapter->hw);
4844
4845	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4846	if ((rx_ctl & 0x10) == 0) {
4847		device_printf(dev, "Failed to enable RX timestamping\n");
4848		return (ENXIO);
4849	}
4850
4851	device_printf(dev, "IEEE 1588 Precision Time Protocol enabled\n");
4852
4853	return (0);
4854}
4855
4856/*
4857 * Disable the Time Sync Feature
4858 */
4859static void
4860igb_tsync_disable(struct adapter *adapter)
4861{
4862	u32		tx_ctl, rx_ctl;
4863
4864	tx_ctl =  E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4865	tx_ctl &= ~0x10;
4866	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCTXCTL, tx_ctl);
4867	E1000_WRITE_FLUSH(&adapter->hw);
4868
4869	/* Invalidate TX Timestamp */
4870	E1000_READ_REG(&adapter->hw, E1000_TXSTMPH);
4871
4872	tx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCTXCTL);
4873	if (tx_ctl & 0x10)
4874		HW_DEBUGOUT("Failed to disable TX timestamping\n");
4875
4876	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4877	rx_ctl &= ~0x10;
4878
4879	E1000_WRITE_REG(&adapter->hw, E1000_TSYNCRXCTL, rx_ctl);
4880	E1000_WRITE_FLUSH(&adapter->hw);
4881
4882	/* Invalidate RX Timestamp */
4883	E1000_READ_REG(&adapter->hw, E1000_RXSATRH);
4884
4885	rx_ctl = E1000_READ_REG(&adapter->hw, E1000_TSYNCRXCTL);
4886	if (rx_ctl & 0x10)
4887		HW_DEBUGOUT("Failed to disable RX timestamping\n");
4888
4889	return;
4890}
4891
4892#endif /* IGB_TIMESYNC */
4893