/******************************************************************************

  Copyright (c) 2001-2011, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: head/sys/dev/e1000/if_igb.c 228415 2011-12-11 18:46:14Z jfv $*/


#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_altq.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#if __FreeBSD_version >= 800000
#include <sys/buf_ring.h>
#endif
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/rman.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/eventhandler.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <machine/smp.h>
#include <machine/bus.h>
#include <machine/resource.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>

#include <net/if_types.h>
#include <net/if_vlan_var.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <machine/in_cksum.h>
#include <dev/led/led.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>

#include "e1000_api.h"
#include "e1000_82575.h"
#include "if_igb.h"

/*********************************************************************
 *  Set this to one to display debug statistics
 *********************************************************************/
int	igb_display_debug_stats = 0;

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "version - 2.3.1";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{ 0x8086, E1000_DEV_ID_82575EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575EB_FIBER_SERDES,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82575GB_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_NS_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_SERDES_QUAD,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_QUAD_COPPER_ET2,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82576_VF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_COPPER_DUAL,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82580_QUAD_FIBER,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_SFP,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_DH89XXCC_BACKPLANE,
						PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_SGMII,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_I350_VF,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry */
	{ 0, 0, 0, 0, 0}
};

/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#if __FreeBSD_version >= 800000
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *,
		    struct tx_ring *, struct mbuf *);
static void	igb_qflush(struct ifnet *);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline	void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *, struct mbuf *);
static bool	igb_tso_setup(struct tx_ring *, struct mbuf *, int,
		    struct ip *, struct tcphdr *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int 	igb_is_valid_ether_addr(u8 *);
static void     igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void     igb_get_hw_control(struct adapter *);
static void     igb_release_hw_control(struct adapter *);
static void     igb_enable_wakeup(device_t);
static void     igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	{0, 0}
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);

/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
TUNABLE_INT("hw.igb.rxd", &igb_rxd);
TUNABLE_INT("hw.igb.txd", &igb_txd);

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
TUNABLE_INT("hw.igb.enable_aim", &igb_enable_aim);

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
TUNABLE_INT("hw.igb.enable_msix", &igb_enable_msix);

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
TUNABLE_INT("hw.igb.max_interrupt_rate", &igb_max_interrupt_rate);

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits. But
** another plus is that small packets often fit
** into the header and thus use no cluster. It's
** a very workload-dependent feature.
*/
static bool igb_header_split = FALSE;
TUNABLE_INT("hw.igb.hdr_split", &igb_header_split);

/*
** This will autoconfigure based on
** the number of CPUs if left at 0.
*/
static int igb_num_queues = 0;
TUNABLE_INT("hw.igb.num_queues", &igb_num_queues);
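
/*
** Illustrative example only: TUNABLE_INT() reads these knobs from the
** kernel environment, so they are normally set in /boot/loader.conf
** before the module loads, e.g.:
**
**	hw.igb.rxd="2048"
**	hw.igb.txd="2048"
**	hw.igb.enable_aim="1"
**	hw.igb.max_interrupt_rate="8000"
**	hw.igb.num_queues="0"		# 0 = autoconfigure from CPU count
**
** The values above are examples, not recommendations.
*/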

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on an
 *  adapter based on the PCI vendor/device ID of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[60];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == PCI_ANY_ID)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == PCI_ANY_ID))) {
			sprintf(adapter_name, "%s %s",
				igb_strings[ent->index],
				igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}

	return (ENXIO);
}

/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTL stuff */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
	    &igb_enable_aim, 1, "Interrupt Moderation");

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctl for limiting the amount of work done in the taskqueue */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, 100);

	/*
	 * Validate number of transmit and receive descriptors. It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
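	/*
	 * (Sketch of the arithmetic, assuming the usual definitions in
	 * if_igb.h: IGB_DBA_ALIGN is 128 bytes and a descriptor is 16
	 * bytes, so the ring length must be a multiple of 8 descriptors.)
	 */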
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;

	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats =
		    (struct e1000_vf_stats *)malloc(sizeof \
		    (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats =
		    (struct e1000_hw_stats *)malloc(sizeof \
		    (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		igb_set_sysctl_value(adapter, "eee_disabled",
		    "disable Energy Efficient Ethernet",
		    &adapter->hw.dev_spec._82575.eee_disable,
		    TRUE);
		e1000_set_eee_i350(&adapter->hw);
	}

	/*
	** Start from a known state: this is
	** important when reading the NVM and
	** MAC address from it.
	*/
	e1000_reset_hw(&adapter->hw);

	/* Make sure we have a good EEPROM before we read from it */
	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again,
		** if it fails a second time it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	     igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	     igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |=  IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	igb_detach(dev);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}

/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	ether_ifdetach(adapter->ifp);

	callout_drain(&adapter->timer);

	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}

/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING))
		igb_start(ifp);

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and/or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			if (txr->tx_avail <= IGB_MAX_SCATTER)
				txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status |= IGB_QUEUE_WORKING;
	}
}

/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#if __FreeBSD_version >= 800000
/*
** Multiqueue Transmit driver
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int 			i, err = 0;
	bool			moveable = TRUE;

	/* Which queue to use */
	if ((m->m_flags & M_FLOWID) != 0) {
		i = m->m_pkthdr.flowid % adapter->num_queues;
		moveable = FALSE;
	} else
		i = curcpu % adapter->num_queues;

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];
	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
	    IGB_TX_TRYLOCK(txr)) {
		err = igb_mq_start_locked(ifp, txr, m);
		IGB_TX_UNLOCK(txr);
	} else {
		err = drbr_enqueue(ifp, txr->br, m);
		taskqueue_enqueue(que->tq, &que->que_task);
	}

	return (err);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr, struct mbuf *m)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq;

	IGB_TX_LOCK_ASSERT(txr);

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    (txr->queue_status == IGB_QUEUE_DEPLETED) ||
	    adapter->link_active == 0) {
		if (m != NULL)
			err = drbr_enqueue(ifp, txr->br, m);
		return (err);
	}

	enq = 0;
	if (m == NULL) {
		next = drbr_dequeue(ifp, txr->br);
	} else if (drbr_needs_enqueue(ifp, txr->br)) {
		if ((err = drbr_enqueue(ifp, txr->br, m)) != 0)
			return (err);
		next = drbr_dequeue(ifp, txr->br);
	} else
		next = m;

	/* Process the queue */
	while (next != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
			break;
		}
		enq++;
		drbr_stats_update(ifp, next->m_pkthdr.len, next->m_flags);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
		next = drbr_dequeue(ifp, txr->br);
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status |= IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);
	if (txr->tx_avail <= IGB_MAX_SCATTER)
		txr->queue_status |= IGB_QUEUE_DEPLETED;
	return (err);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* __FreeBSD_version >= 800000 */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways. It is used by the stack as
 *  an init entry point in the network interface structure. It is
 *  also used by the driver as a hw/sw initialization routine to
 *  get to a consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	      ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type == e1000_82576)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
	}

	if (ifp->if_capenable & IFCAP_TSO4)
		ifp->if_hwassist |= CSUM_TSO;

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
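
	/*
	 * (For reference, and assuming the stock FreeBSD mbuf constants:
	 * MCLBYTES is a 2KB cluster, MJUMPAGESIZE is one page -- 4KB on
	 * most platforms -- and MJUM9BYTES is a 9KB jumbo cluster, so a
	 * 9000-byte MTU ends up using 9KB clusters here.)
	 */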

	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */

	e1000_set_eee_i350(&adapter->hw);
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}


static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, adapter->rx_process_limit, NULL);

		IGB_TX_LOCK(txr);
		if (igb_txeof(txr))
			more = TRUE;
#if __FreeBSD_version >= 800000
		/* Process the stack queue only if not depleted */
		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
		    !drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr, NULL);
#else
		igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		/* Do we need another? */
		if (more || (ifp->if_drv_flags & IFF_DRV_OACTIVE)) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
}

/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject?  */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt.  */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
/*********************************************************************
 *
 *  Legacy polling routine: if using this code you MUST be sure that
 *  multiqueue is not defined, i.e., set igb_num_queues to 1.
 *
 *********************************************************************/
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que = adapter->queues;
	struct tx_ring		*txr = adapter->tx_rings;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link(adapter, 0);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	igb_rxeof(que, count, &rx_done);

	IGB_TX_LOCK(txr);
	do {
		more = igb_txeof(txr);
	} while (loop-- && more);
#if __FreeBSD_version >= 800000
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr, NULL);
#else
	igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);
	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */

/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_tx, more_rx;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	more_tx = igb_txeof(txr);
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (igb_enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if below gigabit speed */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC;  /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;
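
	/*
	 * (Illustrative example of the calculation above, not part of the
	 * original code: with ~1500-byte average frames, newitr becomes
	 * 1500 + 24 = 1524, which is already under the 3000 cap and above
	 * the 300-1200 "mid range", so it is halved to 762 and then masked
	 * to 760 before being written to EITR on the next interrupt.)
	 */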

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed*/
	if (more_tx || more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}


/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32       	icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;
	u_char fiber_type = IFM_1000_SX;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes))
		ifmr->ifm_active |= fiber_type | IFM_FDX;
	else {
		switch (adapter->link_speed) {
		case 10:
			ifmr->ifm_active |= IFM_10_T;
			break;
		case 100:
			ifmr->ifm_active |= IFM_100_TX;
			break;
		case 1000:
			ifmr->ifm_active |= IFM_1000_T;
			break;
		}
		if (adapter->link_duplex == FULL_DUPLEX)
			ifmr->ifm_active |= IFM_FDX;
		else
			ifmr->ifm_active |= IFM_HDX;
	}
	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt option with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}


/*********************************************************************
 *
 *  This routine maps the mbufs to Advanced TX descriptors.
 *
 **********************************************************************/
static int
igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter		*adapter = txr->adapter;
	bus_dma_segment_t	segs[IGB_MAX_SCATTER];
	bus_dmamap_t		map;
	struct igb_tx_buffer	*tx_buffer, *tx_buffer_mapped;
	union e1000_adv_tx_desc	*txd = NULL;
	struct mbuf		*m_head = *m_headp;
	struct ether_vlan_header *eh = NULL;
	struct ip		*ip = NULL;
	struct tcphdr		*th = NULL;
	u32			hdrlen, cmd_type_len, olinfo_status = 0;
	int			ehdrlen, poff;
	int			nsegs, i, first, last = 0;
	int			error, do_tso, remap = 1;

	/* Set basic descriptor constants */
	cmd_type_len = E1000_ADVTXD_DTYP_DATA;
	cmd_type_len |= E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

retry:
	m_head = *m_headp;
	do_tso = ((m_head->m_pkthdr.csum_flags & CSUM_TSO) != 0);
	hdrlen = ehdrlen = poff = 0;

	/*
	 * Intel recommends that the entire IP/TCP header length reside in
	 * a single buffer. If multiple descriptors are used to describe
	 * the IP and TCP header, each descriptor should describe one or
	 * more complete headers; descriptors referencing only parts of
	 * headers are not supported. If all layer headers are not
	 * coalesced into a single buffer, each buffer should not cross a
	 * 4KB boundary, or be larger than the maximum read request size.
	 * The controller also requires modifying the IP/TCP header to
	 * make TSO work, so we first get a writable mbuf chain, then
	 * coalesce the ethernet/IP/TCP header into a single buffer to
	 * meet the requirement of the controller. This also simplifies
	 * IP/TCP/UDP checksum offloading, which has similar restrictions.
	 */
1711	if (do_tso || m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
1712		if (do_tso || (m_head->m_next != NULL &&
1713		    m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)) {
1714			if (M_WRITABLE(*m_headp) == 0) {
1715				m_head = m_dup(*m_headp, M_DONTWAIT);
1716				m_freem(*m_headp);
1717				if (m_head == NULL) {
1718					*m_headp = NULL;
1719					return (ENOBUFS);
1720				}
1721				*m_headp = m_head;
1722			}
1723		}
1724		/*
1725		 * Assume IPv4; we don't have TSO/checksum offload support
1726		 * for IPv6 yet.
1727		 */
1728		ehdrlen = sizeof(struct ether_header);
1729		m_head = m_pullup(m_head, ehdrlen);
1730		if (m_head == NULL) {
1731			*m_headp = NULL;
1732			return (ENOBUFS);
1733		}
1734		eh = mtod(m_head, struct ether_vlan_header *);
1735		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1736			ehdrlen = sizeof(struct ether_vlan_header);
1737			m_head = m_pullup(m_head, ehdrlen);
1738			if (m_head == NULL) {
1739				*m_headp = NULL;
1740				return (ENOBUFS);
1741			}
1742		}
1743		m_head = m_pullup(m_head, ehdrlen + sizeof(struct ip));
1744		if (m_head == NULL) {
1745			*m_headp = NULL;
1746			return (ENOBUFS);
1747		}
1748		ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1749		poff = ehdrlen + (ip->ip_hl << 2);
1750		if (do_tso) {
1751			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1752			if (m_head == NULL) {
1753				*m_headp = NULL;
1754				return (ENOBUFS);
1755			}
1756			/*
1757			 * The pseudo TCP checksum does not include the TCP payload
1758			 * length, so the driver must recompute the checksum here to
1759			 * match what the hardware expects to see, in adherence to
1760			 * Microsoft's Large Send specification.
1761			 */
1762			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1763			th->th_sum = in_pseudo(ip->ip_src.s_addr,
1764			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1765			/* Keep track of the full header length */
1766			hdrlen = poff + (th->th_off << 2);
1767		} else if (m_head->m_pkthdr.csum_flags & CSUM_TCP) {
1768			m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
1769			if (m_head == NULL) {
1770				*m_headp = NULL;
1771				return (ENOBUFS);
1772			}
1773			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1774			m_head = m_pullup(m_head, poff + (th->th_off << 2));
1775			if (m_head == NULL) {
1776				*m_headp = NULL;
1777				return (ENOBUFS);
1778			}
1779			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1780			th = (struct tcphdr *)(mtod(m_head, char *) + poff);
1781		} else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) {
1782			m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
1783			if (m_head == NULL) {
1784				*m_headp = NULL;
1785				return (ENOBUFS);
1786			}
1787			ip = (struct ip *)(mtod(m_head, char *) + ehdrlen);
1788		}
1789		*m_headp = m_head;
1790	}
1791
1792	/*
1793	 * Map the packet for DMA
1794	 *
1795	 * Capture the first descriptor index;
1796	 * this descriptor will hold the index
1797	 * of the EOP descriptor, which is the
1798	 * only one that now gets a DONE bit writeback.
1799	 */
1800	first = txr->next_avail_desc;
1801	tx_buffer = &txr->tx_buffers[first];
1802	tx_buffer_mapped = tx_buffer;
1803	map = tx_buffer->map;
1804
1805	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1806	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1807
1808	/*
1809	 * There are two types of errors we can (try) to handle:
1810	 * - EFBIG means the mbuf chain was too long and bus_dma ran
1811	 *   out of segments.  Defragment the mbuf chain and try again.
1812	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
1813	 *   at this point in time.  Defer sending and try again later.
1814	 * All other errors, in particular EINVAL, are fatal and prevent the
1815	 * mbuf chain from ever going through.  Drop it and report error.
1816	 */
1817	if (error == EFBIG && remap) {
1818		struct mbuf *m;
1819
1820		m = m_defrag(*m_headp, M_DONTWAIT);
1821		if (m == NULL) {
1822			adapter->mbuf_defrag_failed++;
1823			m_freem(*m_headp);
1824			*m_headp = NULL;
1825			return (ENOBUFS);
1826		}
1827		*m_headp = m;
1828
1829		/* Try it again, but only once */
1830		remap = 0;
1831		goto retry;
1832	} else if (error == ENOMEM) {
1833		adapter->no_tx_dma_setup++;
1834		return (error);
1835	} else if (error != 0) {
1836		adapter->no_tx_dma_setup++;
1837		m_freem(*m_headp);
1838		*m_headp = NULL;
1839		return (error);
1840	}
1841
1842	/*
1843	** Make sure we don't overrun the ring;
1844	** we need nsegs descriptors plus one for
1845	** the context descriptor used for the
1846	** offloads, and a couple of spare slots.
1847	*/
1848        if ((nsegs + 1) > (txr->tx_avail - 2)) {
1849                txr->no_desc_avail++;
1850		bus_dmamap_unload(txr->txtag, map);
1851		return (ENOBUFS);
1852        }
1853	m_head = *m_headp;
1854
1855	/* Do hardware assists:
1856         * Set up the context descriptor, used
1857         * when any hardware offload is done.
1858         * This includes CSUM, VLAN, and TSO.
1859         * It will use the first descriptor.
1860         */
1861
1862	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1863		if (igb_tso_setup(txr, m_head, ehdrlen, ip, th)) {
1864			cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
1865			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
1866			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1867		} else
1868			return (ENXIO);
1869	} else if (igb_tx_ctx_setup(txr, m_head))
1870			olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
1871
1872	/* Calculate payload length */
1873	olinfo_status |= ((m_head->m_pkthdr.len - hdrlen)
1874	    << E1000_ADVTXD_PAYLEN_SHIFT);
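	/*
	 * For TSO, hdrlen holds the combined ethernet/IP/TCP header length,
	 * so PAYLEN covers only the TCP payload; for other frames hdrlen is
	 * zero and the full packet length is used.
	 */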
1875
1876	/* 82575 needs the queue index added */
1877	if (adapter->hw.mac.type == e1000_82575)
1878		olinfo_status |= txr->me << 4;
1879
1880	/* Set up our transmit descriptors */
1881	i = txr->next_avail_desc;
1882	for (int j = 0; j < nsegs; j++) {
1883		bus_size_t seg_len;
1884		bus_addr_t seg_addr;
1885
1886		tx_buffer = &txr->tx_buffers[i];
1887		txd = (union e1000_adv_tx_desc *)&txr->tx_base[i];
1888		seg_addr = segs[j].ds_addr;
1889		seg_len  = segs[j].ds_len;
1890
1891		txd->read.buffer_addr = htole64(seg_addr);
1892		txd->read.cmd_type_len = htole32(cmd_type_len | seg_len);
1893		txd->read.olinfo_status = htole32(olinfo_status);
1894		last = i;
1895		if (++i == adapter->num_tx_desc)
1896			i = 0;
1897		tx_buffer->m_head = NULL;
1898		tx_buffer->next_eop = -1;
1899	}
1900
1901	txr->next_avail_desc = i;
1902	txr->tx_avail -= nsegs;
1903        tx_buffer->m_head = m_head;
1904
1905	/*
1906	** Here we swap the maps so the last descriptor,
1907	** which gets the completion interrupt, has the
1908	** real map, and the first descriptor gets the
1909	** unused map from this descriptor.
1910	*/
1911	tx_buffer_mapped->map = tx_buffer->map;
1912	tx_buffer->map = map;
1913        bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1914
1915        /*
1916         * Last Descriptor of Packet
1917	 * needs End Of Packet (EOP)
1918	 * and Report Status (RS)
1919         */
1920        txd->read.cmd_type_len |=
1921	    htole32(E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS);
1922	/*
1923	 * Keep track in the first buffer which
1924	 * descriptor will be written back
1925	 */
1926	tx_buffer = &txr->tx_buffers[first];
1927	tx_buffer->next_eop = last;
1928	/* Update the watchdog time early and often */
1929	txr->watchdog_time = ticks;
1930
1931	/*
1932	 * Advance the Transmit Descriptor Tail (TDT); this tells the E1000
1933	 * that this frame is available to transmit.
1934	 */
1935	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1936	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1937	E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1938	++txr->tx_packets;
1939
1940	return (0);
1941}
1942static void
1943igb_set_promisc(struct adapter *adapter)
1944{
1945	struct ifnet	*ifp = adapter->ifp;
1946	struct e1000_hw *hw = &adapter->hw;
1947	u32		reg;
1948
1949	if (adapter->vf_ifp) {
1950		e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1951		return;
1952	}
1953
1954	reg = E1000_READ_REG(hw, E1000_RCTL);
1955	if (ifp->if_flags & IFF_PROMISC) {
1956		reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1957		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1958	} else if (ifp->if_flags & IFF_ALLMULTI) {
1959		reg |= E1000_RCTL_MPE;
1960		reg &= ~E1000_RCTL_UPE;
1961		E1000_WRITE_REG(hw, E1000_RCTL, reg);
1962	}
1963}
1964
1965static void
1966igb_disable_promisc(struct adapter *adapter)
1967{
1968	struct e1000_hw *hw = &adapter->hw;
1969	u32		reg;
1970
1971	if (adapter->vf_ifp) {
1972		e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1973		return;
1974	}
1975	reg = E1000_READ_REG(hw, E1000_RCTL);
1976	reg &=  (~E1000_RCTL_UPE);
1977	reg &=  (~E1000_RCTL_MPE);
1978	E1000_WRITE_REG(hw, E1000_RCTL, reg);
1979}
1980
1981
1982/*********************************************************************
1983 *  Multicast Update
1984 *
1985 *  This routine is called whenever the multicast address list is updated.
1986 *
1987 **********************************************************************/
1988
1989static void
1990igb_set_multi(struct adapter *adapter)
1991{
1992	struct ifnet	*ifp = adapter->ifp;
1993	struct ifmultiaddr *ifma;
1994	u32 reg_rctl = 0;
1995	u8  *mta;
1996
1997	int mcnt = 0;
1998
1999	IOCTL_DEBUGOUT("igb_set_multi: begin");
2000
2001	mta = adapter->mta;
2002	bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2003	    MAX_NUM_MULTICAST_ADDRESSES);
2004
2005#if __FreeBSD_version < 800000
2006	IF_ADDR_LOCK(ifp);
2007#else
2008	if_maddr_rlock(ifp);
2009#endif
2010	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2011		if (ifma->ifma_addr->sa_family != AF_LINK)
2012			continue;
2013
2014		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2015			break;
2016
2017		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2018		    &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2019		mcnt++;
2020	}
2021#if __FreeBSD_version < 800000
2022	IF_ADDR_UNLOCK(ifp);
2023#else
2024	if_maddr_runlock(ifp);
2025#endif
2026
2027	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2028		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2029		reg_rctl |= E1000_RCTL_MPE;
2030		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2031	} else
2032		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2033}
2034
2035
2036/*********************************************************************
2037 *  Timer routine:
2038 *  	This routine checks for link status,
2039 *	updates statistics, and does the watchdog.
2040 *
2041 **********************************************************************/
2042
2043static void
2044igb_local_timer(void *arg)
2045{
2046	struct adapter		*adapter = arg;
2047	device_t		dev = adapter->dev;
2048	struct ifnet		*ifp = adapter->ifp;
2049	struct tx_ring		*txr = adapter->tx_rings;
2050	struct igb_queue	*que = adapter->queues;
2051	int			hung = 0, busy = 0;
2052
2053
2054	IGB_CORE_LOCK_ASSERT(adapter);
2055
2056	igb_update_link_status(adapter);
2057	igb_update_stats_counters(adapter);
2058
2059        /*
        ** Check the status of the TX queues
2061	**	- central locked handling of OACTIVE
2062	**	- watchdog only if all queues show hung
2063        */
2064	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2065		if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2066		    (adapter->pause_frames == 0))
2067			++hung;
2068		if (txr->queue_status & IGB_QUEUE_DEPLETED)
2069			++busy;
2070		if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2071			taskqueue_enqueue(que->tq, &que->que_task);
2072	}
2073	if (hung == adapter->num_queues)
2074		goto timeout;
2075	if (busy == adapter->num_queues)
2076		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2077	else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2078	    (busy < adapter->num_queues))
2079		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2080
2081	adapter->pause_frames = 0;
2082	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2083#ifndef DEVICE_POLLING
2084	/* Schedule all queue interrupts - deadlock protection */
2085	E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2086#endif
2087	return;
2088
2089timeout:
2090	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2091	device_printf(dev, "Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2092            E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2093            E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2094	device_printf(dev, "TX(%d) desc avail = %d, "
2095            "Next TX to Clean = %d\n",
2096            txr->me, txr->tx_avail, txr->next_to_clean);
2097	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2098	adapter->watchdog_events++;
2099	igb_init_locked(adapter);
2100}
2101
2102static void
2103igb_update_link_status(struct adapter *adapter)
2104{
2105	struct e1000_hw *hw = &adapter->hw;
2106	struct ifnet *ifp = adapter->ifp;
2107	device_t dev = adapter->dev;
2108	struct tx_ring *txr = adapter->tx_rings;
2109	u32 link_check, thstat, ctrl;
2110
2111	link_check = thstat = ctrl = 0;
2112
2113	/* Get the cached link value or read for real */
2114        switch (hw->phy.media_type) {
2115        case e1000_media_type_copper:
2116                if (hw->mac.get_link_status) {
2117			/* Do the work to read phy */
2118                        e1000_check_for_link(hw);
2119                        link_check = !hw->mac.get_link_status;
2120                } else
2121                        link_check = TRUE;
2122                break;
2123        case e1000_media_type_fiber:
2124                e1000_check_for_link(hw);
2125                link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2126                                 E1000_STATUS_LU);
2127                break;
2128        case e1000_media_type_internal_serdes:
2129                e1000_check_for_link(hw);
2130                link_check = adapter->hw.mac.serdes_has_link;
2131                break;
2132	/* VF device is type_unknown */
2133        case e1000_media_type_unknown:
2134                e1000_check_for_link(hw);
2135		link_check = !hw->mac.get_link_status;
2136		/* Fall thru */
2137        default:
2138                break;
2139        }
2140
2141	/* Check for thermal downshift or shutdown */
2142	if (hw->mac.type == e1000_i350) {
2143		thstat = E1000_READ_REG(hw, E1000_THSTAT);
2144		ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2145	}
2146
2147	/* Now we check if a transition has happened */
2148	if (link_check && (adapter->link_active == 0)) {
2149		e1000_get_speed_and_duplex(&adapter->hw,
2150		    &adapter->link_speed, &adapter->link_duplex);
2151		if (bootverbose)
2152			device_printf(dev, "Link is up %d Mbps %s\n",
2153			    adapter->link_speed,
2154			    ((adapter->link_duplex == FULL_DUPLEX) ?
2155			    "Full Duplex" : "Half Duplex"));
2156		adapter->link_active = 1;
2157		ifp->if_baudrate = adapter->link_speed * 1000000;
2158		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2159		    (thstat & E1000_THSTAT_LINK_THROTTLE))
2160			device_printf(dev, "Link: thermal downshift\n");
2161		/* This can sleep */
2162		if_link_state_change(ifp, LINK_STATE_UP);
2163	} else if (!link_check && (adapter->link_active == 1)) {
2164		ifp->if_baudrate = adapter->link_speed = 0;
2165		adapter->link_duplex = 0;
2166		if (bootverbose)
2167			device_printf(dev, "Link is Down\n");
2168		if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2169		    (thstat & E1000_THSTAT_PWR_DOWN))
2170			device_printf(dev, "Link: thermal shutdown\n");
2171		adapter->link_active = 0;
2172		/* This can sleep */
2173		if_link_state_change(ifp, LINK_STATE_DOWN);
2174		/* Reset queue state */
2175		for (int i = 0; i < adapter->num_queues; i++, txr++)
2176			txr->queue_status = IGB_QUEUE_IDLE;
2177	}
2178}
2179
2180/*********************************************************************
2181 *
2182 *  This routine disables all traffic on the adapter by issuing a
2183 *  global reset on the MAC and deallocates TX/RX buffers.
2184 *
2185 **********************************************************************/
2186
2187static void
2188igb_stop(void *arg)
2189{
2190	struct adapter	*adapter = arg;
2191	struct ifnet	*ifp = adapter->ifp;
2192	struct tx_ring *txr = adapter->tx_rings;
2193
2194	IGB_CORE_LOCK_ASSERT(adapter);
2195
2196	INIT_DEBUGOUT("igb_stop: begin");
2197
2198	igb_disable_intr(adapter);
2199
2200	callout_stop(&adapter->timer);
2201
2202	/* Tell the stack that the interface is no longer active */
2203	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2204	ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2205
2206	/* Disarm watchdog timer. */
2207	for (int i = 0; i < adapter->num_queues; i++, txr++) {
2208		IGB_TX_LOCK(txr);
2209		txr->queue_status = IGB_QUEUE_IDLE;
2210		IGB_TX_UNLOCK(txr);
2211	}
2212
2213	e1000_reset_hw(&adapter->hw);
2214	E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2215
2216	e1000_led_off(&adapter->hw);
2217	e1000_cleanup_led(&adapter->hw);
2218}
2219
2220
2221/*********************************************************************
2222 *
2223 *  Determine hardware revision.
2224 *
2225 **********************************************************************/
2226static void
2227igb_identify_hardware(struct adapter *adapter)
2228{
2229	device_t dev = adapter->dev;
2230
2231	/* Make sure our PCI config space has the necessary stuff set */
2232	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2233	if (!((adapter->hw.bus.pci_cmd_word & PCIM_CMD_BUSMASTEREN) &&
2234	    (adapter->hw.bus.pci_cmd_word & PCIM_CMD_MEMEN))) {
2235		INIT_DEBUGOUT("Memory Access and/or Bus Master "
2236		    "bits were not set!\n");
2237		adapter->hw.bus.pci_cmd_word |=
2238		(PCIM_CMD_BUSMASTEREN | PCIM_CMD_MEMEN);
2239		pci_write_config(dev, PCIR_COMMAND,
2240		    adapter->hw.bus.pci_cmd_word, 2);
2241	}
2242
2243	/* Save off the information about this board */
2244	adapter->hw.vendor_id = pci_get_vendor(dev);
2245	adapter->hw.device_id = pci_get_device(dev);
2246	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2247	adapter->hw.subsystem_vendor_id =
2248	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2249	adapter->hw.subsystem_device_id =
2250	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2251
2252	/* Set MAC type early for PCI setup */
2253	e1000_set_mac_type(&adapter->hw);
2254
2255	/* Are we a VF device? */
2256	if ((adapter->hw.mac.type == e1000_vfadapt) ||
2257	    (adapter->hw.mac.type == e1000_vfadapt_i350))
2258		adapter->vf_ifp = 1;
2259	else
2260		adapter->vf_ifp = 0;
2261}
2262
2263static int
2264igb_allocate_pci_resources(struct adapter *adapter)
2265{
2266	device_t	dev = adapter->dev;
2267	int		rid;
2268
2269	rid = PCIR_BAR(0);
2270	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2271	    &rid, RF_ACTIVE);
2272	if (adapter->pci_mem == NULL) {
2273		device_printf(dev, "Unable to allocate bus resource: memory\n");
2274		return (ENXIO);
2275	}
2276	adapter->osdep.mem_bus_space_tag =
2277	    rman_get_bustag(adapter->pci_mem);
2278	adapter->osdep.mem_bus_space_handle =
2279	    rman_get_bushandle(adapter->pci_mem);
2280	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2281
2282	adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2283
2284	/* This will setup either MSI/X or MSI */
2285	adapter->msix = igb_setup_msix(adapter);
2286	adapter->hw.back = &adapter->osdep;
2287
2288	return (0);
2289}
2290
2291/*********************************************************************
2292 *
2293 *  Setup the Legacy or MSI Interrupt handler
2294 *
2295 **********************************************************************/
2296static int
2297igb_allocate_legacy(struct adapter *adapter)
2298{
2299	device_t		dev = adapter->dev;
2300	struct igb_queue	*que = adapter->queues;
2301	int			error, rid = 0;
2302
2303	/* Turn off all interrupts */
2304	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2305
2306	/* MSI RID is 1 */
2307	if (adapter->msix == 1)
2308		rid = 1;
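	/* otherwise the default rid of 0 selects the legacy INTx line */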
2309
2310	/* We allocate a single interrupt resource */
2311	adapter->res = bus_alloc_resource_any(dev,
2312	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2313	if (adapter->res == NULL) {
2314		device_printf(dev, "Unable to allocate bus resource: "
2315		    "interrupt\n");
2316		return (ENXIO);
2317	}
2318
2319	/*
2320	 * Try allocating a fast interrupt and the associated deferred
2321	 * processing contexts.
2322	 */
2323	TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2324	/* Make tasklet for deferred link handling */
2325	TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2326	que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2327	    taskqueue_thread_enqueue, &que->tq);
2328	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2329	    device_get_nameunit(adapter->dev));
2330	if ((error = bus_setup_intr(dev, adapter->res,
2331	    INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2332	    adapter, &adapter->tag)) != 0) {
2333		device_printf(dev, "Failed to register fast interrupt "
2334			    "handler: %d\n", error);
2335		taskqueue_free(que->tq);
2336		que->tq = NULL;
2337		return (error);
2338	}
2339
2340	return (0);
2341}
2342
2343
2344/*********************************************************************
2345 *
2346 *  Setup the MSIX Queue Interrupt handlers:
2347 *
2348 **********************************************************************/
2349static int
2350igb_allocate_msix(struct adapter *adapter)
2351{
2352	device_t		dev = adapter->dev;
2353	struct igb_queue	*que = adapter->queues;
2354	int			error, rid, vector = 0;
2355
2356	/* Be sure to start with all interrupts disabled */
2357	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2358	E1000_WRITE_FLUSH(&adapter->hw);
2359
2360	for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2361		rid = vector + 1;
2362		que->res = bus_alloc_resource_any(dev,
2363		    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2364		if (que->res == NULL) {
2365			device_printf(dev,
2366			    "Unable to allocate bus resource: "
2367			    "MSIX Queue Interrupt\n");
2368			return (ENXIO);
2369		}
2370		error = bus_setup_intr(dev, que->res,
2371	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2372		    igb_msix_que, que, &que->tag);
2373		if (error) {
2374			que->res = NULL;
2375			device_printf(dev, "Failed to register Queue handler\n");
2376			return (error);
2377		}
2378#if __FreeBSD_version >= 800504
2379		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2380#endif
2381		que->msix = vector;
2382		if (adapter->hw.mac.type == e1000_82575)
2383			que->eims = E1000_EICR_TX_QUEUE0 << i;
2384		else
2385			que->eims = 1 << vector;
2386		/*
2387		** Bind the msix vector, and thus the
2388		** rings to the corresponding cpu.
2389		*/
2390		if (adapter->num_queues > 1)
2391			bus_bind_intr(dev, que->res, i);
2392		/* Make tasklet for deferred handling */
2393		TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2394		que->tq = taskqueue_create_fast("igb_que", M_NOWAIT,
2395		    taskqueue_thread_enqueue, &que->tq);
2396		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2397		    device_get_nameunit(adapter->dev));
2398	}
2399
2400	/* And Link */
2401	rid = vector + 1;
2402	adapter->res = bus_alloc_resource_any(dev,
2403	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2404	if (adapter->res == NULL) {
2405		device_printf(dev,
2406		    "Unable to allocate bus resource: "
2407		    "MSIX Link Interrupt\n");
2408		return (ENXIO);
2409	}
2410	if ((error = bus_setup_intr(dev, adapter->res,
2411	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2412	    igb_msix_link, adapter, &adapter->tag)) != 0) {
2413		device_printf(dev, "Failed to register Link handler\n");
2414		return (error);
2415	}
2416#if __FreeBSD_version >= 800504
2417	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2418#endif
2419	adapter->linkvec = vector;
2420
2421	return (0);
2422}
2423
2424
2425static void
2426igb_configure_queues(struct adapter *adapter)
2427{
2428	struct	e1000_hw	*hw = &adapter->hw;
2429	struct	igb_queue	*que;
2430	u32			tmp, ivar = 0, newitr = 0;
2431
2432	/* First turn on RSS capability */
2433	if (adapter->hw.mac.type != e1000_82575)
2434		E1000_WRITE_REG(hw, E1000_GPIE,
2435		    E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2436		    E1000_GPIE_PBA | E1000_GPIE_NSICR);
2437
2438	/* Turn on MSIX */
2439	switch (adapter->hw.mac.type) {
2440	case e1000_82580:
2441	case e1000_i350:
2442	case e1000_vfadapt:
2443	case e1000_vfadapt_i350:
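		/*
		 * Each 32-bit IVAR register covers two queues: judging from
		 * the shifts below, bits 7:0 and 15:8 carry the RX and TX
		 * vectors of the even-numbered queue, and bits 23:16 and
		 * 31:24 carry those of the odd-numbered queue.
		 */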
2444		/* RX entries */
2445		for (int i = 0; i < adapter->num_queues; i++) {
2446			u32 index = i >> 1;
2447			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2448			que = &adapter->queues[i];
2449			if (i & 1) {
2450				ivar &= 0xFF00FFFF;
2451				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2452			} else {
2453				ivar &= 0xFFFFFF00;
2454				ivar |= que->msix | E1000_IVAR_VALID;
2455			}
2456			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2457		}
2458		/* TX entries */
2459		for (int i = 0; i < adapter->num_queues; i++) {
2460			u32 index = i >> 1;
2461			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2462			que = &adapter->queues[i];
2463			if (i & 1) {
2464				ivar &= 0x00FFFFFF;
2465				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2466			} else {
2467				ivar &= 0xFFFF00FF;
2468				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2469			}
2470			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2471			adapter->que_mask |= que->eims;
2472		}
2473
2474		/* And for the link interrupt */
2475		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2476		adapter->link_mask = 1 << adapter->linkvec;
2477		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2478		break;
2479	case e1000_82576:
2480		/* RX entries */
2481		for (int i = 0; i < adapter->num_queues; i++) {
2482			u32 index = i & 0x7; /* Each IVAR has two entries */
2483			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2484			que = &adapter->queues[i];
2485			if (i < 8) {
2486				ivar &= 0xFFFFFF00;
2487				ivar |= que->msix | E1000_IVAR_VALID;
2488			} else {
2489				ivar &= 0xFF00FFFF;
2490				ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2491			}
2492			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2493			adapter->que_mask |= que->eims;
2494		}
2495		/* TX entries */
2496		for (int i = 0; i < adapter->num_queues; i++) {
2497			u32 index = i & 0x7; /* Each IVAR has two entries */
2498			ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2499			que = &adapter->queues[i];
2500			if (i < 8) {
2501				ivar &= 0xFFFF00FF;
2502				ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2503			} else {
2504				ivar &= 0x00FFFFFF;
2505				ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2506			}
2507			E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2508			adapter->que_mask |= que->eims;
2509		}
2510
2511		/* And for the link interrupt */
2512		ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2513		adapter->link_mask = 1 << adapter->linkvec;
2514		E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2515		break;
2516
2517	case e1000_82575:
2518                /* Enable MSI-X support */
2519		tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2520                tmp |= E1000_CTRL_EXT_PBA_CLR;
2521                /* Auto-Mask interrupts upon ICR read. */
2522                tmp |= E1000_CTRL_EXT_EIAME;
2523                tmp |= E1000_CTRL_EXT_IRCA;
2524                E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2525
2526		/* Queues */
2527		for (int i = 0; i < adapter->num_queues; i++) {
2528			que = &adapter->queues[i];
2529			tmp = E1000_EICR_RX_QUEUE0 << i;
2530			tmp |= E1000_EICR_TX_QUEUE0 << i;
2531			que->eims = tmp;
2532			E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2533			    i, que->eims);
2534			adapter->que_mask |= que->eims;
2535		}
2536
2537		/* Link */
2538		E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2539		    E1000_EIMS_OTHER);
2540		adapter->link_mask |= E1000_EIMS_OTHER;
2541	default:
2542		break;
2543	}
2544
2545	/* Set the starting interrupt rate */
2546	if (igb_max_interrupt_rate > 0)
2547		newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
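	/*
	 * newitr is the interval derived from the interrupts-per-second
	 * cap; the 0x7FFC mask clears the low two bits and anything above
	 * bit 14 before the value is written to the EITR registers below.
	 */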
2548
2549        if (hw->mac.type == e1000_82575)
2550                newitr |= newitr << 16;
2551        else
2552                newitr |= E1000_EITR_CNT_IGNR;
2553
2554	for (int i = 0; i < adapter->num_queues; i++) {
2555		que = &adapter->queues[i];
2556		E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2557	}
2558
2559	return;
2560}
2561
2562
2563static void
2564igb_free_pci_resources(struct adapter *adapter)
2565{
2566	struct		igb_queue *que = adapter->queues;
2567	device_t	dev = adapter->dev;
2568	int		rid;
2569
2570	/*
2571	** There is a slight possibility of a failure mode
2572	** in attach that will result in entering this function
2573	** before interrupt resources have been initialized, and
2574	** in that case we do not want to execute the loops below.
2575	** We can detect this reliably by the state of the adapter's
2576	** res pointer.
2577	*/
2578	if (adapter->res == NULL)
2579		goto mem;
2580
2581	/*
2582	 * First release all the interrupt resources:
2583	 */
2584	for (int i = 0; i < adapter->num_queues; i++, que++) {
2585		rid = que->msix + 1;
2586		if (que->tag != NULL) {
2587			bus_teardown_intr(dev, que->res, que->tag);
2588			que->tag = NULL;
2589		}
2590		if (que->res != NULL)
2591			bus_release_resource(dev,
2592			    SYS_RES_IRQ, rid, que->res);
2593	}
2594
2595	/* Clean the Legacy or Link interrupt last */
2596	if (adapter->linkvec) /* we are doing MSIX */
2597		rid = adapter->linkvec + 1;
2598	else
2599		rid = (adapter->msix != 0) ? 1 : 0;
2600
2601	if (adapter->tag != NULL) {
2602		bus_teardown_intr(dev, adapter->res, adapter->tag);
2603		adapter->tag = NULL;
2604	}
2605	if (adapter->res != NULL)
2606		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2607
2608mem:
2609	if (adapter->msix)
2610		pci_release_msi(dev);
2611
2612	if (adapter->msix_mem != NULL)
2613		bus_release_resource(dev, SYS_RES_MEMORY,
2614		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2615
2616	if (adapter->pci_mem != NULL)
2617		bus_release_resource(dev, SYS_RES_MEMORY,
2618		    PCIR_BAR(0), adapter->pci_mem);
2619
2620}
2621
2622/*
2623	 * Setup either MSI/X or MSI
2624 */
2625static int
2626igb_setup_msix(struct adapter *adapter)
2627{
2628	device_t dev = adapter->dev;
2629	int rid, want, queues, msgs;
2630
2631	/* tuneable override */
2632	if (igb_enable_msix == 0)
2633		goto msi;
2634
2635	/* First try MSI/X */
2636	rid = PCIR_BAR(IGB_MSIX_BAR);
2637	adapter->msix_mem = bus_alloc_resource_any(dev,
2638	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2639       	if (!adapter->msix_mem) {
2640		/* May not be enabled */
2641		device_printf(adapter->dev,
2642		    "Unable to map MSIX table\n");
2643		goto msi;
2644	}
2645
2646	msgs = pci_msix_count(dev);
2647	if (msgs == 0) { /* system has msix disabled */
2648		bus_release_resource(dev, SYS_RES_MEMORY,
2649		    PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2650		adapter->msix_mem = NULL;
2651		goto msi;
2652	}
2653
2654	/* Figure out a reasonable auto config value */
2655	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
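	/*
	 * The default is one queue per CPU, capped at one less than the
	 * number of MSI-X messages so that a vector remains for the link
	 * interrupt allocated below.
	 */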
2656
2657	/* Manual override */
2658	if (igb_num_queues != 0)
2659		queues = igb_num_queues;
2660	if (queues > 8)  /* max queues */
2661		queues = 8;
2662
2663	/* Can have max of 4 queues on 82575 */
2664	if ((adapter->hw.mac.type == e1000_82575) && (queues > 4))
2665		queues = 4;
2666
2667	/* Limit the VF devices to one queue */
2668	if (adapter->vf_ifp)
2669		queues = 1;
2670
2671	/*
2672	** One vector (RX/TX pair) per queue
2673	** plus an additional for Link interrupt
2674	*/
2675	want = queues + 1;
2676	if (msgs >= want)
2677		msgs = want;
2678	else {
2679               	device_printf(adapter->dev,
2680		    "MSIX Configuration Problem, "
2681		    "%d vectors configured, but %d queues wanted!\n",
2682		    msgs, want);
2683		return (ENXIO);
2684	}
2685	if ((msgs) && pci_alloc_msix(dev, &msgs) == 0) {
2686               	device_printf(adapter->dev,
2687		    "Using MSIX interrupts with %d vectors\n", msgs);
2688		adapter->num_queues = queues;
2689		return (msgs);
2690	}
2691msi:
2692       	msgs = pci_msi_count(dev);
2693       	if (msgs == 1 && pci_alloc_msi(dev, &msgs) == 0)
2694               	device_printf(adapter->dev,"Using MSI interrupt\n");
2695	return (msgs);
2696}
2697
2698/*********************************************************************
2699 *
2700 *  Set up a fresh starting state
2701 *
2702 **********************************************************************/
2703static void
2704igb_reset(struct adapter *adapter)
2705{
2706	device_t	dev = adapter->dev;
2707	struct e1000_hw *hw = &adapter->hw;
2708	struct e1000_fc_info *fc = &hw->fc;
2709	struct ifnet	*ifp = adapter->ifp;
2710	u32		pba = 0;
2711	u16		hwm;
2712
2713	INIT_DEBUGOUT("igb_reset: begin");
2714
2715	/* Let the firmware know the OS is in control */
2716	igb_get_hw_control(adapter);
2717
2718	/*
2719	 * Packet Buffer Allocation (PBA)
2720	 * Writing PBA sets the receive portion of the buffer;
2721	 * the remainder is used for the transmit buffer.
2722	 */
2723	switch (hw->mac.type) {
2724	case e1000_82575:
2725		pba = E1000_PBA_32K;
2726		break;
2727	case e1000_82576:
2728	case e1000_vfadapt:
2729		pba = E1000_READ_REG(hw, E1000_RXPBS);
2730		pba &= E1000_RXPBS_SIZE_MASK_82576;
2731		break;
2732	case e1000_82580:
2733	case e1000_i350:
2734	case e1000_vfadapt_i350:
2735		pba = E1000_READ_REG(hw, E1000_RXPBS);
2736		pba = e1000_rxpbs_adjust_82580(pba);
2737		break;
2738	default:
2739		break;
2740	}
2741
2742	/* Special needs in case of Jumbo frames */
2743	if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
2744		u32 tx_space, min_tx, min_rx;
2745		pba = E1000_READ_REG(hw, E1000_PBA);
2746		tx_space = pba >> 16;
2747		pba &= 0xffff;
2748		min_tx = (adapter->max_frame_size +
2749		    sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
2750		min_tx = roundup2(min_tx, 1024);
2751		min_tx >>= 10;
2752                min_rx = adapter->max_frame_size;
2753                min_rx = roundup2(min_rx, 1024);
2754                min_rx >>= 10;
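		/*
		 * min_tx and min_rx have been rounded up and shifted down to
		 * KB so they are in the same units as the PBA fields they
		 * are compared against below.
		 */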
2755		if (tx_space < min_tx &&
2756		    ((min_tx - tx_space) < pba)) {
2757			pba = pba - (min_tx - tx_space);
2758			/*
2759                         * if short on rx space, rx wins
2760                         * and must trump tx adjustment
2761			 */
2762                        if (pba < min_rx)
2763                                pba = min_rx;
2764		}
2765		E1000_WRITE_REG(hw, E1000_PBA, pba);
2766	}
2767
2768	INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
2769
2770	/*
2771	 * These parameters control the automatic generation (Tx) and
2772	 * response (Rx) to Ethernet PAUSE frames.
2773	 * - High water mark should allow for at least two frames to be
2774	 *   received after sending an XOFF.
2775	 * - Low water mark works best when it is very near the high water mark.
2776	 *   This allows the receiver to restart by sending XON when it has
2777	 *   drained a bit.
2778	 */
2779	hwm = min(((pba << 10) * 9 / 10),
2780	    ((pba << 10) - 2 * adapter->max_frame_size));
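	/*
	 * pba is in KB, so (pba << 10) is the buffer size in bytes; the
	 * high water mark is the smaller of 90% of the buffer and the
	 * buffer size minus room for two maximum-sized frames.
	 */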
2781
2782	if (hw->mac.type < e1000_82576) {
2783		fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
2784		fc->low_water = fc->high_water - 8;
2785	} else {
2786		fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
2787		fc->low_water = fc->high_water - 16;
2788	}
2789
2790	fc->pause_time = IGB_FC_PAUSE_TIME;
2791	fc->send_xon = TRUE;
2792	if (adapter->fc)
2793		fc->requested_mode = adapter->fc;
2794	else
2795		fc->requested_mode = e1000_fc_default;
2796
2797	/* Issue a global reset */
2798	e1000_reset_hw(hw);
2799	E1000_WRITE_REG(hw, E1000_WUC, 0);
2800
2801	if (e1000_init_hw(hw) < 0)
2802		device_printf(dev, "Hardware Initialization Failed\n");
2803
2804	/* Setup DMA Coalescing */
2805	if (hw->mac.type == e1000_i350) {
2806		u32 reg = ~E1000_DMACR_DMAC_EN;
2807
2808		if (adapter->dmac == 0) { /* Disabling it */
2809			E1000_WRITE_REG(hw, E1000_DMACR, reg);
2810			goto reset_out;
2811		}
2812
2813		hwm = (pba - 4) << 10;
2814		reg = (((pba-6) << E1000_DMACR_DMACTHR_SHIFT)
2815		    & E1000_DMACR_DMACTHR_MASK);
2816
2817		/* Transition to L0s or L1 if available. */
2818		reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2819
2820		/* timer = value in adapter->dmac in 32usec intervals */
2821		reg |= (adapter->dmac >> 5);
2822		E1000_WRITE_REG(hw, E1000_DMACR, reg);
2823
2824		/* No lower threshold */
2825		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2826
2827		/* set hwm to PBA -  2 * max frame size */
2828		E1000_WRITE_REG(hw, E1000_FCRTC, hwm);
2829
2830		/* Set the interval before transition */
2831		reg = E1000_READ_REG(hw, E1000_DMCTLX);
2832		reg |= 0x800000FF; /* 255 usec */
2833		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2834
2835		/* free space in tx packet buffer to wake from DMA coal */
2836		E1000_WRITE_REG(hw, E1000_DMCTXTH,
2837		    (20480 - (2 * adapter->max_frame_size)) >> 6);
2838
2839		/* make low power state decision controlled by DMA coal */
2840		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2841		E1000_WRITE_REG(hw, E1000_PCIEMISC,
2842		    reg | E1000_PCIEMISC_LX_DECISION);
2843		device_printf(dev, "DMA Coalescing enabled\n");
2844	}
2845
2846reset_out:
2847	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
2848	e1000_get_phy_info(hw);
2849	e1000_check_for_link(hw);
2850	return;
2851}
2852
2853/*********************************************************************
2854 *
2855 *  Setup networking device structure and register an interface.
2856 *
2857 **********************************************************************/
2858static int
2859igb_setup_interface(device_t dev, struct adapter *adapter)
2860{
2861	struct ifnet   *ifp;
2862
2863	INIT_DEBUGOUT("igb_setup_interface: begin");
2864
2865	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2866	if (ifp == NULL) {
2867		device_printf(dev, "can not allocate ifnet structure\n");
2868		return (-1);
2869	}
2870	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2871	ifp->if_mtu = ETHERMTU;
2872	ifp->if_init =  igb_init;
2873	ifp->if_softc = adapter;
2874	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2875	ifp->if_ioctl = igb_ioctl;
2876	ifp->if_start = igb_start;
2877#if __FreeBSD_version >= 800000
2878	ifp->if_transmit = igb_mq_start;
2879	ifp->if_qflush = igb_qflush;
2880#endif
2881	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
2882	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
2883	IFQ_SET_READY(&ifp->if_snd);
2884
2885	ether_ifattach(ifp, adapter->hw.mac.addr);
2886
2887	ifp->if_capabilities = ifp->if_capenable = 0;
2888
2889	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
2890	ifp->if_capabilities |= IFCAP_TSO4;
2891	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2892	ifp->if_capenable = ifp->if_capabilities;
2893
2894	/* Don't enable LRO by default */
2895	ifp->if_capabilities |= IFCAP_LRO;
2896
2897#ifdef DEVICE_POLLING
2898	ifp->if_capabilities |= IFCAP_POLLING;
2899#endif
2900
2901	/*
2902	 * Tell the upper layer(s) we
2903	 * support full VLAN capability.
2904	 */
2905	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2906	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2907			     |  IFCAP_VLAN_HWTSO
2908			     |  IFCAP_VLAN_MTU;
2909	ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
2910			  |  IFCAP_VLAN_HWTSO
2911			  |  IFCAP_VLAN_MTU;
2912
2913	/*
2914	** Don't turn this on by default: if vlans are
2915	** created on another pseudo device (e.g. lagg),
2916	** vlan events are not passed through, breaking
2917	** operation, but with HW FILTER off it works. If
2918	** using vlans directly on the igb driver you can
2919	** enable this and get full hardware tag filtering.
2920	*/
2921	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2922
2923	/*
2924	 * Specify the media types supported by this adapter and register
2925	 * callbacks to update media and link information
2926	 */
2927	ifmedia_init(&adapter->media, IFM_IMASK,
2928	    igb_media_change, igb_media_status);
2929	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
2930	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
2931		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
2932			    0, NULL);
2933		ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
2934	} else {
2935		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
2936		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
2937			    0, NULL);
2938		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
2939			    0, NULL);
2940		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
2941			    0, NULL);
2942		if (adapter->hw.phy.type != e1000_phy_ife) {
2943			ifmedia_add(&adapter->media,
2944				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2945			ifmedia_add(&adapter->media,
2946				IFM_ETHER | IFM_1000_T, 0, NULL);
2947		}
2948	}
2949	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2950	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2951	return (0);
2952}
2953
2954
2955/*
2956 * Manage DMA'able memory.
2957 */
2958static void
2959igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2960{
2961	if (error)
2962		return;
2963	*(bus_addr_t *) arg = segs[0].ds_addr;
2964}
2965
2966static int
2967igb_dma_malloc(struct adapter *adapter, bus_size_t size,
2968        struct igb_dma_alloc *dma, int mapflags)
2969{
2970	int error;
2971
2972	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2973				IGB_DBA_ALIGN, 0,	/* alignment, bounds */
2974				BUS_SPACE_MAXADDR,	/* lowaddr */
2975				BUS_SPACE_MAXADDR,	/* highaddr */
2976				NULL, NULL,		/* filter, filterarg */
2977				size,			/* maxsize */
2978				1,			/* nsegments */
2979				size,			/* maxsegsize */
2980				0,			/* flags */
2981				NULL,			/* lockfunc */
2982				NULL,			/* lockarg */
2983				&dma->dma_tag);
2984	if (error) {
2985		device_printf(adapter->dev,
2986		    "%s: bus_dma_tag_create failed: %d\n",
2987		    __func__, error);
2988		goto fail_0;
2989	}
2990
2991	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2992	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2993	if (error) {
2994		device_printf(adapter->dev,
2995		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2996		    __func__, (uintmax_t)size, error);
2997		goto fail_2;
2998	}
2999
3000	dma->dma_paddr = 0;
3001	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3002	    size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3003	if (error || dma->dma_paddr == 0) {
3004		device_printf(adapter->dev,
3005		    "%s: bus_dmamap_load failed: %d\n",
3006		    __func__, error);
3007		goto fail_3;
3008	}
3009
3010	return (0);
3011
3012fail_3:
3013	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3014fail_2:
3015	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3016	bus_dma_tag_destroy(dma->dma_tag);
3017fail_0:
3018	dma->dma_map = NULL;
3019	dma->dma_tag = NULL;
3020
3021	return (error);
3022}
3023
3024static void
3025igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3026{
3027	if (dma->dma_tag == NULL)
3028		return;
3029	if (dma->dma_map != NULL) {
3030		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3031		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3032		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3033		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3034		dma->dma_map = NULL;
3035	}
3036	bus_dma_tag_destroy(dma->dma_tag);
3037	dma->dma_tag = NULL;
3038}
3039
3040
3041/*********************************************************************
3042 *
3043 *  Allocate memory for the transmit and receive rings, and then
3044 *  the descriptors associated with each, called only once at attach.
3045 *
3046 **********************************************************************/
3047static int
3048igb_allocate_queues(struct adapter *adapter)
3049{
3050	device_t dev = adapter->dev;
3051	struct igb_queue	*que = NULL;
3052	struct tx_ring		*txr = NULL;
3053	struct rx_ring		*rxr = NULL;
3054	int rsize, tsize, error = E1000_SUCCESS;
3055	int txconf = 0, rxconf = 0;
3056
3057	/* First allocate the top level queue structs */
3058	if (!(adapter->queues =
3059	    (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3060	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3061		device_printf(dev, "Unable to allocate queue memory\n");
3062		error = ENOMEM;
3063		goto fail;
3064	}
3065
3066	/* Next allocate the TX ring struct memory */
3067	if (!(adapter->tx_rings =
3068	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3069	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3070		device_printf(dev, "Unable to allocate TX ring memory\n");
3071		error = ENOMEM;
3072		goto tx_fail;
3073	}
3074
3075	/* Now allocate the RX */
3076	if (!(adapter->rx_rings =
3077	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3078	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3079		device_printf(dev, "Unable to allocate RX ring memory\n");
3080		error = ENOMEM;
3081		goto rx_fail;
3082	}
3083
3084	tsize = roundup2(adapter->num_tx_desc *
3085	    sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3086	/*
3087	 * Now set up the TX queues; txconf is needed to handle the
3088	 * possibility that things fail midcourse and we need to
3089	 * undo the memory allocations gracefully
3090	 */
3091	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3092		/* Set up some basics */
3093		txr = &adapter->tx_rings[i];
3094		txr->adapter = adapter;
3095		txr->me = i;
3096
3097		/* Initialize the TX lock */
3098		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3099		    device_get_nameunit(dev), txr->me);
3100		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3101
3102		if (igb_dma_malloc(adapter, tsize,
3103			&txr->txdma, BUS_DMA_NOWAIT)) {
3104			device_printf(dev,
3105			    "Unable to allocate TX Descriptor memory\n");
3106			error = ENOMEM;
3107			goto err_tx_desc;
3108		}
3109		txr->tx_base = (struct e1000_tx_desc *)txr->txdma.dma_vaddr;
3110		bzero((void *)txr->tx_base, tsize);
3111
3112        	/* Now allocate transmit buffers for the ring */
3113        	if (igb_allocate_transmit_buffers(txr)) {
3114			device_printf(dev,
3115			    "Critical Failure setting up transmit buffers\n");
3116			error = ENOMEM;
3117			goto err_tx_desc;
3118        	}
3119#if __FreeBSD_version >= 800000
3120		/* Allocate a buf ring */
3121		txr->br = buf_ring_alloc(IGB_BR_SIZE, M_DEVBUF,
3122		    M_WAITOK, &txr->tx_mtx);
3123#endif
3124	}
3125
3126	/*
3127	 * Next the RX queues...
3128	 */
3129	rsize = roundup2(adapter->num_rx_desc *
3130	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3131	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3132		rxr = &adapter->rx_rings[i];
3133		rxr->adapter = adapter;
3134		rxr->me = i;
3135
3136		/* Initialize the RX lock */
3137		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3138		    device_get_nameunit(dev), rxr->me);
3139		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3140
3141		if (igb_dma_malloc(adapter, rsize,
3142			&rxr->rxdma, BUS_DMA_NOWAIT)) {
3143			device_printf(dev,
3144			    "Unable to allocate RX Descriptor memory\n");
3145			error = ENOMEM;
3146			goto err_rx_desc;
3147		}
3148		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3149		bzero((void *)rxr->rx_base, rsize);
3150
3151        	/* Allocate receive buffers for the ring */
3152		if (igb_allocate_receive_buffers(rxr)) {
3153			device_printf(dev,
3154			    "Critical Failure setting up receive buffers\n");
3155			error = ENOMEM;
3156			goto err_rx_desc;
3157		}
3158	}
3159
3160	/*
3161	** Finally set up the queue holding structs
3162	*/
3163	for (int i = 0; i < adapter->num_queues; i++) {
3164		que = &adapter->queues[i];
3165		que->adapter = adapter;
3166		que->txr = &adapter->tx_rings[i];
3167		que->rxr = &adapter->rx_rings[i];
3168	}
3169
3170	return (0);
3171
3172err_rx_desc:
3173	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3174		igb_dma_free(adapter, &rxr->rxdma);
3175err_tx_desc:
3176	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3177		igb_dma_free(adapter, &txr->txdma);
3178	free(adapter->rx_rings, M_DEVBUF);
3179rx_fail:
3180#if __FreeBSD_version >= 800000
3181	buf_ring_free(txr->br, M_DEVBUF);
3182#endif
3183	free(adapter->tx_rings, M_DEVBUF);
3184tx_fail:
3185	free(adapter->queues, M_DEVBUF);
3186fail:
3187	return (error);
3188}
3189
3190/*********************************************************************
3191 *
3192 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
3193 *  the information needed to transmit a packet on the wire. This is
3194 *  called only once at attach, setup is done every reset.
3195 *  called only once at attach; setup is done on every reset.
3196 **********************************************************************/
3197static int
3198igb_allocate_transmit_buffers(struct tx_ring *txr)
3199{
3200	struct adapter *adapter = txr->adapter;
3201	device_t dev = adapter->dev;
3202	struct igb_tx_buffer *txbuf;
3203	int error, i;
3204
3205	/*
3206	 * Setup DMA descriptor areas.
3207	 */
3208	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3209			       1, 0,			/* alignment, bounds */
3210			       BUS_SPACE_MAXADDR,	/* lowaddr */
3211			       BUS_SPACE_MAXADDR,	/* highaddr */
3212			       NULL, NULL,		/* filter, filterarg */
3213			       IGB_TSO_SIZE,		/* maxsize */
3214			       IGB_MAX_SCATTER,		/* nsegments */
3215			       PAGE_SIZE,		/* maxsegsize */
3216			       0,			/* flags */
3217			       NULL,			/* lockfunc */
3218			       NULL,			/* lockfuncarg */
3219			       &txr->txtag))) {
3220		device_printf(dev,"Unable to allocate TX DMA tag\n");
3221		goto fail;
3222	}
3223
3224	if (!(txr->tx_buffers =
3225	    (struct igb_tx_buffer *) malloc(sizeof(struct igb_tx_buffer) *
3226	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3227		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3228		error = ENOMEM;
3229		goto fail;
3230	}
3231
3232        /* Create the descriptor buffer dma maps */
3233	txbuf = txr->tx_buffers;
3234	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3235		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3236		if (error != 0) {
3237			device_printf(dev, "Unable to create TX DMA map\n");
3238			goto fail;
3239		}
3240	}
3241
3242	return 0;
3243fail:
3244	/* We free everything; this handles the case where we fail in the middle */
3245	igb_free_transmit_structures(adapter);
3246	return (error);
3247}
3248
3249/*********************************************************************
3250 *
3251 *  Initialize a transmit ring.
3252 *
3253 **********************************************************************/
3254static void
3255igb_setup_transmit_ring(struct tx_ring *txr)
3256{
3257	struct adapter *adapter = txr->adapter;
3258	struct igb_tx_buffer *txbuf;
3259	int i;
3260
3261	/* Clear the old descriptor contents */
3262	IGB_TX_LOCK(txr);
3263	bzero((void *)txr->tx_base,
3264	      (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3265	/* Reset indices */
3266	txr->next_avail_desc = 0;
3267	txr->next_to_clean = 0;
3268
3269	/* Free any existing tx buffers. */
3270        txbuf = txr->tx_buffers;
3271	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3272		if (txbuf->m_head != NULL) {
3273			bus_dmamap_sync(txr->txtag, txbuf->map,
3274			    BUS_DMASYNC_POSTWRITE);
3275			bus_dmamap_unload(txr->txtag, txbuf->map);
3276			m_freem(txbuf->m_head);
3277			txbuf->m_head = NULL;
3278		}
3279		/* clear the watch index */
3280		txbuf->next_eop = -1;
3281        }
3282
3283	/* Set number of descriptors available */
3284	txr->tx_avail = adapter->num_tx_desc;
3285
3286	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3287	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3288	IGB_TX_UNLOCK(txr);
3289}
3290
3291/*********************************************************************
3292 *
3293 *  Initialize all transmit rings.
3294 *
3295 **********************************************************************/
3296static void
3297igb_setup_transmit_structures(struct adapter *adapter)
3298{
3299	struct tx_ring *txr = adapter->tx_rings;
3300
3301	for (int i = 0; i < adapter->num_queues; i++, txr++)
3302		igb_setup_transmit_ring(txr);
3303
3304	return;
3305}
3306
3307/*********************************************************************
3308 *
3309 *  Enable transmit unit.
3310 *
3311 **********************************************************************/
3312static void
3313igb_initialize_transmit_units(struct adapter *adapter)
3314{
3315	struct tx_ring	*txr = adapter->tx_rings;
3316	struct e1000_hw *hw = &adapter->hw;
3317	u32		tctl, txdctl;
3318
3319	INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3320	tctl = txdctl = 0;
3321
3322	/* Setup the Tx Descriptor Rings */
3323	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3324		u64 bus_addr = txr->txdma.dma_paddr;
3325
3326		E1000_WRITE_REG(hw, E1000_TDLEN(i),
3327		    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3328		E1000_WRITE_REG(hw, E1000_TDBAH(i),
3329		    (uint32_t)(bus_addr >> 32));
3330		E1000_WRITE_REG(hw, E1000_TDBAL(i),
3331		    (uint32_t)bus_addr);
3332
3333		/* Setup the HW Tx Head and Tail descriptor pointers */
3334		E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3335		E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3336
3337		HW_DEBUGOUT2("Base = %x, Length = %x\n",
3338		    E1000_READ_REG(hw, E1000_TDBAL(i)),
3339		    E1000_READ_REG(hw, E1000_TDLEN(i)));
3340
3341		txr->queue_status = IGB_QUEUE_IDLE;
3342
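		/*
		 * Pack the prefetch, host, and write-back thresholds into
		 * their byte fields of TXDCTL (shifts 0, 8, and 16) and
		 * enable the queue.
		 */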
3343		txdctl |= IGB_TX_PTHRESH;
3344		txdctl |= IGB_TX_HTHRESH << 8;
3345		txdctl |= IGB_TX_WTHRESH << 16;
3346		txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3347		E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3348	}
3349
3350	if (adapter->vf_ifp)
3351		return;
3352
3353	e1000_config_collision_dist(hw);
3354
3355	/* Program the Transmit Control Register */
3356	tctl = E1000_READ_REG(hw, E1000_TCTL);
3357	tctl &= ~E1000_TCTL_CT;
3358	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3359		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3360
3361	/* This write will effectively turn on the transmit unit. */
3362	E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3363}
3364
3365/*********************************************************************
3366 *
3367 *  Free all transmit rings.
3368 *
3369 **********************************************************************/
3370static void
3371igb_free_transmit_structures(struct adapter *adapter)
3372{
3373	struct tx_ring *txr = adapter->tx_rings;
3374
3375	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3376		IGB_TX_LOCK(txr);
3377		igb_free_transmit_buffers(txr);
3378		igb_dma_free(adapter, &txr->txdma);
3379		IGB_TX_UNLOCK(txr);
3380		IGB_TX_LOCK_DESTROY(txr);
3381	}
3382	free(adapter->tx_rings, M_DEVBUF);
3383}
3384
3385/*********************************************************************
3386 *
3387 *  Free transmit ring related data structures.
3388 *
3389 **********************************************************************/
3390static void
3391igb_free_transmit_buffers(struct tx_ring *txr)
3392{
3393	struct adapter *adapter = txr->adapter;
3394	struct igb_tx_buffer *tx_buffer;
3395	int             i;
3396
3397	INIT_DEBUGOUT("free_transmit_ring: begin");
3398
3399	if (txr->tx_buffers == NULL)
3400		return;
3401
3402	tx_buffer = txr->tx_buffers;
3403	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3404		if (tx_buffer->m_head != NULL) {
3405			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3406			    BUS_DMASYNC_POSTWRITE);
3407			bus_dmamap_unload(txr->txtag,
3408			    tx_buffer->map);
3409			m_freem(tx_buffer->m_head);
3410			tx_buffer->m_head = NULL;
3411			if (tx_buffer->map != NULL) {
3412				bus_dmamap_destroy(txr->txtag,
3413				    tx_buffer->map);
3414				tx_buffer->map = NULL;
3415			}
3416		} else if (tx_buffer->map != NULL) {
3417			bus_dmamap_unload(txr->txtag,
3418			    tx_buffer->map);
3419			bus_dmamap_destroy(txr->txtag,
3420			    tx_buffer->map);
3421			tx_buffer->map = NULL;
3422		}
3423	}
3424#if __FreeBSD_version >= 800000
3425	if (txr->br != NULL)
3426		buf_ring_free(txr->br, M_DEVBUF);
3427#endif
3428	if (txr->tx_buffers != NULL) {
3429		free(txr->tx_buffers, M_DEVBUF);
3430		txr->tx_buffers = NULL;
3431	}
3432	if (txr->txtag != NULL) {
3433		bus_dma_tag_destroy(txr->txtag);
3434		txr->txtag = NULL;
3435	}
3436	return;
3437}
3438
3439/**********************************************************************
3440 *
3441 *  Setup work for hardware segmentation offload (TSO)
3442 *
3443 **********************************************************************/
3444static boolean_t
3445igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, int ehdrlen,
3446	struct ip *ip, struct tcphdr *th)
3447{
3448	struct adapter *adapter = txr->adapter;
3449	struct e1000_adv_tx_context_desc *TXD;
3450	struct igb_tx_buffer        *tx_buffer;
3451	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3452	u32 mss_l4len_idx = 0;
3453	u16 vtag = 0;
3454	int ctxd, ip_hlen, tcp_hlen;
3455
3456	ctxd = txr->next_avail_desc;
3457	tx_buffer = &txr->tx_buffers[ctxd];
3458	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3459
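	/*
	 * Zero the IP checksum so the hardware can recompute it for each
	 * TSO segment, and capture the IP and TCP header lengths needed
	 * for the context descriptor below.
	 */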
3460	ip->ip_sum = 0;
3461	ip_hlen = ip->ip_hl << 2;
3462	tcp_hlen = th->th_off << 2;
3463
3464	/* VLAN MACLEN IPLEN */
3465	if (mp->m_flags & M_VLANTAG) {
3466		vtag = htole16(mp->m_pkthdr.ether_vtag);
3467		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3468	}
3469
3470	vlan_macip_lens |= (ehdrlen << E1000_ADVTXD_MACLEN_SHIFT);
3471	vlan_macip_lens |= ip_hlen;
3472	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3473
3474	/* ADV DTYPE TUCMD */
3475	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3476	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3477	type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3478	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3479
3480	/* MSS L4LEN IDX */
3481	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3482	mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3483	/* 82575 needs the queue index added */
3484	if (adapter->hw.mac.type == e1000_82575)
3485		mss_l4len_idx |= txr->me << 4;
3486	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3487
3488	TXD->seqnum_seed = htole32(0);
3489	tx_buffer->m_head = NULL;
3490	tx_buffer->next_eop = -1;
3491
3492	if (++ctxd == adapter->num_tx_desc)
3493		ctxd = 0;
3494
3495	txr->tx_avail--;
3496	txr->next_avail_desc = ctxd;
3497	return TRUE;
3498}
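
/*
 * Worked example for igb_tso_setup() above (illustrative only, assuming
 * the usual advanced context descriptor layout: IPLEN in bits 8:0 and
 * MACLEN in bits 15:9 of vlan_macip_lens, L4LEN in bits 15:8 and MSS in
 * bits 31:16 of mss_l4len_idx).  For an untagged IPv4/TCP frame with a
 * 14-byte ethernet header, 20-byte IP header, 20-byte TCP header and a
 * 1448-byte MSS the function programs:
 *
 *	vlan_macip_lens = (14 << E1000_ADVTXD_MACLEN_SHIFT) | 20;
 *	mss_l4len_idx   = (1448 << E1000_ADVTXD_MSS_SHIFT) |
 *			  (20 << E1000_ADVTXD_L4LEN_SHIFT);
 *
 * and consumes one context descriptor, so tx_avail drops by one before
 * any data descriptors are queued.
 */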
3499
3500
3501/*********************************************************************
3502 *
3503 *  Context Descriptor setup for VLAN or CSUM
3504 *
3505 **********************************************************************/
3506
3507static bool
3508igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp)
3509{
3510	struct adapter *adapter = txr->adapter;
3511	struct e1000_adv_tx_context_desc *TXD;
3512	struct igb_tx_buffer        *tx_buffer;
3513	u32 vlan_macip_lens, type_tucmd_mlhl, mss_l4len_idx;
3514	struct ether_vlan_header *eh;
3515	struct ip *ip = NULL;
3516	struct ip6_hdr *ip6;
3517	int  ehdrlen, ctxd, ip_hlen = 0;
3518	u16	etype, vtag = 0;
3519	u8	ipproto = 0;
3520	bool	offload = TRUE;
3521
3522	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3523		offload = FALSE;
3524
3525	vlan_macip_lens = type_tucmd_mlhl = mss_l4len_idx = 0;
3526	ctxd = txr->next_avail_desc;
3527	tx_buffer = &txr->tx_buffers[ctxd];
3528	TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3529
3530	/*
3531	** In advanced descriptors the vlan tag must
3532	** be placed into the context descriptor, thus
3533	** we need to be here just for that setup.
3534	*/
3535	if (mp->m_flags & M_VLANTAG) {
3536		vtag = htole16(mp->m_pkthdr.ether_vtag);
3537		vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3538	} else if (offload == FALSE)
3539		return FALSE;
3540
3541	/*
3542	 * Determine where frame payload starts.
3543	 * Jump over vlan headers if already present,
3544	 * helpful for QinQ too.
3545	 */
3546	eh = mtod(mp, struct ether_vlan_header *);
3547	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3548		etype = ntohs(eh->evl_proto);
3549		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3550	} else {
3551		etype = ntohs(eh->evl_encap_proto);
3552		ehdrlen = ETHER_HDR_LEN;
3553	}
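
	/*
	 * e.g. an untagged frame yields ehdrlen = ETHER_HDR_LEN (14),
	 * while a VLAN-encapsulated frame yields 18
	 * (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN).
	 */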
3554
3555	/* Set the ether header length */
3556	vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3557
3558	switch (etype) {
3559		case ETHERTYPE_IP:
3560			ip = (struct ip *)(mp->m_data + ehdrlen);
3561			ip_hlen = ip->ip_hl << 2;
3562			if (mp->m_len < ehdrlen + ip_hlen) {
3563				offload = FALSE;
3564				break;
3565			}
3566			ipproto = ip->ip_p;
3567			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3568			break;
3569		case ETHERTYPE_IPV6:
3570			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3571			ip_hlen = sizeof(struct ip6_hdr);
3572			ipproto = ip6->ip6_nxt;
3573			type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3574			break;
3575		default:
3576			offload = FALSE;
3577			break;
3578	}
3579
3580	vlan_macip_lens |= ip_hlen;
3581	type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3582
3583	switch (ipproto) {
3584		case IPPROTO_TCP:
3585			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3586				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3587			break;
3588		case IPPROTO_UDP:
3589			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3590				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3591			break;
3592#if __FreeBSD_version >= 800000
3593		case IPPROTO_SCTP:
3594			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3595				type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3596			break;
3597#endif
3598		default:
3599			offload = FALSE;
3600			break;
3601	}
3602
3603	/* 82575 needs the queue index added */
3604	if (adapter->hw.mac.type == e1000_82575)
3605		mss_l4len_idx = txr->me << 4;
3606
3607	/* Now copy bits into descriptor */
3608	TXD->vlan_macip_lens |= htole32(vlan_macip_lens);
3609	TXD->type_tucmd_mlhl |= htole32(type_tucmd_mlhl);
3610	TXD->seqnum_seed = htole32(0);
3611	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3612
3613	tx_buffer->m_head = NULL;
3614	tx_buffer->next_eop = -1;
3615
3616	/* We've consumed the first desc, adjust counters */
3617	if (++ctxd == adapter->num_tx_desc)
3618		ctxd = 0;
3619	txr->next_avail_desc = ctxd;
3620	--txr->tx_avail;
3621
3622	return (offload);
3623}
3624
3625
3626/**********************************************************************
3627 *
3628 *  Examine each tx_buffer in the used queue. If the hardware is done
3629 *  processing the packet then free associated resources. The
3630 *  tx_buffer is put back on the free queue.
3631 *
3632 *  A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3633 **********************************************************************/
3634static bool
3635igb_txeof(struct tx_ring *txr)
3636{
3637	struct adapter	*adapter = txr->adapter;
3638        int first, last, done, processed;
3639        struct igb_tx_buffer *tx_buffer;
3640        struct e1000_tx_desc   *tx_desc, *eop_desc;
3641	struct ifnet   *ifp = adapter->ifp;
3642
3643	IGB_TX_LOCK_ASSERT(txr);
3644
3645        if (txr->tx_avail == adapter->num_tx_desc) {
3646		txr->queue_status = IGB_QUEUE_IDLE;
3647                return FALSE;
3648	}
3649
3650	processed = 0;
3651        first = txr->next_to_clean;
3652        tx_desc = &txr->tx_base[first];
3653        tx_buffer = &txr->tx_buffers[first];
3654	last = tx_buffer->next_eop;
3655        eop_desc = &txr->tx_base[last];
3656
3657	/*
3658	 * What this does is get the index of the
3659	 * first descriptor AFTER the EOP of the
3660	 * first packet, so that we can do the
3661	 * simple comparison in the inner while loop.
3662	 */
3663	if (++last == adapter->num_tx_desc)
3664 		last = 0;
3665	done = last;
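	/*
	 * e.g. in a 512-descriptor ring with the first EOP at slot 511,
	 * 'done' wraps to 0, so the inner loop below cleans slots
	 * first..511 and stops once 'first' itself wraps around and
	 * equals 'done'.
	 */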
3666
3667        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3668            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3669
3670        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
3671		/* We clean the range of the packet */
3672		while (first != done) {
3673                	tx_desc->upper.data = 0;
3674                	tx_desc->lower.data = 0;
3675                	tx_desc->buffer_addr = 0;
3676                	++txr->tx_avail;
3677			++processed;
3678
3679			if (tx_buffer->m_head) {
3680				txr->bytes +=
3681				    tx_buffer->m_head->m_pkthdr.len;
3682				bus_dmamap_sync(txr->txtag,
3683				    tx_buffer->map,
3684				    BUS_DMASYNC_POSTWRITE);
3685				bus_dmamap_unload(txr->txtag,
3686				    tx_buffer->map);
3687
3688                        	m_freem(tx_buffer->m_head);
3689                        	tx_buffer->m_head = NULL;
3690                	}
3691			tx_buffer->next_eop = -1;
3692			txr->watchdog_time = ticks;
3693
3694	                if (++first == adapter->num_tx_desc)
3695				first = 0;
3696
3697	                tx_buffer = &txr->tx_buffers[first];
3698			tx_desc = &txr->tx_base[first];
3699		}
3700		++txr->packets;
3701		++ifp->if_opackets;
3702		/* See if we can continue to the next packet */
3703		last = tx_buffer->next_eop;
3704		if (last != -1) {
3705        		eop_desc = &txr->tx_base[last];
3706			/* Get new done point */
3707			if (++last == adapter->num_tx_desc) last = 0;
3708			done = last;
3709		} else
3710			break;
3711        }
3712        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3713            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3714
3715        txr->next_to_clean = first;
3716
3717	/*
3718	** Watchdog calculation: we know there's
3719	** work outstanding or the first return
3720	** would have been taken, so nothing processed
3721	** for too long indicates a hang.
3722	*/
3723	if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
3724		txr->queue_status |= IGB_QUEUE_HUNG;
3725        /*
3726         * If we have a minimum free,
3727         * clear depleted state bit
3728         */
3729        if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
3730                txr->queue_status &= ~IGB_QUEUE_DEPLETED;
3731
3732	/* All clean, turn off the watchdog */
3733	if (txr->tx_avail == adapter->num_tx_desc) {
3734		txr->queue_status = IGB_QUEUE_IDLE;
3735		return (FALSE);
3736        }
3737
3738	return (TRUE);
3739}
3740
3741/*********************************************************************
3742 *
3743 *  Refresh mbuf buffers for RX descriptor rings
3744 *   - the routine keeps its own state, so discards due to resource
3745 *     exhaustion are unnecessary; if an mbuf cannot be obtained it
3746 *     simply returns, keeping its placeholder, and can be called
3747 *     again later to retry.
3748 *
3749 **********************************************************************/
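
/*
 * Worked example of the two-index walk below (illustrative only): with
 * num_rx_desc = 8, next_to_refresh = 6 and limit = 3, the loop refreshes
 * slots 6, 7, 0 and 1, advancing next_to_refresh after each one, and
 * stops with next_to_refresh = 2 because j is a one-slot look-ahead of
 * the work mark.  If an mbuf allocation fails part way through, the
 * already-advanced next_to_refresh lets a later call resume where this
 * one left off.
 */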
3750static void
3751igb_refresh_mbufs(struct rx_ring *rxr, int limit)
3752{
3753	struct adapter		*adapter = rxr->adapter;
3754	bus_dma_segment_t	hseg[1];
3755	bus_dma_segment_t	pseg[1];
3756	struct igb_rx_buf	*rxbuf;
3757	struct mbuf		*mh, *mp;
3758	int			i, j, nsegs, error;
3759	bool			refreshed = FALSE;
3760
3761	i = j = rxr->next_to_refresh;
3762	/*
3763	** Get one descriptor beyond
3764	** our work mark to control
3765	** the loop.
3766        */
3767	if (++j == adapter->num_rx_desc)
3768		j = 0;
3769
3770	while (j != limit) {
3771		rxbuf = &rxr->rx_buffers[i];
3772		/* No hdr mbuf used with header split off */
3773		if (rxr->hdr_split == FALSE)
3774			goto no_split;
3775		if (rxbuf->m_head == NULL) {
3776			mh = m_gethdr(M_DONTWAIT, MT_DATA);
3777			if (mh == NULL)
3778				goto update;
3779		} else
3780			mh = rxbuf->m_head;
3781
3782	mh->m_pkthdr.len = mh->m_len = MHLEN;
3784		mh->m_flags |= M_PKTHDR;
3785		/* Get the memory mapping */
3786		error = bus_dmamap_load_mbuf_sg(rxr->htag,
3787		    rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
3788		if (error != 0) {
3789			printf("Refresh mbufs: hdr dmamap load"
3790			    " failure - %d\n", error);
3791			m_free(mh);
3792			rxbuf->m_head = NULL;
3793			goto update;
3794		}
3795		rxbuf->m_head = mh;
3796		bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3797		    BUS_DMASYNC_PREREAD);
3798		rxr->rx_base[i].read.hdr_addr =
3799		    htole64(hseg[0].ds_addr);
3800no_split:
3801		if (rxbuf->m_pack == NULL) {
3802			mp = m_getjcl(M_DONTWAIT, MT_DATA,
3803			    M_PKTHDR, adapter->rx_mbuf_sz);
3804			if (mp == NULL)
3805				goto update;
3806		} else
3807			mp = rxbuf->m_pack;
3808
3809		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
3810		/* Get the memory mapping */
3811		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3812		    rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
3813		if (error != 0) {
3814			printf("Refresh mbufs: payload dmamap load"
3815			    " failure - %d\n", error);
3816			m_free(mp);
3817			rxbuf->m_pack = NULL;
3818			goto update;
3819		}
3820		rxbuf->m_pack = mp;
3821		bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3822		    BUS_DMASYNC_PREREAD);
3823		rxr->rx_base[i].read.pkt_addr =
3824		    htole64(pseg[0].ds_addr);
3825		refreshed = TRUE; /* I feel wefreshed :) */
3826
3827		i = j; /* our next is precalculated */
3828		rxr->next_to_refresh = i;
3829		if (++j == adapter->num_rx_desc)
3830			j = 0;
3831	}
3832update:
3833	if (refreshed) /* update tail */
3834		E1000_WRITE_REG(&adapter->hw,
3835		    E1000_RDT(rxr->me), rxr->next_to_refresh);
3836	return;
3837}
3838
3839
3840/*********************************************************************
3841 *
3842 *  Allocate memory for rx_buffer structures. Since we use one
3843 *  rx_buffer per received packet, the maximum number of rx_buffer's
3844 *  that we'll need is equal to the number of receive descriptors
3845 *  that we've allocated.
3846 *
3847 **********************************************************************/
3848static int
3849igb_allocate_receive_buffers(struct rx_ring *rxr)
3850{
3851	struct	adapter 	*adapter = rxr->adapter;
3852	device_t 		dev = adapter->dev;
3853	struct igb_rx_buf	*rxbuf;
3854	int             	i, bsize, error;
3855
3856	bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
3857	if (!(rxr->rx_buffers =
3858	    (struct igb_rx_buf *) malloc(bsize,
3859	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3860		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3861		error = ENOMEM;
3862		goto fail;
3863	}
3864
3865	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3866				   1, 0,		/* alignment, bounds */
3867				   BUS_SPACE_MAXADDR,	/* lowaddr */
3868				   BUS_SPACE_MAXADDR,	/* highaddr */
3869				   NULL, NULL,		/* filter, filterarg */
3870				   MSIZE,		/* maxsize */
3871				   1,			/* nsegments */
3872				   MSIZE,		/* maxsegsize */
3873				   0,			/* flags */
3874				   NULL,		/* lockfunc */
3875				   NULL,		/* lockfuncarg */
3876				   &rxr->htag))) {
3877		device_printf(dev, "Unable to create RX DMA tag\n");
3878		goto fail;
3879	}
3880
3881	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3882				   1, 0,		/* alignment, bounds */
3883				   BUS_SPACE_MAXADDR,	/* lowaddr */
3884				   BUS_SPACE_MAXADDR,	/* highaddr */
3885				   NULL, NULL,		/* filter, filterarg */
3886				   MJUM9BYTES,		/* maxsize */
3887				   1,			/* nsegments */
3888				   MJUM9BYTES,		/* maxsegsize */
3889				   0,			/* flags */
3890				   NULL,		/* lockfunc */
3891				   NULL,		/* lockfuncarg */
3892				   &rxr->ptag))) {
3893		device_printf(dev, "Unable to create RX payload DMA tag\n");
3894		goto fail;
3895	}
3896
3897	for (i = 0; i < adapter->num_rx_desc; i++) {
3898		rxbuf = &rxr->rx_buffers[i];
3899		error = bus_dmamap_create(rxr->htag,
3900		    BUS_DMA_NOWAIT, &rxbuf->hmap);
3901		if (error) {
3902			device_printf(dev,
3903			    "Unable to create RX head DMA maps\n");
3904			goto fail;
3905		}
3906		error = bus_dmamap_create(rxr->ptag,
3907		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3908		if (error) {
3909			device_printf(dev,
3910			    "Unable to create RX packet DMA maps\n");
3911			goto fail;
3912		}
3913	}
3914
3915	return (0);
3916
3917fail:
3918	/* Frees all, but can handle partial completion */
3919	igb_free_receive_structures(adapter);
3920	return (error);
3921}
3922
3923
3924static void
3925igb_free_receive_ring(struct rx_ring *rxr)
3926{
3927	struct	adapter		*adapter = rxr->adapter;
3928	struct igb_rx_buf	*rxbuf;
3929
3930
3931	for (int i = 0; i < adapter->num_rx_desc; i++) {
3932		rxbuf = &rxr->rx_buffers[i];
3933		if (rxbuf->m_head != NULL) {
3934			bus_dmamap_sync(rxr->htag, rxbuf->hmap,
3935			    BUS_DMASYNC_POSTREAD);
3936			bus_dmamap_unload(rxr->htag, rxbuf->hmap);
3937			rxbuf->m_head->m_flags |= M_PKTHDR;
3938			m_freem(rxbuf->m_head);
3939		}
3940		if (rxbuf->m_pack != NULL) {
3941			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3942			    BUS_DMASYNC_POSTREAD);
3943			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3944			rxbuf->m_pack->m_flags |= M_PKTHDR;
3945			m_freem(rxbuf->m_pack);
3946		}
3947		rxbuf->m_head = NULL;
3948		rxbuf->m_pack = NULL;
3949	}
3950}
3951
3952
3953/*********************************************************************
3954 *
3955 *  Initialize a receive ring and its buffers.
3956 *
3957 **********************************************************************/
3958static int
3959igb_setup_receive_ring(struct rx_ring *rxr)
3960{
3961	struct	adapter		*adapter;
3962	struct  ifnet		*ifp;
3963	device_t		dev;
3964	struct igb_rx_buf	*rxbuf;
3965	bus_dma_segment_t	pseg[1], hseg[1];
3966	struct lro_ctrl		*lro = &rxr->lro;
3967	int			rsize, nsegs, error = 0;
3968
3969	adapter = rxr->adapter;
3970	dev = adapter->dev;
3971	ifp = adapter->ifp;
3972
3973	/* Clear the ring contents */
3974	IGB_RX_LOCK(rxr);
3975	rsize = roundup2(adapter->num_rx_desc *
3976	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3977	bzero((void *)rxr->rx_base, rsize);
3978
3979	/*
3980	** Free current RX buffer structures and their mbufs
3981	*/
3982	igb_free_receive_ring(rxr);
3983
3984	/* Configure for header split? */
3985	if (igb_header_split)
3986		rxr->hdr_split = TRUE;
3987
3988        /* Now replenish the ring mbufs */
3989	for (int j = 0; j < adapter->num_rx_desc; ++j) {
3990		struct mbuf	*mh, *mp;
3991
3992		rxbuf = &rxr->rx_buffers[j];
3993		if (rxr->hdr_split == FALSE)
3994			goto skip_head;
3995
3996		/* First the header */
3997		rxbuf->m_head = m_gethdr(M_DONTWAIT, MT_DATA);
3998		if (rxbuf->m_head == NULL) {
3999			error = ENOBUFS;
4000                        goto fail;
4001		}
4002		m_adj(rxbuf->m_head, ETHER_ALIGN);
4003		mh = rxbuf->m_head;
4004		mh->m_len = mh->m_pkthdr.len = MHLEN;
4005		mh->m_flags |= M_PKTHDR;
4006		/* Get the memory mapping */
4007		error = bus_dmamap_load_mbuf_sg(rxr->htag,
4008		    rxbuf->hmap, rxbuf->m_head, hseg,
4009		    &nsegs, BUS_DMA_NOWAIT);
4010		if (error != 0) /* Nothing elegant to do here */
4011                        goto fail;
4012		bus_dmamap_sync(rxr->htag,
4013		    rxbuf->hmap, BUS_DMASYNC_PREREAD);
4014		/* Update descriptor */
4015		rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4016
4017skip_head:
4018		/* Now the payload cluster */
4019		rxbuf->m_pack = m_getjcl(M_DONTWAIT, MT_DATA,
4020		    M_PKTHDR, adapter->rx_mbuf_sz);
4021		if (rxbuf->m_pack == NULL) {
4022			error = ENOBUFS;
4023                        goto fail;
4024		}
4025		mp = rxbuf->m_pack;
4026		mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4027		/* Get the memory mapping */
4028		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4029		    rxbuf->pmap, mp, pseg,
4030		    &nsegs, BUS_DMA_NOWAIT);
4031		if (error != 0)
4032                        goto fail;
4033		bus_dmamap_sync(rxr->ptag,
4034		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4035		/* Update descriptor */
4036		rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4037        }
4038
4039	/* Setup our descriptor indices */
4040	rxr->next_to_check = 0;
4041	rxr->next_to_refresh = adapter->num_rx_desc - 1;
4042	rxr->lro_enabled = FALSE;
4043	rxr->rx_split_packets = 0;
4044	rxr->rx_bytes = 0;
4045
4046	rxr->fmp = NULL;
4047	rxr->lmp = NULL;
4048	rxr->discard = FALSE;
4049
4050	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4051	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4052
4053	/*
4054	** Now set up the LRO interface; we
4055	** also only do header split when LRO
4056	** is enabled, since the two are so
4057	** often undesirable in similar setups.
4058	*/
4059	if (ifp->if_capenable & IFCAP_LRO) {
4060		error = tcp_lro_init(lro);
4061		if (error) {
4062			device_printf(dev, "LRO Initialization failed!\n");
4063			goto fail;
4064		}
4065		INIT_DEBUGOUT("RX LRO Initialized\n");
4066		rxr->lro_enabled = TRUE;
4067		lro->ifp = adapter->ifp;
4068	}
4069
4070	IGB_RX_UNLOCK(rxr);
4071	return (0);
4072
4073fail:
4074	igb_free_receive_ring(rxr);
4075	IGB_RX_UNLOCK(rxr);
4076	return (error);
4077}
4078
4079
4080/*********************************************************************
4081 *
4082 *  Initialize all receive rings.
4083 *
4084 **********************************************************************/
4085static int
4086igb_setup_receive_structures(struct adapter *adapter)
4087{
4088	struct rx_ring *rxr = adapter->rx_rings;
4089	int i;
4090
4091	for (i = 0; i < adapter->num_queues; i++, rxr++)
4092		if (igb_setup_receive_ring(rxr))
4093			goto fail;
4094
4095	return (0);
4096fail:
4097	/*
4098	 * Free RX buffers allocated so far, we will only handle
4099	 * the rings that completed, the failing case will have
4100	 * cleaned up for itself. 'i' is the endpoint.
4101	 */
4102	for (int j = 0; j < i; ++j) {
4103		rxr = &adapter->rx_rings[j];
4104		IGB_RX_LOCK(rxr);
4105		igb_free_receive_ring(rxr);
4106		IGB_RX_UNLOCK(rxr);
4107	}
4108
4109	return (ENOBUFS);
4110}
4111
4112/*********************************************************************
4113 *
4114 *  Enable receive unit.
4115 *
4116 **********************************************************************/
4117static void
4118igb_initialize_receive_units(struct adapter *adapter)
4119{
4120	struct rx_ring	*rxr = adapter->rx_rings;
4121	struct ifnet	*ifp = adapter->ifp;
4122	struct e1000_hw *hw = &adapter->hw;
4123	u32		rctl, rxcsum, psize, srrctl = 0;
4124
4125	INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4126
4127	/*
4128	 * Make sure receives are disabled while setting
4129	 * up the descriptor ring
4130	 */
4131	rctl = E1000_READ_REG(hw, E1000_RCTL);
4132	E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4133
4134	/*
4135	** Set up for header split
4136	*/
4137	if (igb_header_split) {
4138		/* Use a standard mbuf for the header */
4139		srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4140		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4141	} else
4142		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4143
4144	/*
4145	** Set up for jumbo frames
4146	*/
4147	if (ifp->if_mtu > ETHERMTU) {
4148		rctl |= E1000_RCTL_LPE;
4149		if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4150			srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4151			rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4152		} else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4153			srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4154			rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4155		}
4156		/* Set maximum packet len */
4157		psize = adapter->max_frame_size;
4158		/* are we on a vlan? */
4159		if (adapter->ifp->if_vlantrunk != NULL)
4160			psize += VLAN_TAG_SIZE;
4161		E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4162	} else {
4163		rctl &= ~E1000_RCTL_LPE;
4164		srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4165		rctl |= E1000_RCTL_SZ_2048;
4166	}
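
	/*
	 * Note (illustrative arithmetic, assuming BSIZEPKT is in 1 KB
	 * units with E1000_SRRCTL_BSIZEPKT_SHIFT == 10): the shifts
	 * above program 2048 >> 10 = 2, 4096 >> 10 = 4 and
	 * 8192 >> 10 = 8 for 2 KB, 4 KB and 8 KB receive buffers.
	 */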
4167
4168	/* Setup the Base and Length of the Rx Descriptor Rings */
4169	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4170		u64 bus_addr = rxr->rxdma.dma_paddr;
4171		u32 rxdctl;
4172
4173		E1000_WRITE_REG(hw, E1000_RDLEN(i),
4174		    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4175		E1000_WRITE_REG(hw, E1000_RDBAH(i),
4176		    (uint32_t)(bus_addr >> 32));
4177		E1000_WRITE_REG(hw, E1000_RDBAL(i),
4178		    (uint32_t)bus_addr);
4179		E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4180		/* Enable this Queue */
4181		rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4182		rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4183		rxdctl &= 0xFFF00000;
4184		rxdctl |= IGB_RX_PTHRESH;
4185		rxdctl |= IGB_RX_HTHRESH << 8;
4186		rxdctl |= IGB_RX_WTHRESH << 16;
4187		E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4188	}
4189
4190	/*
4191	** Setup for RX MultiQueue
4192	*/
4193	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4194	if (adapter->num_queues > 1) {
4195		u32 random[10], mrqc, shift = 0;
4196		union igb_reta {
4197			u32 dword;
4198			u8  bytes[4];
4199		} reta;
4200
4201		arc4rand(&random, sizeof(random), 0);
4202		if (adapter->hw.mac.type == e1000_82575)
4203			shift = 6;
4204		/* Warning FM follows */
4205		for (int i = 0; i < 128; i++) {
4206			reta.bytes[i & 3] =
4207			    (i % adapter->num_queues) << shift;
4208			if ((i & 3) == 3)
4209				E1000_WRITE_REG(hw,
4210				    E1000_RETA(i >> 2), reta.dword);
4211		}
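		/*
		 * Worked example (illustrative only, assuming the
		 * little-endian byte order of the igb_reta union): with
		 * two queues and shift 0 the entries cycle 0,1,0,1,...
		 * so every RETA register is written as 0x01000100; on
		 * the 82575 (shift 6) the queue index lands in bits 7:6
		 * of each byte, giving 0x40004000 instead.
		 */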
4212		/* Now fill in hash table */
4213		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
4214		for (int i = 0; i < 10; i++)
4215			E1000_WRITE_REG_ARRAY(hw,
4216			    E1000_RSSRK(0), i, random[i]);
4217
4218		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4219		    E1000_MRQC_RSS_FIELD_IPV4_TCP);
4220		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4221		    E1000_MRQC_RSS_FIELD_IPV6_TCP);
4222		mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4223		    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4224		mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4225		    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4226
4227		E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4228
4229		/*
4230		** NOTE: Receive Full-Packet Checksum Offload
4231		** is mutually exclusive with Multiqueue. However
4232		** this is not the same as TCP/IP checksums which
4233		** still work.
4234		*/
4235		rxcsum |= E1000_RXCSUM_PCSD;
4236#if __FreeBSD_version >= 800000
4237		/* For SCTP Offload */
4238		if ((hw->mac.type == e1000_82576)
4239		    && (ifp->if_capenable & IFCAP_RXCSUM))
4240			rxcsum |= E1000_RXCSUM_CRCOFL;
4241#endif
4242	} else {
4243		/* Non RSS setup */
4244		if (ifp->if_capenable & IFCAP_RXCSUM) {
4245			rxcsum |= E1000_RXCSUM_IPPCSE;
4246#if __FreeBSD_version >= 800000
4247			if (adapter->hw.mac.type == e1000_82576)
4248				rxcsum |= E1000_RXCSUM_CRCOFL;
4249#endif
4250		} else
4251			rxcsum &= ~E1000_RXCSUM_TUOFL;
4252	}
4253	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4254
4255	/* Setup the Receive Control Register */
4256	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4257	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4258		   E1000_RCTL_RDMTS_HALF |
4259		   (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4260	/* Strip CRC bytes. */
4261	rctl |= E1000_RCTL_SECRC;
4262	/* Make sure VLAN Filters are off */
4263	rctl &= ~E1000_RCTL_VFE;
4264	/* Don't store bad packets */
4265	rctl &= ~E1000_RCTL_SBP;
4266
4267	/* Enable Receives */
4268	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4269
4270	/*
4271	 * Setup the HW Rx Head and Tail Descriptor Pointers
4272	 *   - needs to be after enable
4273	 */
4274	for (int i = 0; i < adapter->num_queues; i++) {
4275		rxr = &adapter->rx_rings[i];
4276		E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4277		E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4278	}
4279	return;
4280}
4281
4282/*********************************************************************
4283 *
4284 *  Free receive rings.
4285 *
4286 **********************************************************************/
4287static void
4288igb_free_receive_structures(struct adapter *adapter)
4289{
4290	struct rx_ring *rxr = adapter->rx_rings;
4291
4292	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4293		struct lro_ctrl	*lro = &rxr->lro;
4294		igb_free_receive_buffers(rxr);
4295		tcp_lro_free(lro);
4296		igb_dma_free(adapter, &rxr->rxdma);
4297	}
4298
4299	free(adapter->rx_rings, M_DEVBUF);
4300}
4301
4302/*********************************************************************
4303 *
4304 *  Free receive ring data structures.
4305 *
4306 **********************************************************************/
4307static void
4308igb_free_receive_buffers(struct rx_ring *rxr)
4309{
4310	struct adapter		*adapter = rxr->adapter;
4311	struct igb_rx_buf	*rxbuf;
4312	int i;
4313
4314	INIT_DEBUGOUT("free_receive_structures: begin");
4315
4316	/* Cleanup any existing buffers */
4317	if (rxr->rx_buffers != NULL) {
4318		for (i = 0; i < adapter->num_rx_desc; i++) {
4319			rxbuf = &rxr->rx_buffers[i];
4320			if (rxbuf->m_head != NULL) {
4321				bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4322				    BUS_DMASYNC_POSTREAD);
4323				bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4324				rxbuf->m_head->m_flags |= M_PKTHDR;
4325				m_freem(rxbuf->m_head);
4326			}
4327			if (rxbuf->m_pack != NULL) {
4328				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4329				    BUS_DMASYNC_POSTREAD);
4330				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4331				rxbuf->m_pack->m_flags |= M_PKTHDR;
4332				m_freem(rxbuf->m_pack);
4333			}
4334			rxbuf->m_head = NULL;
4335			rxbuf->m_pack = NULL;
4336			if (rxbuf->hmap != NULL) {
4337				bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4338				rxbuf->hmap = NULL;
4339			}
4340			if (rxbuf->pmap != NULL) {
4341				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4342				rxbuf->pmap = NULL;
4343			}
4344		}
4345		if (rxr->rx_buffers != NULL) {
4346			free(rxr->rx_buffers, M_DEVBUF);
4347			rxr->rx_buffers = NULL;
4348		}
4349	}
4350
4351	if (rxr->htag != NULL) {
4352		bus_dma_tag_destroy(rxr->htag);
4353		rxr->htag = NULL;
4354	}
4355	if (rxr->ptag != NULL) {
4356		bus_dma_tag_destroy(rxr->ptag);
4357		rxr->ptag = NULL;
4358	}
4359}
4360
4361static __inline void
4362igb_rx_discard(struct rx_ring *rxr, int i)
4363{
4364	struct igb_rx_buf	*rbuf;
4365
4366	rbuf = &rxr->rx_buffers[i];
4367
4368	/* Partially received? Free the chain */
4369	if (rxr->fmp != NULL) {
4370		rxr->fmp->m_flags |= M_PKTHDR;
4371		m_freem(rxr->fmp);
4372		rxr->fmp = NULL;
4373		rxr->lmp = NULL;
4374	}
4375
4376	/*
4377	** With advanced descriptors the writeback
4378	** clobbers the buffer addrs, so it's easier
4379	** to just free the existing mbufs and take
4380	** the normal refresh path to get new buffers
4381	** and mapping.
4382	*/
4383	if (rbuf->m_head) {
4384		m_free(rbuf->m_head);
4385		rbuf->m_head = NULL;
4386	}
4387
4388	if (rbuf->m_pack) {
4389		m_free(rbuf->m_pack);
4390		rbuf->m_pack = NULL;
4391	}
4392
4393	return;
4394}
4395
4396static __inline void
4397igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4398{
4399
4400	/*
4401	 * At the moment LRO is only for IPv4/TCP packets whose TCP checksum
4402	 * has been verified by hardware, and which carry no VLAN tag in the
4403	 * ethernet header.
4404	 */
4405	if (rxr->lro_enabled &&
4406	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4407	    (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4408	    (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4409	    (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4410	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4411	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4412		/*
4413		 * Send to the stack if:
4414		 **  - LRO not enabled, or
4415		 **  - no LRO resources, or
4416		 **  - lro enqueue fails
4417		 */
4418		if (rxr->lro.lro_cnt != 0)
4419			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4420				return;
4421	}
4422	IGB_RX_UNLOCK(rxr);
4423	(*ifp->if_input)(ifp, m);
4424	IGB_RX_LOCK(rxr);
4425}
4426
4427/*********************************************************************
4428 *
4429 *  This routine executes in interrupt context. It replenishes
4430 *  the mbufs in the descriptor and sends data which has been
4431 *  dma'ed into host memory to upper layer.
4432 *
4433 *  We loop at most count times if count is > 0, or until done if
4434 *  count < 0.
4435 *
4436 *  Return TRUE if more to clean, FALSE otherwise
4437 *********************************************************************/
4438static bool
4439igb_rxeof(struct igb_queue *que, int count, int *done)
4440{
4441	struct adapter		*adapter = que->adapter;
4442	struct rx_ring		*rxr = que->rxr;
4443	struct ifnet		*ifp = adapter->ifp;
4444	struct lro_ctrl		*lro = &rxr->lro;
4445	struct lro_entry	*queued;
4446	int			i, processed = 0, rxdone = 0;
4447	u32			ptype, staterr = 0;
4448	union e1000_adv_rx_desc	*cur;
4449
4450	IGB_RX_LOCK(rxr);
4451	/* Sync the ring. */
4452	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4453	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4454
4455	/* Main clean loop */
4456	for (i = rxr->next_to_check; count != 0;) {
4457		struct mbuf		*sendmp, *mh, *mp;
4458		struct igb_rx_buf	*rxbuf;
4459		u16			hlen, plen, hdr, vtag;
4460		bool			eop = FALSE;
4461
4462		cur = &rxr->rx_base[i];
4463		staterr = le32toh(cur->wb.upper.status_error);
4464		if ((staterr & E1000_RXD_STAT_DD) == 0)
4465			break;
4466		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4467			break;
4468		count--;
4469		sendmp = mh = mp = NULL;
4470		cur->wb.upper.status_error = 0;
4471		rxbuf = &rxr->rx_buffers[i];
4472		plen = le16toh(cur->wb.upper.length);
4473		ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4474		if ((adapter->hw.mac.type == e1000_i350) &&
4475		    (staterr & E1000_RXDEXT_STATERR_LB))
4476			vtag = be16toh(cur->wb.upper.vlan);
4477		else
4478			vtag = le16toh(cur->wb.upper.vlan);
4479		hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4480		eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4481
4482		/* Make sure all segments of a bad packet are discarded */
4483		if (((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0) ||
4484		    (rxr->discard)) {
4485			ifp->if_ierrors++;
4486			++rxr->rx_discarded;
4487			if (!eop) /* Catch subsequent segs */
4488				rxr->discard = TRUE;
4489			else
4490				rxr->discard = FALSE;
4491			igb_rx_discard(rxr, i);
4492			goto next_desc;
4493		}
4494
4495		/*
4496		** The way the hardware is configured to
4497		** split, it will ONLY use the header buffer
4498		** when header split is enabled; otherwise we
4499		** get normal behavior, i.e., both header and
4500		** payload are DMA'd into the payload buffer.
4501		**
4502		** The fmp test is to catch the case where a
4503		** packet spans multiple descriptors, in that
4504		** case only the first header is valid.
4505		*/
4506		if (rxr->hdr_split && rxr->fmp == NULL) {
4507			hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
4508			    E1000_RXDADV_HDRBUFLEN_SHIFT;
4509			if (hlen > IGB_HDR_BUF)
4510				hlen = IGB_HDR_BUF;
4511			mh = rxr->rx_buffers[i].m_head;
4512			mh->m_len = hlen;
4513			/* clear buf pointer for refresh */
4514			rxbuf->m_head = NULL;
4515			/*
4516			** Get the payload length, this
4517			** could be zero if it's a small
4518			** packet.
4519			*/
4520			if (plen > 0) {
4521				mp = rxr->rx_buffers[i].m_pack;
4522				mp->m_len = plen;
4523				mh->m_next = mp;
4524				/* clear buf pointer */
4525				rxbuf->m_pack = NULL;
4526				rxr->rx_split_packets++;
4527			}
4528		} else {
4529			/*
4530			** Either no header split, or a
4531			** secondary piece of a fragmented
4532			** split packet.
4533			*/
4534			mh = rxr->rx_buffers[i].m_pack;
4535			mh->m_len = plen;
4536			/* clear buf info for refresh */
4537			rxbuf->m_pack = NULL;
4538		}
4539
4540		++processed; /* So we know when to refresh */
4541
4542		/* Initial frame - setup */
4543		if (rxr->fmp == NULL) {
4544			mh->m_pkthdr.len = mh->m_len;
4545			/* Save the head of the chain */
4546			rxr->fmp = mh;
4547			rxr->lmp = mh;
4548			if (mp != NULL) {
4549				/* Add payload if split */
4550				mh->m_pkthdr.len += mp->m_len;
4551				rxr->lmp = mh->m_next;
4552			}
4553		} else {
4554			/* Chain mbuf's together */
4555			rxr->lmp->m_next = mh;
4556			rxr->lmp = rxr->lmp->m_next;
4557			rxr->fmp->m_pkthdr.len += mh->m_len;
4558		}
4559
4560		if (eop) {
4561			rxr->fmp->m_pkthdr.rcvif = ifp;
4562			ifp->if_ipackets++;
4563			rxr->rx_packets++;
4564			/* capture data for AIM */
4565			rxr->packets++;
4566			rxr->bytes += rxr->fmp->m_pkthdr.len;
4567			rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
4568
4569			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4570				igb_rx_checksum(staterr, rxr->fmp, ptype);
4571
4572			if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4573			    (staterr & E1000_RXD_STAT_VP) != 0) {
4574				rxr->fmp->m_pkthdr.ether_vtag = vtag;
4575				rxr->fmp->m_flags |= M_VLANTAG;
4576			}
4577#if __FreeBSD_version >= 800000
4578			rxr->fmp->m_pkthdr.flowid = que->msix;
4579			rxr->fmp->m_flags |= M_FLOWID;
4580#endif
4581			sendmp = rxr->fmp;
4582			/* Make sure to set M_PKTHDR. */
4583			sendmp->m_flags |= M_PKTHDR;
4584			rxr->fmp = NULL;
4585			rxr->lmp = NULL;
4586		}
4587
4588next_desc:
4589		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4590		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4591
4592		/* Advance our pointers to the next descriptor. */
4593		if (++i == adapter->num_rx_desc)
4594			i = 0;
4595		/*
4596		** Send to the stack or LRO
4597		*/
4598		if (sendmp != NULL) {
4599			rxr->next_to_check = i;
4600			igb_rx_input(rxr, ifp, sendmp, ptype);
4601			i = rxr->next_to_check;
4602			rxdone++;
4603		}
4604
4605		/* Every 8 descriptors we go to refresh mbufs */
4606		if (processed == 8) {
4607                        igb_refresh_mbufs(rxr, i);
4608                        processed = 0;
4609		}
4610	}
4611
4612	/* Catch any remainders */
4613	if (igb_rx_unrefreshed(rxr))
4614		igb_refresh_mbufs(rxr, i);
4615
4616	rxr->next_to_check = i;
4617
4618	/*
4619	 * Flush any outstanding LRO work
4620	 */
4621	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4622		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4623		tcp_lro_flush(lro, queued);
4624	}
4625
4626	if (done != NULL)
4627		*done = rxdone;
4628
4629	IGB_RX_UNLOCK(rxr);
4630	return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
4631}
4632
4633/*********************************************************************
4634 *
4635 *  Verify that the hardware indicated that the checksum is valid.
4636 *  Inform the stack about the status of checksum so that stack
4637 *  doesn't spend time verifying the checksum.
4638 *
4639 *********************************************************************/
4640static void
4641igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
4642{
4643	u16 status = (u16)staterr;
4644	u8  errors = (u8) (staterr >> 24);
4645	int sctp;
4646
4647	/* Ignore Checksum bit is set */
4648	if (status & E1000_RXD_STAT_IXSM) {
4649		mp->m_pkthdr.csum_flags = 0;
4650		return;
4651	}
4652
4653	if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4654	    (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
4655		sctp = 1;
4656	else
4657		sctp = 0;
4658	if (status & E1000_RXD_STAT_IPCS) {
4659		/* Did it pass? */
4660		if (!(errors & E1000_RXD_ERR_IPE)) {
4661			/* IP Checksum Good */
4662			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4663			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4664		} else
4665			mp->m_pkthdr.csum_flags = 0;
4666	}
4667
4668	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
4669		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4670#if __FreeBSD_version >= 800000
4671		if (sctp) /* reassign */
4672			type = CSUM_SCTP_VALID;
4673#endif
4674		/* Did it pass? */
4675		if (!(errors & E1000_RXD_ERR_TCPE)) {
4676			mp->m_pkthdr.csum_flags |= type;
4677			if (sctp == 0)
4678				mp->m_pkthdr.csum_data = htons(0xffff);
4679		}
4680	}
4681	return;
4682}
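
/*
 * Example outcome of igb_rx_checksum() above: for an IPv4/TCP frame with
 * the IPCS and TCPCS status bits set and no error bits, the mbuf leaves
 * with csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID |
 * CSUM_PSEUDO_HDR and csum_data = 0xffff, so the stack skips both the IP
 * and TCP checksum verification.
 */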
4683
4684/*
4685 * This routine is run via a vlan
4686 * config EVENT
4687 */
4688static void
4689igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4690{
4691	struct adapter	*adapter = ifp->if_softc;
4692	u32		index, bit;
4693
4694	if (ifp->if_softc !=  arg)   /* Not our event */
4695		return;
4696
4697	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4698                return;
4699
4700	IGB_CORE_LOCK(adapter);
4701	index = (vtag >> 5) & 0x7F;
4702	bit = vtag & 0x1F;
4703	adapter->shadow_vfta[index] |= (1 << bit);
4704	++adapter->num_vlans;
4705	/* Change hw filter setting */
4706	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4707		igb_setup_vlan_hw_support(adapter);
4708	IGB_CORE_UNLOCK(adapter);
4709}
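
/*
 * Worked example of the shadow VFTA indexing above (illustrative sketch,
 * not part of the driver): each of the 128 VFTA registers holds 32 bits,
 * so a 12-bit VLAN id splits into a register index (upper 7 bits) and a
 * bit position (lower 5 bits).
 */
#if 0
	u16 vtag  = 1000;		/* hypothetical VLAN id */
	u32 index = (vtag >> 5) & 0x7F;	/* = 31 */
	u32 bit   = vtag & 0x1F;	/* = 8  */
	/* so adapter->shadow_vfta[31] |= (1 << 8) marks VLAN 1000 present */
#endif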
4710
4711/*
4712 * This routine is run via a vlan
4713 * unconfig EVENT
4714 */
4715static void
4716igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4717{
4718	struct adapter	*adapter = ifp->if_softc;
4719	u32		index, bit;
4720
4721	if (ifp->if_softc !=  arg)
4722		return;
4723
4724	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
4725                return;
4726
4727	IGB_CORE_LOCK(adapter);
4728	index = (vtag >> 5) & 0x7F;
4729	bit = vtag & 0x1F;
4730	adapter->shadow_vfta[index] &= ~(1 << bit);
4731	--adapter->num_vlans;
4732	/* Change hw filter setting */
4733	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
4734		igb_setup_vlan_hw_support(adapter);
4735	IGB_CORE_UNLOCK(adapter);
4736}
4737
4738static void
4739igb_setup_vlan_hw_support(struct adapter *adapter)
4740{
4741	struct e1000_hw *hw = &adapter->hw;
4742	struct ifnet	*ifp = adapter->ifp;
4743	u32             reg;
4744
4745	if (adapter->vf_ifp) {
4746		e1000_rlpml_set_vf(hw,
4747		    adapter->max_frame_size + VLAN_TAG_SIZE);
4748		return;
4749	}
4750
4751	reg = E1000_READ_REG(hw, E1000_CTRL);
4752	reg |= E1000_CTRL_VME;
4753	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4754
4755	/* Enable the Filter Table */
4756	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4757		reg = E1000_READ_REG(hw, E1000_RCTL);
4758		reg &= ~E1000_RCTL_CFIEN;
4759		reg |= E1000_RCTL_VFE;
4760		E1000_WRITE_REG(hw, E1000_RCTL, reg);
4761	}
4762
4763	/* Update the frame size */
4764	E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
4765	    adapter->max_frame_size + VLAN_TAG_SIZE);
4766
4767	/* Don't bother with table if no vlans */
4768	if ((adapter->num_vlans == 0) ||
4769	    ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
4770                return;
4771	/*
4772	** A soft reset zeroes out the VFTA, so
4773	** we need to repopulate it now.
4774	*/
4775	for (int i = 0; i < IGB_VFTA_SIZE; i++)
4776                if (adapter->shadow_vfta[i] != 0) {
4777			if (adapter->vf_ifp)
4778				e1000_vfta_set_vf(hw,
4779				    adapter->shadow_vfta[i], TRUE);
4780			else
4781				e1000_write_vfta(hw,
4782				    i, adapter->shadow_vfta[i]);
4783		}
4784}
4785
4786static void
4787igb_enable_intr(struct adapter *adapter)
4788{
4789	/* With RSS set up what to auto clear */
4790	if (adapter->msix_mem) {
4791		u32 mask = (adapter->que_mask | adapter->link_mask);
4792		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
4793		E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
4794		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
4795		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4796		    E1000_IMS_LSC);
4797	} else {
4798		E1000_WRITE_REG(&adapter->hw, E1000_IMS,
4799		    IMS_ENABLE_MASK);
4800	}
4801	E1000_WRITE_FLUSH(&adapter->hw);
4802
4803	return;
4804}
4805
4806static void
4807igb_disable_intr(struct adapter *adapter)
4808{
4809	if (adapter->msix_mem) {
4810		E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
4811		E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
4812	}
4813	E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
4814	E1000_WRITE_FLUSH(&adapter->hw);
4815	return;
4816}
4817
4818/*
4819 * Bit of a misnomer: what this really means is
4820 * to enable OS management of the system, i.e.,
4821 * to disable special hardware management features
4822 */
4823static void
4824igb_init_manageability(struct adapter *adapter)
4825{
4826	if (adapter->has_manage) {
4827		int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
4828		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4829
4830		/* disable hardware interception of ARP */
4831		manc &= ~(E1000_MANC_ARP_EN);
4832
4833                /* enable receiving management packets to the host */
4834		manc |= E1000_MANC_EN_MNG2HOST;
4835		manc2h |= 1 << 5;  /* Mng Port 623 */
4836		manc2h |= 1 << 6;  /* Mng Port 664 */
4837		E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
4838		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4839	}
4840}
4841
4842/*
4843 * Give control back to hardware management
4844 * controller if there is one.
4845 */
4846static void
4847igb_release_manageability(struct adapter *adapter)
4848{
4849	if (adapter->has_manage) {
4850		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4851
4852		/* re-enable hardware interception of ARP */
4853		manc |= E1000_MANC_ARP_EN;
4854		manc &= ~E1000_MANC_EN_MNG2HOST;
4855
4856		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4857	}
4858}
4859
4860/*
4861 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
4862 * For ASF and Pass Through versions of f/w this means that
4863 * the driver is loaded.
4864 *
4865 */
4866static void
4867igb_get_hw_control(struct adapter *adapter)
4868{
4869	u32 ctrl_ext;
4870
4871	if (adapter->vf_ifp)
4872		return;
4873
4874	/* Let firmware know the driver has taken over */
4875	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4876	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4877	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4878}
4879
4880/*
4881 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
4882 * For ASF and Pass Through versions of f/w this means that the
4883 * driver is no longer loaded.
4884 *
4885 */
4886static void
4887igb_release_hw_control(struct adapter *adapter)
4888{
4889	u32 ctrl_ext;
4890
4891	if (adapter->vf_ifp)
4892		return;
4893
4894	/* Let firmware take over control of h/w */
4895	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4896	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4897	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4898}
4899
4900static int
4901igb_is_valid_ether_addr(uint8_t *addr)
4902{
4903	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4904
4905	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4906		return (FALSE);
4907	}
4908
4909	return (TRUE);
4910}
4911
4912
4913/*
4914 * Enable PCI Wake On Lan capability
4915 */
4916static void
4917igb_enable_wakeup(device_t dev)
4918{
4919	u16     cap, status;
4920	u8      id;
4921
4922	/* First find the capabilities pointer*/
4923	cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
4924	/* Read the PM Capabilities */
4925	id = pci_read_config(dev, cap, 1);
4926	if (id != PCIY_PMG)     /* Something wrong */
4927		return;
4928	/* OK, we have the power capabilities, so
4929	   now get the status register */
4930	cap += PCIR_POWER_STATUS;
4931	status = pci_read_config(dev, cap, 2);
4932	status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
4933	pci_write_config(dev, cap, status, 2);
4934	return;
4935}
4936
4937static void
4938igb_led_func(void *arg, int onoff)
4939{
4940	struct adapter	*adapter = arg;
4941
4942	IGB_CORE_LOCK(adapter);
4943	if (onoff) {
4944		e1000_setup_led(&adapter->hw);
4945		e1000_led_on(&adapter->hw);
4946	} else {
4947		e1000_led_off(&adapter->hw);
4948		e1000_cleanup_led(&adapter->hw);
4949	}
4950	IGB_CORE_UNLOCK(adapter);
4951}
4952
4953/**********************************************************************
4954 *
4955 *  Update the board statistics counters.
4956 *
4957 **********************************************************************/
4958static void
4959igb_update_stats_counters(struct adapter *adapter)
4960{
4961	struct ifnet		*ifp;
4962        struct e1000_hw		*hw = &adapter->hw;
4963	struct e1000_hw_stats	*stats;
4964
4965	/*
4966	** The virtual function adapter has only a
4967	** small controlled set of stats, so do only
4968	** those and return.
4969	*/
4970	if (adapter->vf_ifp) {
4971		igb_update_vf_stats_counters(adapter);
4972		return;
4973	}
4974
4975	stats = (struct e1000_hw_stats	*)adapter->stats;
4976
4977	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
4978	   (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
4979		stats->symerrs +=
4980		    E1000_READ_REG(hw,E1000_SYMERRS);
4981		stats->sec += E1000_READ_REG(hw, E1000_SEC);
4982	}
4983
4984	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
4985	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
4986	stats->scc += E1000_READ_REG(hw, E1000_SCC);
4987	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
4988
4989	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
4990	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
4991	stats->colc += E1000_READ_REG(hw, E1000_COLC);
4992	stats->dc += E1000_READ_REG(hw, E1000_DC);
4993	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
4994	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
4995	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
4996	/*
4997	** For watchdog management we need to know if we have been
4998	** paused during the last interval, so capture that here.
4999	*/
5000        adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5001        stats->xoffrxc += adapter->pause_frames;
5002	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5003	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5004	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5005	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5006	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5007	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5008	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5009	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5010	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5011	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5012	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5013	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5014
5015	/* For the 64-bit byte counters the low dword must be read first. */
5016	/* Both registers clear on the read of the high dword */
5017
5018	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5019	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5020	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5021	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5022
5023	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5024	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5025	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5026	stats->roc += E1000_READ_REG(hw, E1000_ROC);
5027	stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5028
5029	stats->tor += E1000_READ_REG(hw, E1000_TORH);
5030	stats->tot += E1000_READ_REG(hw, E1000_TOTH);
5031
5032	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5033	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5034	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5035	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5036	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5037	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5038	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5039	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5040	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5041	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5042
5043	/* Interrupt Counts */
5044
5045	stats->iac += E1000_READ_REG(hw, E1000_IAC);
5046	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5047	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5048	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5049	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5050	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5051	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5052	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5053	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5054
5055	/* Host to Card Statistics */
5056
5057	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5058	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5059	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5060	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5061	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5062	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5063	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5064	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5065	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5066	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5067	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5068	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5069	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5070	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5071
5072	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5073	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5074	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5075	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5076	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5077	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5078
5079	ifp = adapter->ifp;
5080	ifp->if_collisions = stats->colc;
5081
5082	/* Rx Errors */
5083	ifp->if_ierrors = adapter->dropped_pkts + stats->rxerrc +
5084	    stats->crcerrs + stats->algnerrc +
5085	    stats->ruc + stats->roc + stats->mpc + stats->cexterr;
5086
5087	/* Tx Errors */
5088	ifp->if_oerrors = stats->ecol +
5089	    stats->latecol + adapter->watchdog_events;
5090
5091	/* Driver specific counters */
5092	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5093	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5094	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5095	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5096	adapter->packet_buf_alloc_tx =
5097	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5098	adapter->packet_buf_alloc_rx =
5099	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5100}
5101
5102
5103/**********************************************************************
5104 *
5105 *  Initialize the VF board statistics counters.
5106 *
5107 **********************************************************************/
5108static void
5109igb_vf_init_stats(struct adapter *adapter)
5110{
5111        struct e1000_hw *hw = &adapter->hw;
5112	struct e1000_vf_stats	*stats;
5113
5114	stats = (struct e1000_vf_stats	*)adapter->stats;
5115	if (stats == NULL)
5116		return;
5117        stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5118        stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5119        stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5120        stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5121        stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5122}
5123
5124/**********************************************************************
5125 *
5126 *  Update the VF board statistics counters.
5127 *
5128 **********************************************************************/
5129static void
5130igb_update_vf_stats_counters(struct adapter *adapter)
5131{
5132	struct e1000_hw *hw = &adapter->hw;
5133	struct e1000_vf_stats	*stats;
5134
5135	if (adapter->link_speed == 0)
5136		return;
5137
5138	stats = (struct e1000_vf_stats	*)adapter->stats;
5139
5140	UPDATE_VF_REG(E1000_VFGPRC,
5141	    stats->last_gprc, stats->gprc);
5142	UPDATE_VF_REG(E1000_VFGORC,
5143	    stats->last_gorc, stats->gorc);
5144	UPDATE_VF_REG(E1000_VFGPTC,
5145	    stats->last_gptc, stats->gptc);
5146	UPDATE_VF_REG(E1000_VFGOTC,
5147	    stats->last_gotc, stats->gotc);
5148	UPDATE_VF_REG(E1000_VFMPRC,
5149	    stats->last_mprc, stats->mprc);
5150}
5151
5152/* Export a single 32-bit register via a read-only sysctl. */
5153static int
5154igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5155{
5156	struct adapter *adapter;
5157	u_int val;
5158
5159	adapter = oidp->oid_arg1;
5160	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5161	return (sysctl_handle_int(oidp, &val, 0, req));
5162}
5163
5164/*
5165**  Tuneable interrupt rate handler
5166*/
5167static int
5168igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5169{
5170	struct igb_queue	*que = ((struct igb_queue *)oidp->oid_arg1);
5171	int			error;
5172	u32			reg, usec, rate;
5173
5174	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5175	usec = ((reg & 0x7FFC) >> 2);
5176	if (usec > 0)
5177		rate = 1000000 / usec;
5178	else
5179		rate = 0;
5180	error = sysctl_handle_int(oidp, &rate, 0, req);
5181	if (error || !req->newptr)
5182		return error;
5183	return 0;
5184}
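
/*
 * Worked example of the conversion above (illustrative sketch, not part
 * of the driver): the handler treats bits 14:2 of EITR as an interval in
 * microseconds and reports it as a rate.
 */
#if 0
	u32 reg  = 0x0190;			/* hypothetical EITR contents */
	u32 usec = (reg & 0x7FFC) >> 2;		/* = 100 microseconds */
	u32 rate = 1000000 / usec;		/* = 10000 interrupts/sec */
#endif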
5185
5186/*
5187 * Add sysctl variables, one per statistic, to the system.
5188 */
5189static void
5190igb_add_hw_stats(struct adapter *adapter)
5191{
5192	device_t dev = adapter->dev;
5193
5194	struct tx_ring *txr = adapter->tx_rings;
5195	struct rx_ring *rxr = adapter->rx_rings;
5196
5197	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5198	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5199	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5200	struct e1000_hw_stats *stats = adapter->stats;
5201
5202	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5203	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5204
5205#define QUEUE_NAME_LEN 32
5206	char namebuf[QUEUE_NAME_LEN];
5207
5208	/* Driver Statistics */
5209	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "link_irq",
5210			CTLFLAG_RD, &adapter->link_irq, 0,
5211			"Link MSIX IRQ Handled");
5212	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5213			CTLFLAG_RD, &adapter->dropped_pkts,
5214			"Driver dropped packets");
5215	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5216			CTLFLAG_RD, &adapter->no_tx_dma_setup,
5217			"Driver tx dma failure in xmit");
5218	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5219			CTLFLAG_RD, &adapter->rx_overruns,
5220			"RX overruns");
5221	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5222			CTLFLAG_RD, &adapter->watchdog_events,
5223			"Watchdog timeouts");
5224
5225	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5226			CTLFLAG_RD, &adapter->device_control,
5227			"Device Control Register");
5228	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5229			CTLFLAG_RD, &adapter->rx_control,
5230			"Receiver Control Register");
5231	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5232			CTLFLAG_RD, &adapter->int_mask,
5233			"Interrupt Mask");
5234	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5235			CTLFLAG_RD, &adapter->eint_mask,
5236			"Extended Interrupt Mask");
5237	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5238			CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5239			"Transmit Buffer Packet Allocation");
5240	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5241			CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5242			"Receive Buffer Packet Allocation");
5243	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5244			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5245			"Flow Control High Watermark");
5246	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5247			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5248			"Flow Control Low Watermark");
5249
5250	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5251		struct lro_ctrl *lro = &rxr->lro;
5252
5253		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5254		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5255					    CTLFLAG_RD, NULL, "Queue Name");
5256		queue_list = SYSCTL_CHILDREN(queue_node);
5257
5258		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5259				CTLFLAG_RD, &adapter->queues[i],
5260				sizeof(&adapter->queues[i]),
5261				igb_sysctl_interrupt_rate_handler,
5262				"IU", "Interrupt Rate");
5263
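		/*
		 * For the descriptor head/tail entries below, arg2 carries
		 * the register offset that igb_sysctl_reg_handler reads
		 * back on every request.
		 */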
5264		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5265				CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5266				igb_sysctl_reg_handler, "IU",
5267				"Transmit Descriptor Head");
5268		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5269				CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5270				igb_sysctl_reg_handler, "IU",
5271				"Transmit Descriptor Tail");
5272		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5273				CTLFLAG_RD, &txr->no_desc_avail,
5274				"Queue No Descriptor Available");
5275		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5276				CTLFLAG_RD, &txr->tx_packets,
5277				"Queue Packets Transmitted");
5278
5279		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5280				CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5281				igb_sysctl_reg_handler, "IU",
5282				"Receive Descriptor Head");
5283		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5284				CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5285				igb_sysctl_reg_handler, "IU",
5286				"Receive Descriptor Tail");
5287		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5288				CTLFLAG_RD, &rxr->rx_packets,
5289				"Queue Packets Received");
5290		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5291				CTLFLAG_RD, &rxr->rx_bytes,
5292				"Queue Bytes Received");
5293		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_queued",
5294				CTLFLAG_RD, &lro->lro_queued, 0,
5295				"LRO Queued");
5296		SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "lro_flushed",
5297				CTLFLAG_RD, &lro->lro_flushed, 0,
5298				"LRO Flushed");
5299	}
5300
5301	/* MAC stats get their own sub node */
5302
5303	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5304				    CTLFLAG_RD, NULL, "MAC Statistics");
5305	stat_list = SYSCTL_CHILDREN(stat_node);
5306
5307	/*
5308	** VF adapter has a very limited set of stats
5309	** since it's not managing the metal, so to speak.
5310	*/
5311	if (adapter->vf_ifp) {
5312		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5313				CTLFLAG_RD, &stats->gprc,
5314				"Good Packets Received");
5315		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5316				CTLFLAG_RD, &stats->gptc,
5317				"Good Packets Transmitted");
5318		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5319				CTLFLAG_RD, &stats->gorc,
5320				"Good Octets Received");
5321		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5322				CTLFLAG_RD, &stats->gotc,
5323				"Good Octets Transmitted");
5324		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5325				CTLFLAG_RD, &stats->mprc,
5326				"Multicast Packets Received");
5327		return;
5328	}
5329
5330	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5331			CTLFLAG_RD, &stats->ecol,
5332			"Excessive collisions");
5333	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5334			CTLFLAG_RD, &stats->scc,
5335			"Single collisions");
5336	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5337			CTLFLAG_RD, &stats->mcc,
5338			"Multiple collisions");
5339	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5340			CTLFLAG_RD, &stats->latecol,
5341			"Late collisions");
5342	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5343			CTLFLAG_RD, &stats->colc,
5344			"Collision Count");
5345	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5346			CTLFLAG_RD, &stats->symerrs,
5347			"Symbol Errors");
5348	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5349			CTLFLAG_RD, &stats->sec,
5350			"Sequence Errors");
5351	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5352			CTLFLAG_RD, &stats->dc,
5353			"Defer Count");
5354	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5355			CTLFLAG_RD, &stats->mpc,
5356			"Missed Packets");
5357	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5358			CTLFLAG_RD, &stats->rnbc,
5359			"Receive No Buffers");
5360	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5361			CTLFLAG_RD, &stats->ruc,
5362			"Receive Undersize");
5363	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5364			CTLFLAG_RD, &stats->rfc,
5365			"Fragmented Packets Received");
5366	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5367			CTLFLAG_RD, &stats->roc,
5368			"Oversized Packets Received");
5369	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5370			CTLFLAG_RD, &stats->rjc,
5371			"Received Jabber");
5372	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
5373			CTLFLAG_RD, &stats->rxerrc,
5374			"Receive Errors");
5375	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5376			CTLFLAG_RD, &stats->crcerrs,
5377			"CRC errors");
5378	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
5379			CTLFLAG_RD, &stats->algnerrc,
5380			"Alignment Errors");
5381	/* On 82575 these are collision counts */
5382	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
5383			CTLFLAG_RD, &stats->cexterr,
5384			"Collision/Carrier extension errors");
5385	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5386			CTLFLAG_RD, &stats->xonrxc,
5387			"XON Received");
5388	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5389			CTLFLAG_RD, &stats->xontxc,
5390			"XON Transmitted");
5391	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5392			CTLFLAG_RD, &stats->xoffrxc,
5393			"XOFF Received");
5394	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5395			CTLFLAG_RD, &stats->xofftxc,
5396			"XOFF Transmitted");
5397	/* Packet Reception Stats */
5398	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
5399			CTLFLAG_RD, &stats->tpr,
5400			"Total Packets Received");
5401	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5402			CTLFLAG_RD, &stats->gprc,
5403			"Good Packets Received");
5404	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
5405			CTLFLAG_RD, &stats->bprc,
5406			"Broadcast Packets Received");
5407	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5408			CTLFLAG_RD, &stats->mprc,
5409			"Multicast Packets Received");
5410	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5411			CTLFLAG_RD, &stats->prc64,
5412			"64 byte frames received");
5413	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5414			CTLFLAG_RD, &stats->prc127,
5415			"65-127 byte frames received");
5416	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5417			CTLFLAG_RD, &stats->prc255,
5418			"128-255 byte frames received");
5419	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5420			CTLFLAG_RD, &stats->prc511,
5421			"256-511 byte frames received");
5422	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5423			CTLFLAG_RD, &stats->prc1023,
5424			"512-1023 byte frames received");
5425	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
5426			CTLFLAG_RD, &stats->prc1522,
5427			"1024-1522 byte frames received");
5428	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5429			CTLFLAG_RD, &stats->gorc,
5430			"Good Octets Received");
5431
5432	/* Packet Transmission Stats */
5433	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5434			CTLFLAG_RD, &stats->gotc,
5435			"Good Octets Transmitted");
5436	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5437			CTLFLAG_RD, &stats->tpt,
5438			"Total Packets Transmitted");
5439	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5440			CTLFLAG_RD, &stats->gptc,
5441			"Good Packets Transmitted");
5442	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5443			CTLFLAG_RD, &stats->bptc,
5444			"Broadcast Packets Transmitted");
5445	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5446			CTLFLAG_RD, &stats->mptc,
5447			"Multicast Packets Transmitted");
5448	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5449			CTLFLAG_RD, &stats->ptc64,
5450			"64 byte frames transmitted");
5451	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5452			CTLFLAG_RD, &stats->ptc127,
5453			"65-127 byte frames transmitted");
5454	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5455			CTLFLAG_RD, &stats->ptc255,
5456			"128-255 byte frames transmitted");
5457	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5458			CTLFLAG_RD, &stats->ptc511,
5459			"256-511 byte frames transmitted");
5460	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5461			CTLFLAG_RD, &stats->ptc1023,
5462			"512-1023 byte frames transmitted");
5463	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5464			CTLFLAG_RD, &stats->ptc1522,
5465			"1024-1522 byte frames transmitted");
5466	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
5467			CTLFLAG_RD, &stats->tsctc,
5468			"TSO Contexts Transmitted");
5469	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
5470			CTLFLAG_RD, &stats->tsctfc,
5471			"TSO Contexts Failed");
5472
5473
5474	/* Interrupt Stats */
5475
5476	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
5477				    CTLFLAG_RD, NULL, "Interrupt Statistics");
5478	int_list = SYSCTL_CHILDREN(int_node);
5479
5480	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
5481			CTLFLAG_RD, &stats->iac,
5482			"Interrupt Assertion Count");
5483
5484	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
5485			CTLFLAG_RD, &stats->icrxptc,
5486			"Interrupt Cause Rx Pkt Timer Expire Count");
5487
5488	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
5489			CTLFLAG_RD, &stats->icrxatc,
5490			"Interrupt Cause Rx Abs Timer Expire Count");
5491
5492	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
5493			CTLFLAG_RD, &stats->ictxptc,
5494			"Interrupt Cause Tx Pkt Timer Expire Count");
5495
5496	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
5497			CTLFLAG_RD, &stats->ictxatc,
5498			"Interrupt Cause Tx Abs Timer Expire Count");
5499
5500	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
5501			CTLFLAG_RD, &stats->ictxqec,
5502			"Interrupt Cause Tx Queue Empty Count");
5503
5504	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
5505			CTLFLAG_RD, &stats->ictxqmtc,
5506			"Interrupt Cause Tx Queue Min Thresh Count");
5507
5508	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
5509			CTLFLAG_RD, &stats->icrxdmtc,
5510			"Interrupt Cause Rx Desc Min Thresh Count");
5511
5512	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
5513			CTLFLAG_RD, &stats->icrxoc,
5514			"Interrupt Cause Receiver Overrun Count");
5515
5516	/* Host to Card Stats */
5517
5518	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
5519				    CTLFLAG_RD, NULL,
5520				    "Host to Card Statistics");
5521
5522	host_list = SYSCTL_CHILDREN(host_node);
5523
5524	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
5525			CTLFLAG_RD, &stats->cbtmpc,
5526			"Circuit Breaker Tx Packet Count");
5527
5528	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
5529			CTLFLAG_RD, &stats->htdpmc,
5530			"Host Transmit Discarded Packets");
5531
5532	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
5533			CTLFLAG_RD, &stats->rpthc,
5534			"Rx Packets To Host");
5535
5536	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
5537			CTLFLAG_RD, &stats->cbrmpc,
5538			"Circuit Breaker Rx Packet Count");
5539
5540	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
5541			CTLFLAG_RD, &stats->cbrdpc,
5542			"Circuit Breaker Rx Dropped Count");
5543
5544	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
5545			CTLFLAG_RD, &stats->hgptc,
5546			"Host Good Packets Tx Count");
5547
5548	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
5549			CTLFLAG_RD, &stats->htcbdpc,
5550			"Host Tx Circuit Breaker Dropped Count");
5551
5552	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
5553			CTLFLAG_RD, &stats->hgorc,
5554			"Host Good Octets Received Count");
5555
5556	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
5557			CTLFLAG_RD, &stats->hgotc,
5558			"Host Good Octets Transmit Count");
5559
5560	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
5561			CTLFLAG_RD, &stats->lenerrs,
5562			"Length Errors");
5563
5564	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
5565			CTLFLAG_RD, &stats->scvpc,
5566			"SerDes/SGMII Code Violation Pkt Count");
5567
5568	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
5569			CTLFLAG_RD, &stats->hrmpc,
5570			"Header Redirection Missed Packet Count");
5571}
5572
5573
5574/**********************************************************************
5575 *
5576 *  This routine provides a way to dump out the adapter eeprom,
5577 *  often a useful debug/service tool. It only dumps the first
5578 *  32 words; the data that matters lies within that range.
5579 *
5580 **********************************************************************/
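/*
 * Usage sketch: assuming the attach path exposes this handler as an
 * integer node named "nvm" under the device tree, then
 *	sysctl dev.igb.0.nvm=1
 * writes 1 through the handler and triggers the console dump below.
 */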
5581static int
5582igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
5583{
5584	struct adapter *adapter;
5585	int error;
5586	int result;
5587
5588	result = -1;
5589	error = sysctl_handle_int(oidp, &result, 0, req);
5590
5591	if (error || !req->newptr)
5592		return (error);
5593
5594	/*
5595	 * This value will cause a hex dump of the
5596	 * first 32 16-bit words of the EEPROM to
5597	 * the screen.
5598	 */
5599	if (result == 1) {
5600		adapter = (struct adapter *)arg1;
5601		igb_print_nvm_info(adapter);
5602	}
5603
5604	return (error);
5605}
5606
5607static void
5608igb_print_nvm_info(struct adapter *adapter)
5609{
5610	u16	eeprom_data;
5611	int	i, j, row = 0;
5612
5613	/* It's a bit crude, but it gets the job done */
5614	printf("\nInterface EEPROM Dump:\n");
5615	printf("Offset\n0x0000  ");
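	/*
	 * The offset column is a byte offset: each row holds 8 16-bit
	 * words (16 bytes), so the labels advance 0x0000, 0x0010,
	 * 0x0020, 0x0030 across the 32 words dumped.
	 */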
5616	for (i = 0, j = 0; i < 32; i++, j++) {
5617		if (j == 8) { /* Make the offset block */
5618			j = 0; ++row;
5619			printf("\n0x00%x0  ", row);
5620		}
5621		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
5622		printf("%04x ", eeprom_data);
5623	}
5624	printf("\n");
5625}
5626
5627static void
5628igb_set_sysctl_value(struct adapter *adapter, const char *name,
5629	const char *description, int *limit, int value)
5630{
5631	*limit = value;
5632	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
5633	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
5634	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, limit, value, description);
5635}
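/*
 * Typical use of the helper above happens at attach time; the node name,
 * description, and default in this sketch are illustrative only, not
 * lifted from the attach path:
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */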
5636
5637/*
5638** Set flow control using sysctl:
5639** Flow control values:
5640** 	0 - off
5641**	1 - rx pause
5642**	2 - tx pause
5643**	3 - full
5644*/
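/*
** Usage sketch: assuming this handler is attached as a node named "fc"
** under the device tree, full flow control can be requested with
**	sysctl dev.igb.0.fc=3
** (3 maps to e1000_fc_full in the switch below).
*/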
5645static int
5646igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
5647{
5648	int		error;
5649	static int	input = 3; /* default is full */
5650	struct adapter	*adapter = (struct adapter *) arg1;
5651
5652	error = sysctl_handle_int(oidp, &input, 0, req);
5653
5654	if ((error) || (req->newptr == NULL))
5655		return (error);
5656
5657	switch (input) {
5658		case e1000_fc_rx_pause:
5659		case e1000_fc_tx_pause:
5660		case e1000_fc_full:
5661		case e1000_fc_none:
5662			adapter->hw.fc.requested_mode = input;
5663			adapter->fc = input;
5664			break;
5665		default:
5666			/* Do nothing */
5667			return (error);
5668	}
5669
5670	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
5671	e1000_force_mac_fc(&adapter->hw);
5672	return (error);
5673}
5674
5675/*
5676** Manage DMA Coalesce:
5677** Control values:
5678** 	0/1 - off/on
5679**	Legal timer values are:
5680**	250, 500, and 1000 to 10000 in increments of 1000
5681*/
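/*
** Usage sketch: assuming this handler is attached as a node named
** "dmac" under the device tree,
**	sysctl dev.igb.0.dmac=1
** enables coalescing with the default timer value of 1000, and
**	sysctl dev.igb.0.dmac=0
** turns it off; in both cases igb_init() reinitializes the interface.
*/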
5682static int
5683igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
5684{
5685	struct adapter *adapter = (struct adapter *) arg1;
5686	int		error;
5687
5688	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
5689
5690	if ((error) || (req->newptr == NULL))
5691		return (error);
5692
5693	switch (adapter->dmac) {
5694		case 0:
5695			/* Disabling */
5696			break;
5697		case 1: /* Just enable and use default */
5698			adapter->dmac = 1000;
5699			break;
5700		case 250:
5701		case 500:
5702		case 1000:
5703		case 2000:
5704		case 3000:
5705		case 4000:
5706		case 5000:
5707		case 6000:
5708		case 7000:
5709		case 8000:
5710		case 9000:
5711		case 10000:
5712			/* Legal values - allow */
5713			break;
5714		default:
5715			/* Do nothing, illegal value */
5716			adapter->dmac = 0;
5717			return (error);
5718	}
5719	/* Reinit the interface */
5720	igb_init(adapter);
5721	return (error);
5722}
5723