1/******************************************************************************
2
3  Copyright (c) 2001-2013, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD$*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "ixgbe.h"
39
40/*********************************************************************
41 *  Set this to one to display debug statistics
42 *********************************************************************/
43int             ixgbe_display_debug_stats = 0;
44
45/*********************************************************************
46 *  Driver version
47 *********************************************************************/
48char ixgbe_driver_version[] = "2.5.15";
49
50/*********************************************************************
51 *  PCI Device ID Table
52 *
53 *  Used by probe to select devices to load on
54 *  Last field stores an index into ixgbe_strings
55 *  Last entry must be all 0s
56 *
57 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
58 *********************************************************************/
59
60static ixgbe_vendor_info_t ixgbe_vendor_info_array[] =
61{
62	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0},
63	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0},
64	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0},
65	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0},
66	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0},
67	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0},
68	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0},
69	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0},
70	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0},
71	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0},
72	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0},
73	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0},
74	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0},
75	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0},
76	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0},
77	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0},
78	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0},
79	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0},
80	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0},
81	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0},
82	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0},
83	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0},
84	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0},
85	{IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0},
86	/* required last entry */
87	{0, 0, 0, 0, 0}
88};
89
90/*********************************************************************
91 *  Table of branding strings
92 *********************************************************************/
93
94static char    *ixgbe_strings[] = {
95	"Intel(R) PRO/10GbE PCI-Express Network Driver"
96};
97
98/*********************************************************************
99 *  Function prototypes
100 *********************************************************************/
101static int      ixgbe_probe(device_t);
102static int      ixgbe_attach(device_t);
103static int      ixgbe_detach(device_t);
104static int      ixgbe_shutdown(device_t);
105#ifdef IXGBE_LEGACY_TX
106static void     ixgbe_start(struct ifnet *);
107static void     ixgbe_start_locked(struct tx_ring *, struct ifnet *);
108#else /* ! IXGBE_LEGACY_TX */
109static int	ixgbe_mq_start(struct ifnet *, struct mbuf *);
110static int	ixgbe_mq_start_locked(struct ifnet *, struct tx_ring *);
111static void	ixgbe_qflush(struct ifnet *);
112static void	ixgbe_deferred_mq_start(void *, int);
113#endif /* IXGBE_LEGACY_TX */
114static int      ixgbe_ioctl(struct ifnet *, u_long, caddr_t);
115static void	ixgbe_init(void *);
116static void	ixgbe_init_locked(struct adapter *);
117static void     ixgbe_stop(void *);
118static void     ixgbe_media_status(struct ifnet *, struct ifmediareq *);
119static int      ixgbe_media_change(struct ifnet *);
120static void     ixgbe_identify_hardware(struct adapter *);
121static int      ixgbe_allocate_pci_resources(struct adapter *);
122static void	ixgbe_get_slot_info(struct ixgbe_hw *);
123static int      ixgbe_allocate_msix(struct adapter *);
124static int      ixgbe_allocate_legacy(struct adapter *);
125static int	ixgbe_allocate_queues(struct adapter *);
126static int	ixgbe_setup_msix(struct adapter *);
127static void	ixgbe_free_pci_resources(struct adapter *);
128static void	ixgbe_local_timer(void *);
129static int	ixgbe_setup_interface(device_t, struct adapter *);
130static void	ixgbe_config_link(struct adapter *);
131
132static int      ixgbe_allocate_transmit_buffers(struct tx_ring *);
133static int	ixgbe_setup_transmit_structures(struct adapter *);
134static void	ixgbe_setup_transmit_ring(struct tx_ring *);
135static void     ixgbe_initialize_transmit_units(struct adapter *);
136static void     ixgbe_free_transmit_structures(struct adapter *);
137static void     ixgbe_free_transmit_buffers(struct tx_ring *);
138
139static int      ixgbe_allocate_receive_buffers(struct rx_ring *);
140static int      ixgbe_setup_receive_structures(struct adapter *);
141static int	ixgbe_setup_receive_ring(struct rx_ring *);
142static void     ixgbe_initialize_receive_units(struct adapter *);
143static void     ixgbe_free_receive_structures(struct adapter *);
144static void     ixgbe_free_receive_buffers(struct rx_ring *);
145static void	ixgbe_setup_hw_rsc(struct rx_ring *);
146
147static void     ixgbe_enable_intr(struct adapter *);
148static void     ixgbe_disable_intr(struct adapter *);
149static void     ixgbe_update_stats_counters(struct adapter *);
150static void	ixgbe_txeof(struct tx_ring *);
151static bool	ixgbe_rxeof(struct ix_queue *);
152static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
153static void     ixgbe_set_promisc(struct adapter *);
154static void     ixgbe_set_multi(struct adapter *);
155static void     ixgbe_update_link_status(struct adapter *);
156static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
157static int      ixgbe_xmit(struct tx_ring *, struct mbuf **);
158static int	ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS);
159static int	ixgbe_set_advertise(SYSCTL_HANDLER_ARGS);
160static int	ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS);
161static int	ixgbe_dma_malloc(struct adapter *, bus_size_t,
162		    struct ixgbe_dma_alloc *, int);
163static void     ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
164static int	ixgbe_tx_ctx_setup(struct tx_ring *,
165		    struct mbuf *, u32 *, u32 *);
166static int	ixgbe_tso_setup(struct tx_ring *,
167		    struct mbuf *, u32 *, u32 *);
168static void	ixgbe_set_ivar(struct adapter *, u8, u8, s8);
169static void	ixgbe_configure_ivars(struct adapter *);
170static u8 *	ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *);
171
172static void	ixgbe_setup_vlan_hw_support(struct adapter *);
173static void	ixgbe_register_vlan(void *, struct ifnet *, u16);
174static void	ixgbe_unregister_vlan(void *, struct ifnet *, u16);
175
176static void     ixgbe_add_hw_stats(struct adapter *adapter);
177
178static __inline void ixgbe_rx_discard(struct rx_ring *, int);
179static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
180		    struct mbuf *, u32);
181
182static void	ixgbe_enable_rx_drop(struct adapter *);
183static void	ixgbe_disable_rx_drop(struct adapter *);
184
185/* Support for pluggable optic modules */
186static bool	ixgbe_sfp_probe(struct adapter *);
187static void	ixgbe_setup_optics(struct adapter *);
188
189/* Legacy (single vector interrupt handler */
190static void	ixgbe_legacy_irq(void *);
191
192/* The MSI/X Interrupt handlers */
193static void	ixgbe_msix_que(void *);
194static void	ixgbe_msix_link(void *);
195
196/* Deferred interrupt tasklets */
197static void	ixgbe_handle_que(void *, int);
198static void	ixgbe_handle_link(void *, int);
199static void	ixgbe_handle_msf(void *, int);
200static void	ixgbe_handle_mod(void *, int);
201
202#ifdef IXGBE_FDIR
203static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
204static void	ixgbe_reinit_fdir(void *, int);
205#endif
206
207/* Missing shared code prototype */
208extern void ixgbe_stop_mac_link_on_d3_82599(struct ixgbe_hw *hw);
209
210/*********************************************************************
211 *  FreeBSD Device Interface Entry Points
212 *********************************************************************/
213
214static device_method_t ixgbe_methods[] = {
215	/* Device interface */
216	DEVMETHOD(device_probe, ixgbe_probe),
217	DEVMETHOD(device_attach, ixgbe_attach),
218	DEVMETHOD(device_detach, ixgbe_detach),
219	DEVMETHOD(device_shutdown, ixgbe_shutdown),
220	DEVMETHOD_END
221};
222
223static driver_t ixgbe_driver = {
224	"ix", ixgbe_methods, sizeof(struct adapter),
225};
226
227devclass_t ixgbe_devclass;
228DRIVER_MODULE(ixgbe, pci, ixgbe_driver, ixgbe_devclass, 0, 0);
229
230MODULE_DEPEND(ixgbe, pci, 1, 1, 1);
231MODULE_DEPEND(ixgbe, ether, 1, 1, 1);
232
233/*
234** TUNEABLE PARAMETERS:
235*/
236
237static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0,
238		   "IXGBE driver parameters");
239
240/*
241** AIM: Adaptive Interrupt Moderation
242** which means that the interrupt rate
243** is varied over time based on the
244** traffic for that interrupt vector
245*/
246static int ixgbe_enable_aim = TRUE;
247TUNABLE_INT("hw.ixgbe.enable_aim", &ixgbe_enable_aim);
248SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RW, &ixgbe_enable_aim, 0,
249    "Enable adaptive interrupt moderation");
250
251static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY);
252TUNABLE_INT("hw.ixgbe.max_interrupt_rate", &ixgbe_max_interrupt_rate);
253SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
254    &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second");
255
256/* How many packets rxeof tries to clean at a time */
257static int ixgbe_rx_process_limit = 256;
258TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit);
259SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
260    &ixgbe_rx_process_limit, 0,
261    "Maximum number of received packets to process at a time,"
262    "-1 means unlimited");
263
264/* How many packets txeof tries to clean at a time */
265static int ixgbe_tx_process_limit = 256;
266TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit);
267SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
268    &ixgbe_tx_process_limit, 0,
269    "Maximum number of sent packets to process at a time,"
270    "-1 means unlimited");
271
272/*
273** Smart speed setting, default to on
274** this only works as a compile option
275** right now as its during attach, set
276** this to 'ixgbe_smart_speed_off' to
277** disable.
278*/
279static int ixgbe_smart_speed = ixgbe_smart_speed_on;
280
281/*
282 * MSIX should be the default for best performance,
283 * but this allows it to be forced off for testing.
284 */
285static int ixgbe_enable_msix = 1;
286TUNABLE_INT("hw.ixgbe.enable_msix", &ixgbe_enable_msix);
287SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0,
288    "Enable MSI-X interrupts");
289
290/*
291 * Number of Queues, can be set to 0,
292 * it then autoconfigures based on the
293 * number of cpus with a max of 8. This
294 * can be overriden manually here.
295 */
296static int ixgbe_num_queues = 0;
297TUNABLE_INT("hw.ixgbe.num_queues", &ixgbe_num_queues);
298SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0,
299    "Number of queues to configure, 0 indicates autoconfigure");
300
301/*
302** Number of TX descriptors per ring,
303** setting higher than RX as this seems
304** the better performing choice.
305*/
306static int ixgbe_txd = PERFORM_TXD;
307TUNABLE_INT("hw.ixgbe.txd", &ixgbe_txd);
308SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0,
309    "Number of receive descriptors per queue");
310
311/* Number of RX descriptors per ring */
312static int ixgbe_rxd = PERFORM_RXD;
313TUNABLE_INT("hw.ixgbe.rxd", &ixgbe_rxd);
314SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0,
315    "Number of receive descriptors per queue");
316
317/*
318** Defining this on will allow the use
319** of unsupported SFP+ modules, note that
320** doing so you are on your own :)
321*/
322static int allow_unsupported_sfp = FALSE;
323TUNABLE_INT("hw.ixgbe.unsupported_sfp", &allow_unsupported_sfp);
324
325/*
326** HW RSC control:
327**  this feature only works with
328**  IPv4, and only on 82599 and later.
329**  Also this will cause IP forwarding to
330**  fail and that can't be controlled by
331**  the stack as LRO can. For all these
332**  reasons I've deemed it best to leave
333**  this off and not bother with a tuneable
334**  interface, this would need to be compiled
335**  to enable.
336*/
337static bool ixgbe_rsc_enable = FALSE;
338
339/* Keep running tab on them for sanity check */
340static int ixgbe_total_ports;
341
342#ifdef IXGBE_FDIR
343/*
344** For Flow Director: this is the
345** number of TX packets we sample
346** for the filter pool, this means
347** every 20th packet will be probed.
348**
349** This feature can be disabled by
350** setting this to 0.
351*/
352static int atr_sample_rate = 20;
353/*
354** Flow Director actually 'steals'
355** part of the packet buffer as its
356** filter pool, this variable controls
357** how much it uses:
358**  0 = 64K, 1 = 128K, 2 = 256K
359*/
360static int fdir_pballoc = 1;
361#endif
362
363#ifdef DEV_NETMAP
364/*
365 * The #ifdef DEV_NETMAP / #endif blocks in this file are meant to
366 * be a reference on how to implement netmap support in a driver.
367 * Additional comments are in ixgbe_netmap.h .
368 *
369 * <dev/netmap/ixgbe_netmap.h> contains functions for netmap support
370 * that extend the standard driver.
371 */
372#include <dev/netmap/ixgbe_netmap.h>
373#endif /* DEV_NETMAP */
374
375/*********************************************************************
376 *  Device identification routine
377 *
378 *  ixgbe_probe determines if the driver should be loaded on
379 *  adapter based on PCI vendor/device id of the adapter.
380 *
381 *  return BUS_PROBE_DEFAULT on success, positive on failure
382 *********************************************************************/
383
384static int
385ixgbe_probe(device_t dev)
386{
387	ixgbe_vendor_info_t *ent;
388
389	u16	pci_vendor_id = 0;
390	u16	pci_device_id = 0;
391	u16	pci_subvendor_id = 0;
392	u16	pci_subdevice_id = 0;
393	char	adapter_name[256];
394
395	INIT_DEBUGOUT("ixgbe_probe: begin");
396
397	pci_vendor_id = pci_get_vendor(dev);
398	if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID)
399		return (ENXIO);
400
401	pci_device_id = pci_get_device(dev);
402	pci_subvendor_id = pci_get_subvendor(dev);
403	pci_subdevice_id = pci_get_subdevice(dev);
404
405	ent = ixgbe_vendor_info_array;
406	while (ent->vendor_id != 0) {
407		if ((pci_vendor_id == ent->vendor_id) &&
408		    (pci_device_id == ent->device_id) &&
409
410		    ((pci_subvendor_id == ent->subvendor_id) ||
411		     (ent->subvendor_id == 0)) &&
412
413		    ((pci_subdevice_id == ent->subdevice_id) ||
414		     (ent->subdevice_id == 0))) {
415			sprintf(adapter_name, "%s, Version - %s",
416				ixgbe_strings[ent->index],
417				ixgbe_driver_version);
418			device_set_desc_copy(dev, adapter_name);
419			++ixgbe_total_ports;
420			return (BUS_PROBE_DEFAULT);
421		}
422		ent++;
423	}
424	return (ENXIO);
425}
426
427/*********************************************************************
428 *  Device initialization routine
429 *
430 *  The attach entry point is called when the driver is being loaded.
431 *  This routine identifies the type of hardware, allocates all resources
432 *  and initializes the hardware.
433 *
434 *  return 0 on success, positive on failure
435 *********************************************************************/
436
437static int
438ixgbe_attach(device_t dev)
439{
440	struct adapter *adapter;
441	struct ixgbe_hw *hw;
442	int             error = 0;
443	u16		csum;
444	u32		ctrl_ext;
445
446	INIT_DEBUGOUT("ixgbe_attach: begin");
447
448	/* Allocate, clear, and link in our adapter structure */
449	adapter = device_get_softc(dev);
450	adapter->dev = adapter->osdep.dev = dev;
451	hw = &adapter->hw;
452
453	/* Core Lock Init*/
454	IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
455
456	/* SYSCTL APIs */
457
458	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
459			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
460			OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW,
461			adapter, 0, ixgbe_set_flowcntl, "I", "Flow Control");
462
463        SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
464			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
465			OID_AUTO, "enable_aim", CTLTYPE_INT|CTLFLAG_RW,
466			&ixgbe_enable_aim, 1, "Interrupt Moderation");
467
468	/*
469	** Allow a kind of speed control by forcing the autoneg
470	** advertised speed list to only a certain value, this
471	** supports 1G on 82599 devices, and 100Mb on x540.
472	*/
473	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
474			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
475			OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW,
476			adapter, 0, ixgbe_set_advertise, "I", "Link Speed");
477
478	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
479			SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
480			OID_AUTO, "ts", CTLTYPE_INT | CTLFLAG_RW, adapter,
481			0, ixgbe_set_thermal_test, "I", "Thermal Test");
482
483	/* Set up the timer callout */
484	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
485
486	/* Determine hardware revision */
487	ixgbe_identify_hardware(adapter);
488
489	/* Do base PCI setup - map BAR0 */
490	if (ixgbe_allocate_pci_resources(adapter)) {
491		device_printf(dev, "Allocation of PCI resources failed\n");
492		error = ENXIO;
493		goto err_out;
494	}
495
496	/* Do descriptor calc and sanity checks */
497	if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 ||
498	    ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) {
499		device_printf(dev, "TXD config issue, using default!\n");
500		adapter->num_tx_desc = DEFAULT_TXD;
501	} else
502		adapter->num_tx_desc = ixgbe_txd;
503
504	/*
505	** With many RX rings it is easy to exceed the
506	** system mbuf allocation. Tuning nmbclusters
507	** can alleviate this.
508	*/
509	if (nmbclusters > 0 ) {
510		int s;
511		s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports;
512		if (s > nmbclusters) {
513			device_printf(dev, "RX Descriptors exceed "
514			    "system mbuf max, using default instead!\n");
515			ixgbe_rxd = DEFAULT_RXD;
516		}
517	}
518
519	if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 ||
520	    ixgbe_rxd < MIN_TXD || ixgbe_rxd > MAX_TXD) {
521		device_printf(dev, "RXD config issue, using default!\n");
522		adapter->num_rx_desc = DEFAULT_RXD;
523	} else
524		adapter->num_rx_desc = ixgbe_rxd;
525
526	/* Allocate our TX/RX Queues */
527	if (ixgbe_allocate_queues(adapter)) {
528		error = ENOMEM;
529		goto err_out;
530	}
531
532	/* Allocate multicast array memory. */
533	adapter->mta = malloc(sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
534	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535	if (adapter->mta == NULL) {
536		device_printf(dev, "Can not allocate multicast setup array\n");
537		error = ENOMEM;
538		goto err_late;
539	}
540
541	/* Initialize the shared code */
542	hw->allow_unsupported_sfp = allow_unsupported_sfp;
543	error = ixgbe_init_shared_code(hw);
544	if (error == IXGBE_ERR_SFP_NOT_PRESENT) {
545		/*
546		** No optics in this port, set up
547		** so the timer routine will probe
548		** for later insertion.
549		*/
550		adapter->sfp_probe = TRUE;
551		error = 0;
552	} else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) {
553		device_printf(dev,"Unsupported SFP+ module detected!\n");
554		error = EIO;
555		goto err_late;
556	} else if (error) {
557		device_printf(dev,"Unable to initialize the shared code\n");
558		error = EIO;
559		goto err_late;
560	}
561
562	/* Make sure we have a good EEPROM before we read from it */
563	if (ixgbe_validate_eeprom_checksum(&adapter->hw, &csum) < 0) {
564		device_printf(dev,"The EEPROM Checksum Is Not Valid\n");
565		error = EIO;
566		goto err_late;
567	}
568
569	error = ixgbe_init_hw(hw);
570	switch (error) {
571	case IXGBE_ERR_EEPROM_VERSION:
572		device_printf(dev, "This device is a pre-production adapter/"
573		    "LOM.  Please be aware there may be issues associated "
574		    "with your hardware.\n If you are experiencing problems "
575		    "please contact your Intel or hardware representative "
576		    "who provided you with this hardware.\n");
577		break;
578	case IXGBE_ERR_SFP_NOT_SUPPORTED:
579		device_printf(dev,"Unsupported SFP+ Module\n");
580		error = EIO;
581		goto err_late;
582	case IXGBE_ERR_SFP_NOT_PRESENT:
583		device_printf(dev,"No SFP+ Module found\n");
584		/* falls thru */
585	default:
586		break;
587	}
588
589	/* Detect and set physical type */
590	ixgbe_setup_optics(adapter);
591
592	if ((adapter->msix > 1) && (ixgbe_enable_msix))
593		error = ixgbe_allocate_msix(adapter);
594	else
595		error = ixgbe_allocate_legacy(adapter);
596	if (error)
597		goto err_late;
598
599	/* Setup OS specific network interface */
600	if (ixgbe_setup_interface(dev, adapter) != 0)
601		goto err_late;
602
603	/* Initialize statistics */
604	ixgbe_update_stats_counters(adapter);
605
606	/* Register for VLAN events */
607	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
608	    ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
609	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
610	    ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
611
612        /*
613	** Check PCIE slot type/speed/width
614	*/
615	ixgbe_get_slot_info(hw);
616
617	/* Set an initial default flow control value */
618	adapter->fc =  ixgbe_fc_full;
619
620	/* let hardware know driver is loaded */
621	ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
622	ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD;
623	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
624
625	ixgbe_add_hw_stats(adapter);
626
627#ifdef DEV_NETMAP
628	ixgbe_netmap_attach(adapter);
629#endif /* DEV_NETMAP */
630	INIT_DEBUGOUT("ixgbe_attach: end");
631	return (0);
632err_late:
633	ixgbe_free_transmit_structures(adapter);
634	ixgbe_free_receive_structures(adapter);
635err_out:
636	if (adapter->ifp != NULL)
637		if_free(adapter->ifp);
638	ixgbe_free_pci_resources(adapter);
639	free(adapter->mta, M_DEVBUF);
640	return (error);
641
642}
643
644/*********************************************************************
645 *  Device removal routine
646 *
647 *  The detach entry point is called when the driver is being removed.
648 *  This routine stops the adapter and deallocates all the resources
649 *  that were allocated for driver operation.
650 *
651 *  return 0 on success, positive on failure
652 *********************************************************************/
653
654static int
655ixgbe_detach(device_t dev)
656{
657	struct adapter *adapter = device_get_softc(dev);
658	struct ix_queue *que = adapter->queues;
659	struct tx_ring *txr = adapter->tx_rings;
660	u32	ctrl_ext;
661
662	INIT_DEBUGOUT("ixgbe_detach: begin");
663
664	/* Make sure VLANS are not using driver */
665	if (adapter->ifp->if_vlantrunk != NULL) {
666		device_printf(dev,"Vlan in use, detach first\n");
667		return (EBUSY);
668	}
669
670	IXGBE_CORE_LOCK(adapter);
671	ixgbe_stop(adapter);
672	IXGBE_CORE_UNLOCK(adapter);
673
674	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
675		if (que->tq) {
676#ifndef IXGBE_LEGACY_TX
677			taskqueue_drain(que->tq, &txr->txq_task);
678#endif
679			taskqueue_drain(que->tq, &que->que_task);
680			taskqueue_free(que->tq);
681		}
682	}
683
684	/* Drain the Link queue */
685	if (adapter->tq) {
686		taskqueue_drain(adapter->tq, &adapter->link_task);
687		taskqueue_drain(adapter->tq, &adapter->mod_task);
688		taskqueue_drain(adapter->tq, &adapter->msf_task);
689#ifdef IXGBE_FDIR
690		taskqueue_drain(adapter->tq, &adapter->fdir_task);
691#endif
692		taskqueue_free(adapter->tq);
693	}
694
695	/* let hardware know driver is unloading */
696	ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
697	ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD;
698	IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext);
699
700	/* Unregister VLAN events */
701	if (adapter->vlan_attach != NULL)
702		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
703	if (adapter->vlan_detach != NULL)
704		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
705
706	ether_ifdetach(adapter->ifp);
707	callout_drain(&adapter->timer);
708#ifdef DEV_NETMAP
709	netmap_detach(adapter->ifp);
710#endif /* DEV_NETMAP */
711	ixgbe_free_pci_resources(adapter);
712	bus_generic_detach(dev);
713	if_free(adapter->ifp);
714
715	ixgbe_free_transmit_structures(adapter);
716	ixgbe_free_receive_structures(adapter);
717	free(adapter->mta, M_DEVBUF);
718
719	IXGBE_CORE_LOCK_DESTROY(adapter);
720	return (0);
721}
722
723/*********************************************************************
724 *
725 *  Shutdown entry point
726 *
727 **********************************************************************/
728
729static int
730ixgbe_shutdown(device_t dev)
731{
732	struct adapter *adapter = device_get_softc(dev);
733	IXGBE_CORE_LOCK(adapter);
734	ixgbe_stop(adapter);
735	IXGBE_CORE_UNLOCK(adapter);
736	return (0);
737}
738
739
740#ifdef IXGBE_LEGACY_TX
741/*********************************************************************
742 *  Transmit entry point
743 *
744 *  ixgbe_start is called by the stack to initiate a transmit.
745 *  The driver will remain in this routine as long as there are
746 *  packets to transmit and transmit resources are available.
747 *  In case resources are not available stack is notified and
748 *  the packet is requeued.
749 **********************************************************************/
750
751static void
752ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
753{
754	struct mbuf    *m_head;
755	struct adapter *adapter = txr->adapter;
756
757	IXGBE_TX_LOCK_ASSERT(txr);
758
759	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
760		return;
761	if (!adapter->link_active)
762		return;
763
764	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
765		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
766			break;
767
768		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
769		if (m_head == NULL)
770			break;
771
772		if (ixgbe_xmit(txr, &m_head)) {
773			if (m_head != NULL)
774				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
775			break;
776		}
777		/* Send a copy of the frame to the BPF listener */
778		ETHER_BPF_MTAP(ifp, m_head);
779
780		/* Set watchdog on */
781		txr->watchdog_time = ticks;
782		txr->queue_status = IXGBE_QUEUE_WORKING;
783
784	}
785	return;
786}
787
788/*
789 * Legacy TX start - called by the stack, this
790 * always uses the first tx ring, and should
791 * not be used with multiqueue tx enabled.
792 */
793static void
794ixgbe_start(struct ifnet *ifp)
795{
796	struct adapter *adapter = ifp->if_softc;
797	struct tx_ring	*txr = adapter->tx_rings;
798
799	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
800		IXGBE_TX_LOCK(txr);
801		ixgbe_start_locked(txr, ifp);
802		IXGBE_TX_UNLOCK(txr);
803	}
804	return;
805}
806
807#else /* ! IXGBE_LEGACY_TX */
808
809/*
810** Multiqueue Transmit driver
811**
812*/
813static int
814ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
815{
816	struct adapter	*adapter = ifp->if_softc;
817	struct ix_queue	*que;
818	struct tx_ring	*txr;
819	int 		i, err = 0;
820
821	/* Which queue to use */
822	if ((m->m_flags & M_FLOWID) != 0)
823		i = m->m_pkthdr.flowid % adapter->num_queues;
824	else
825		i = curcpu % adapter->num_queues;
826
827	txr = &adapter->tx_rings[i];
828	que = &adapter->queues[i];
829
830	err = drbr_enqueue(ifp, txr->br, m);
831	if (err)
832		return (err);
833	if (IXGBE_TX_TRYLOCK(txr)) {
834		err = ixgbe_mq_start_locked(ifp, txr);
835		IXGBE_TX_UNLOCK(txr);
836	} else
837		taskqueue_enqueue(que->tq, &txr->txq_task);
838
839	return (err);
840}
841
842static int
843ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
844{
845	struct adapter  *adapter = txr->adapter;
846        struct mbuf     *next;
847        int             enqueued = 0, err = 0;
848
849	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
850	    adapter->link_active == 0)
851		return (ENETDOWN);
852
853	/* Process the queue */
854#if __FreeBSD_version < 901504
855	next = drbr_dequeue(ifp, txr->br);
856	while (next != NULL) {
857		if ((err = ixgbe_xmit(txr, &next)) != 0) {
858			if (next != NULL)
859				err = drbr_enqueue(ifp, txr->br, next);
860#else
861	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
862		if ((err = ixgbe_xmit(txr, &next)) != 0) {
863			if (next == NULL) {
864				drbr_advance(ifp, txr->br);
865			} else {
866				drbr_putback(ifp, txr->br, next);
867			}
868#endif
869			break;
870		}
871#if __FreeBSD_version >= 901504
872		drbr_advance(ifp, txr->br);
873#endif
874		enqueued++;
875		/* Send a copy of the frame to the BPF listener */
876		ETHER_BPF_MTAP(ifp, next);
877		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
878			break;
879#if __FreeBSD_version < 901504
880		next = drbr_dequeue(ifp, txr->br);
881#endif
882	}
883
884	if (enqueued > 0) {
885		/* Set watchdog on */
886		txr->queue_status = IXGBE_QUEUE_WORKING;
887		txr->watchdog_time = ticks;
888	}
889
890	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
891		ixgbe_txeof(txr);
892
893	return (err);
894}
895
896/*
897 * Called from a taskqueue to drain queued transmit packets.
898 */
899static void
900ixgbe_deferred_mq_start(void *arg, int pending)
901{
902	struct tx_ring *txr = arg;
903	struct adapter *adapter = txr->adapter;
904	struct ifnet *ifp = adapter->ifp;
905
906	IXGBE_TX_LOCK(txr);
907	if (!drbr_empty(ifp, txr->br))
908		ixgbe_mq_start_locked(ifp, txr);
909	IXGBE_TX_UNLOCK(txr);
910}
911
912/*
913** Flush all ring buffers
914*/
915static void
916ixgbe_qflush(struct ifnet *ifp)
917{
918	struct adapter	*adapter = ifp->if_softc;
919	struct tx_ring	*txr = adapter->tx_rings;
920	struct mbuf	*m;
921
922	for (int i = 0; i < adapter->num_queues; i++, txr++) {
923		IXGBE_TX_LOCK(txr);
924		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
925			m_freem(m);
926		IXGBE_TX_UNLOCK(txr);
927	}
928	if_qflush(ifp);
929}
930#endif /* IXGBE_LEGACY_TX */
931
932/*********************************************************************
933 *  Ioctl entry point
934 *
935 *  ixgbe_ioctl is called when the user wants to configure the
936 *  interface.
937 *
938 *  return 0 on success, positive on failure
939 **********************************************************************/
940
941static int
942ixgbe_ioctl(struct ifnet * ifp, u_long command, caddr_t data)
943{
944	struct adapter	*adapter = ifp->if_softc;
945	struct ixgbe_hw *hw = &adapter->hw;
946	struct ifreq	*ifr = (struct ifreq *) data;
947#if defined(INET) || defined(INET6)
948	struct ifaddr *ifa = (struct ifaddr *)data;
949	bool		avoid_reset = FALSE;
950#endif
951	int             error = 0;
952
953	switch (command) {
954
955        case SIOCSIFADDR:
956#ifdef INET
957		if (ifa->ifa_addr->sa_family == AF_INET)
958			avoid_reset = TRUE;
959#endif
960#ifdef INET6
961		if (ifa->ifa_addr->sa_family == AF_INET6)
962			avoid_reset = TRUE;
963#endif
964#if defined(INET) || defined(INET6)
965		/*
966		** Calling init results in link renegotiation,
967		** so we avoid doing it when possible.
968		*/
969		if (avoid_reset) {
970			ifp->if_flags |= IFF_UP;
971			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
972				ixgbe_init(adapter);
973			if (!(ifp->if_flags & IFF_NOARP))
974				arp_ifinit(ifp, ifa);
975		} else
976			error = ether_ioctl(ifp, command, data);
977#endif
978		break;
979	case SIOCSIFMTU:
980		IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)");
981		if (ifr->ifr_mtu > IXGBE_MAX_FRAME_SIZE - ETHER_HDR_LEN) {
982			error = EINVAL;
983		} else {
984			IXGBE_CORE_LOCK(adapter);
985			ifp->if_mtu = ifr->ifr_mtu;
986			adapter->max_frame_size =
987				ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
988			ixgbe_init_locked(adapter);
989			IXGBE_CORE_UNLOCK(adapter);
990		}
991		break;
992	case SIOCSIFFLAGS:
993		IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)");
994		IXGBE_CORE_LOCK(adapter);
995		if (ifp->if_flags & IFF_UP) {
996			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
997				if ((ifp->if_flags ^ adapter->if_flags) &
998				    (IFF_PROMISC | IFF_ALLMULTI)) {
999					ixgbe_set_promisc(adapter);
1000                                }
1001			} else
1002				ixgbe_init_locked(adapter);
1003		} else
1004			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1005				ixgbe_stop(adapter);
1006		adapter->if_flags = ifp->if_flags;
1007		IXGBE_CORE_UNLOCK(adapter);
1008		break;
1009	case SIOCADDMULTI:
1010	case SIOCDELMULTI:
1011		IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI");
1012		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1013			IXGBE_CORE_LOCK(adapter);
1014			ixgbe_disable_intr(adapter);
1015			ixgbe_set_multi(adapter);
1016			ixgbe_enable_intr(adapter);
1017			IXGBE_CORE_UNLOCK(adapter);
1018		}
1019		break;
1020	case SIOCSIFMEDIA:
1021	case SIOCGIFMEDIA:
1022		IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)");
1023		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1024		break;
1025	case SIOCSIFCAP:
1026	{
1027		int mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1028		IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)");
1029		if (mask & IFCAP_HWCSUM)
1030			ifp->if_capenable ^= IFCAP_HWCSUM;
1031		if (mask & IFCAP_TSO4)
1032			ifp->if_capenable ^= IFCAP_TSO4;
1033		if (mask & IFCAP_TSO6)
1034			ifp->if_capenable ^= IFCAP_TSO6;
1035		if (mask & IFCAP_LRO)
1036			ifp->if_capenable ^= IFCAP_LRO;
1037		if (mask & IFCAP_VLAN_HWTAGGING)
1038			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1039		if (mask & IFCAP_VLAN_HWFILTER)
1040			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1041		if (mask & IFCAP_VLAN_HWTSO)
1042			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1043		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1044			IXGBE_CORE_LOCK(adapter);
1045			ixgbe_init_locked(adapter);
1046			IXGBE_CORE_UNLOCK(adapter);
1047		}
1048		VLAN_CAPABILITIES(ifp);
1049		break;
1050	}
1051	case SIOCGI2C:
1052	{
1053		struct ixgbe_i2c_req	i2c;
1054		IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)");
1055		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
1056		if (error)
1057			break;
1058		if ((i2c.dev_addr != 0xA0) || (i2c.dev_addr != 0xA2)){
1059			error = EINVAL;
1060			break;
1061		}
1062		hw->phy.ops.read_i2c_byte(hw, i2c.offset,
1063		    i2c.dev_addr, i2c.data);
1064		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
1065		break;
1066	}
1067	default:
1068		IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command);
1069		error = ether_ioctl(ifp, command, data);
1070		break;
1071	}
1072
1073	return (error);
1074}
1075
1076/*********************************************************************
1077 *  Init entry point
1078 *
1079 *  This routine is used in two ways. It is used by the stack as
1080 *  init entry point in network interface structure. It is also used
1081 *  by the driver as a hw/sw initialization routine to get to a
1082 *  consistent state.
1083 *
1084 *  return 0 on success, positive on failure
1085 **********************************************************************/
1086#define IXGBE_MHADD_MFS_SHIFT 16
1087
1088static void
1089ixgbe_init_locked(struct adapter *adapter)
1090{
1091	struct ifnet   *ifp = adapter->ifp;
1092	device_t 	dev = adapter->dev;
1093	struct ixgbe_hw *hw = &adapter->hw;
1094	u32		k, txdctl, mhadd, gpie;
1095	u32		rxdctl, rxctrl;
1096
1097	mtx_assert(&adapter->core_mtx, MA_OWNED);
1098	INIT_DEBUGOUT("ixgbe_init_locked: begin");
1099	hw->adapter_stopped = FALSE;
1100	ixgbe_stop_adapter(hw);
1101        callout_stop(&adapter->timer);
1102
1103        /* reprogram the RAR[0] in case user changed it. */
1104        ixgbe_set_rar(hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
1105
1106	/* Get the latest mac address, User can use a LAA */
1107	bcopy(IF_LLADDR(adapter->ifp), hw->mac.addr,
1108	      IXGBE_ETH_LENGTH_OF_ADDRESS);
1109	ixgbe_set_rar(hw, 0, hw->mac.addr, 0, 1);
1110	hw->addr_ctrl.rar_used_count = 1;
1111
1112	/* Set the various hardware offload abilities */
1113	ifp->if_hwassist = 0;
1114	if (ifp->if_capenable & IFCAP_TSO)
1115		ifp->if_hwassist |= CSUM_TSO;
1116	if (ifp->if_capenable & IFCAP_TXCSUM) {
1117		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1118#if __FreeBSD_version >= 800000
1119		if (hw->mac.type != ixgbe_mac_82598EB)
1120			ifp->if_hwassist |= CSUM_SCTP;
1121#endif
1122	}
1123
1124	/* Prepare transmit descriptors and buffers */
1125	if (ixgbe_setup_transmit_structures(adapter)) {
1126		device_printf(dev,"Could not setup transmit structures\n");
1127		ixgbe_stop(adapter);
1128		return;
1129	}
1130
1131	ixgbe_init_hw(hw);
1132	ixgbe_initialize_transmit_units(adapter);
1133
1134	/* Setup Multicast table */
1135	ixgbe_set_multi(adapter);
1136
1137	/*
1138	** Determine the correct mbuf pool
1139	** for doing jumbo frames
1140	*/
1141	if (adapter->max_frame_size <= 2048)
1142		adapter->rx_mbuf_sz = MCLBYTES;
1143	else if (adapter->max_frame_size <= 4096)
1144		adapter->rx_mbuf_sz = MJUMPAGESIZE;
1145	else if (adapter->max_frame_size <= 9216)
1146		adapter->rx_mbuf_sz = MJUM9BYTES;
1147	else
1148		adapter->rx_mbuf_sz = MJUM16BYTES;
1149
1150	/* Prepare receive descriptors and buffers */
1151	if (ixgbe_setup_receive_structures(adapter)) {
1152		device_printf(dev,"Could not setup receive structures\n");
1153		ixgbe_stop(adapter);
1154		return;
1155	}
1156
1157	/* Configure RX settings */
1158	ixgbe_initialize_receive_units(adapter);
1159
1160	gpie = IXGBE_READ_REG(&adapter->hw, IXGBE_GPIE);
1161
1162	/* Enable Fan Failure Interrupt */
1163	gpie |= IXGBE_SDP1_GPIEN;
1164
1165	/* Add for Module detection */
1166	if (hw->mac.type == ixgbe_mac_82599EB)
1167		gpie |= IXGBE_SDP2_GPIEN;
1168
1169	/* Thermal Failure Detection */
1170	if (hw->mac.type == ixgbe_mac_X540)
1171		gpie |= IXGBE_SDP0_GPIEN;
1172
1173	if (adapter->msix > 1) {
1174		/* Enable Enhanced MSIX mode */
1175		gpie |= IXGBE_GPIE_MSIX_MODE;
1176		gpie |= IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT |
1177		    IXGBE_GPIE_OCD;
1178	}
1179	IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie);
1180
1181	/* Set MTU size */
1182	if (ifp->if_mtu > ETHERMTU) {
1183		mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD);
1184		mhadd &= ~IXGBE_MHADD_MFS_MASK;
1185		mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT;
1186		IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd);
1187	}
1188
1189	/* Now enable all the queues */
1190
1191	for (int i = 0; i < adapter->num_queues; i++) {
1192		txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
1193		txdctl |= IXGBE_TXDCTL_ENABLE;
1194		/* Set WTHRESH to 8, burst writeback */
1195		txdctl |= (8 << 16);
1196		/*
1197		 * When the internal queue falls below PTHRESH (32),
1198		 * start prefetching as long as there are at least
1199		 * HTHRESH (1) buffers ready. The values are taken
1200		 * from the Intel linux driver 3.8.21.
1201		 * Prefetching enables tx line rate even with 1 queue.
1202		 */
1203		txdctl |= (32 << 0) | (1 << 8);
1204		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), txdctl);
1205	}
1206
1207	for (int i = 0; i < adapter->num_queues; i++) {
1208		rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
1209		if (hw->mac.type == ixgbe_mac_82598EB) {
1210			/*
1211			** PTHRESH = 21
1212			** HTHRESH = 4
1213			** WTHRESH = 8
1214			*/
1215			rxdctl &= ~0x3FFFFF;
1216			rxdctl |= 0x080420;
1217		}
1218		rxdctl |= IXGBE_RXDCTL_ENABLE;
1219		IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), rxdctl);
1220		for (k = 0; k < 10; k++) {
1221			if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)) &
1222			    IXGBE_RXDCTL_ENABLE)
1223				break;
1224			else
1225				msec_delay(1);
1226		}
1227		wmb();
1228#ifdef DEV_NETMAP
1229		/*
1230		 * In netmap mode, we must preserve the buffers made
1231		 * available to userspace before the if_init()
1232		 * (this is true by default on the TX side, because
1233		 * init makes all buffers available to userspace).
1234		 *
1235		 * netmap_reset() and the device specific routines
1236		 * (e.g. ixgbe_setup_receive_rings()) map these
1237		 * buffers at the end of the NIC ring, so here we
1238		 * must set the RDT (tail) register to make sure
1239		 * they are not overwritten.
1240		 *
1241		 * In this driver the NIC ring starts at RDH = 0,
1242		 * RDT points to the last slot available for reception (?),
1243		 * so RDT = num_rx_desc - 1 means the whole ring is available.
1244		 */
1245		if (ifp->if_capenable & IFCAP_NETMAP) {
1246			struct netmap_adapter *na = NA(adapter->ifp);
1247			struct netmap_kring *kring = &na->rx_rings[i];
1248			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
1249
1250			IXGBE_WRITE_REG(hw, IXGBE_RDT(i), t);
1251		} else
1252#endif /* DEV_NETMAP */
1253		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), adapter->num_rx_desc - 1);
1254	}
1255
1256	/* Set up VLAN support and filter */
1257	ixgbe_setup_vlan_hw_support(adapter);
1258
1259	/* Enable Receive engine */
1260	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
1261	if (hw->mac.type == ixgbe_mac_82598EB)
1262		rxctrl |= IXGBE_RXCTRL_DMBYPS;
1263	rxctrl |= IXGBE_RXCTRL_RXEN;
1264	ixgbe_enable_rx_dma(hw, rxctrl);
1265
1266	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
1267
1268	/* Set up MSI/X routing */
1269	if (ixgbe_enable_msix)  {
1270		ixgbe_configure_ivars(adapter);
1271		/* Set up auto-mask */
1272		if (hw->mac.type == ixgbe_mac_82598EB)
1273			IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1274		else {
1275			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
1276			IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF);
1277		}
1278	} else {  /* Simple settings for Legacy/MSI */
1279                ixgbe_set_ivar(adapter, 0, 0, 0);
1280                ixgbe_set_ivar(adapter, 0, 0, 1);
1281		IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
1282	}
1283
1284#ifdef IXGBE_FDIR
1285	/* Init Flow director */
1286	if (hw->mac.type != ixgbe_mac_82598EB) {
1287		u32 hdrm = 32 << fdir_pballoc;
1288
1289		hw->mac.ops.setup_rxpba(hw, 0, hdrm, PBA_STRATEGY_EQUAL);
1290		ixgbe_init_fdir_signature_82599(&adapter->hw, fdir_pballoc);
1291	}
1292#endif
1293
1294	/*
1295	** Check on any SFP devices that
1296	** need to be kick-started
1297	*/
1298	if (hw->phy.type == ixgbe_phy_none) {
1299		int err = hw->phy.ops.identify(hw);
1300		if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
1301                	device_printf(dev,
1302			    "Unsupported SFP+ module type was detected.\n");
1303			return;
1304        	}
1305	}
1306
1307	/* Set moderation on the Link interrupt */
1308	IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->linkvec), IXGBE_LINK_ITR);
1309
1310	/* Config/Enable Link */
1311	ixgbe_config_link(adapter);
1312
1313	/* Hardware Packet Buffer & Flow Control setup */
1314	{
1315		u32 rxpb, frame, size, tmp;
1316
1317		frame = adapter->max_frame_size;
1318
1319		/* Calculate High Water */
1320		if (hw->mac.type == ixgbe_mac_X540)
1321			tmp = IXGBE_DV_X540(frame, frame);
1322		else
1323			tmp = IXGBE_DV(frame, frame);
1324		size = IXGBE_BT2KB(tmp);
1325		rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10;
1326		hw->fc.high_water[0] = rxpb - size;
1327
1328		/* Now calculate Low Water */
1329		if (hw->mac.type == ixgbe_mac_X540)
1330			tmp = IXGBE_LOW_DV_X540(frame);
1331		else
1332			tmp = IXGBE_LOW_DV(frame);
1333		hw->fc.low_water[0] = IXGBE_BT2KB(tmp);
1334
1335		hw->fc.requested_mode = adapter->fc;
1336		hw->fc.pause_time = IXGBE_FC_PAUSE;
1337		hw->fc.send_xon = TRUE;
1338	}
1339	/* Initialize the FC settings */
1340	ixgbe_start_hw(hw);
1341
1342	/* And now turn on interrupts */
1343	ixgbe_enable_intr(adapter);
1344
1345	/* Now inform the stack we're ready */
1346	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1347
1348	return;
1349}
1350
1351static void
1352ixgbe_init(void *arg)
1353{
1354	struct adapter *adapter = arg;
1355
1356	IXGBE_CORE_LOCK(adapter);
1357	ixgbe_init_locked(adapter);
1358	IXGBE_CORE_UNLOCK(adapter);
1359	return;
1360}
1361
1362
1363/*
1364**
1365** MSIX Interrupt Handlers and Tasklets
1366**
1367*/
1368
1369static inline void
1370ixgbe_enable_queue(struct adapter *adapter, u32 vector)
1371{
1372	struct ixgbe_hw *hw = &adapter->hw;
1373	u64	queue = (u64)(1 << vector);
1374	u32	mask;
1375
1376	if (hw->mac.type == ixgbe_mac_82598EB) {
1377                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1378                IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
1379	} else {
1380                mask = (queue & 0xFFFFFFFF);
1381                if (mask)
1382                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
1383                mask = (queue >> 32);
1384                if (mask)
1385                        IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
1386	}
1387}
1388
1389static inline void
1390ixgbe_disable_queue(struct adapter *adapter, u32 vector)
1391{
1392	struct ixgbe_hw *hw = &adapter->hw;
1393	u64	queue = (u64)(1 << vector);
1394	u32	mask;
1395
1396	if (hw->mac.type == ixgbe_mac_82598EB) {
1397                mask = (IXGBE_EIMS_RTX_QUEUE & queue);
1398                IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
1399	} else {
1400                mask = (queue & 0xFFFFFFFF);
1401                if (mask)
1402                        IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
1403                mask = (queue >> 32);
1404                if (mask)
1405                        IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
1406	}
1407}
1408
1409static void
1410ixgbe_handle_que(void *context, int pending)
1411{
1412	struct ix_queue *que = context;
1413	struct adapter  *adapter = que->adapter;
1414	struct tx_ring  *txr = que->txr;
1415	struct ifnet    *ifp = adapter->ifp;
1416	bool		more;
1417
1418	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1419		more = ixgbe_rxeof(que);
1420		IXGBE_TX_LOCK(txr);
1421		ixgbe_txeof(txr);
1422#ifndef IXGBE_LEGACY_TX
1423		if (!drbr_empty(ifp, txr->br))
1424			ixgbe_mq_start_locked(ifp, txr);
1425#else
1426		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1427			ixgbe_start_locked(txr, ifp);
1428#endif
1429		IXGBE_TX_UNLOCK(txr);
1430	}
1431
1432	/* Reenable this interrupt */
1433	if (que->res != NULL)
1434		ixgbe_enable_queue(adapter, que->msix);
1435	else
1436		ixgbe_enable_intr(adapter);
1437	return;
1438}
1439
1440
1441/*********************************************************************
1442 *
1443 *  Legacy Interrupt Service routine
1444 *
1445 **********************************************************************/
1446
1447static void
1448ixgbe_legacy_irq(void *arg)
1449{
1450	struct ix_queue *que = arg;
1451	struct adapter	*adapter = que->adapter;
1452	struct ixgbe_hw	*hw = &adapter->hw;
1453	struct ifnet    *ifp = adapter->ifp;
1454	struct 		tx_ring *txr = adapter->tx_rings;
1455	bool		more;
1456	u32       	reg_eicr;
1457
1458
1459	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICR);
1460
1461	++que->irqs;
1462	if (reg_eicr == 0) {
1463		ixgbe_enable_intr(adapter);
1464		return;
1465	}
1466
1467	more = ixgbe_rxeof(que);
1468
1469	IXGBE_TX_LOCK(txr);
1470	ixgbe_txeof(txr);
1471#ifdef IXGBE_LEGACY_TX
1472	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1473		ixgbe_start_locked(txr, ifp);
1474#else
1475	if (!drbr_empty(ifp, txr->br))
1476		ixgbe_mq_start_locked(ifp, txr);
1477#endif
1478	IXGBE_TX_UNLOCK(txr);
1479
1480	/* Check for fan failure */
1481	if ((hw->phy.media_type == ixgbe_media_type_copper) &&
1482	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1483                device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1484		    "REPLACE IMMEDIATELY!!\n");
1485		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1);
1486	}
1487
1488	/* Link status change */
1489	if (reg_eicr & IXGBE_EICR_LSC)
1490		taskqueue_enqueue(adapter->tq, &adapter->link_task);
1491
1492	if (more)
1493		taskqueue_enqueue(que->tq, &que->que_task);
1494	else
1495		ixgbe_enable_intr(adapter);
1496	return;
1497}
1498
1499
1500/*********************************************************************
1501 *
1502 *  MSIX Queue Interrupt Service routine
1503 *
1504 **********************************************************************/
1505void
1506ixgbe_msix_que(void *arg)
1507{
1508	struct ix_queue	*que = arg;
1509	struct adapter  *adapter = que->adapter;
1510	struct ifnet    *ifp = adapter->ifp;
1511	struct tx_ring	*txr = que->txr;
1512	struct rx_ring	*rxr = que->rxr;
1513	bool		more;
1514	u32		newitr = 0;
1515
1516	/* Protect against spurious interrupts */
1517	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1518		return;
1519
1520	ixgbe_disable_queue(adapter, que->msix);
1521	++que->irqs;
1522
1523	more = ixgbe_rxeof(que);
1524
1525	IXGBE_TX_LOCK(txr);
1526	ixgbe_txeof(txr);
1527#ifdef IXGBE_LEGACY_TX
1528	if (!IFQ_DRV_IS_EMPTY(ifp->if_snd))
1529		ixgbe_start_locked(txr, ifp);
1530#else
1531	if (!drbr_empty(ifp, txr->br))
1532		ixgbe_mq_start_locked(ifp, txr);
1533#endif
1534	IXGBE_TX_UNLOCK(txr);
1535
1536	/* Do AIM now? */
1537
1538	if (ixgbe_enable_aim == FALSE)
1539		goto no_calc;
1540	/*
1541	** Do Adaptive Interrupt Moderation:
1542        **  - Write out last calculated setting
1543	**  - Calculate based on average size over
1544	**    the last interval.
1545	*/
1546        if (que->eitr_setting)
1547                IXGBE_WRITE_REG(&adapter->hw,
1548                    IXGBE_EITR(que->msix), que->eitr_setting);
1549
1550        que->eitr_setting = 0;
1551
1552        /* Idle, do nothing */
1553        if ((txr->bytes == 0) && (rxr->bytes == 0))
1554                goto no_calc;
1555
1556	if ((txr->bytes) && (txr->packets))
1557               	newitr = txr->bytes/txr->packets;
1558	if ((rxr->bytes) && (rxr->packets))
1559		newitr = max(newitr,
1560		    (rxr->bytes / rxr->packets));
1561	newitr += 24; /* account for hardware frame, crc */
1562
1563	/* set an upper boundary */
1564	newitr = min(newitr, 3000);
1565
1566	/* Be nice to the mid range */
1567	if ((newitr > 300) && (newitr < 1200))
1568		newitr = (newitr / 3);
1569	else
1570		newitr = (newitr / 2);
1571
1572        if (adapter->hw.mac.type == ixgbe_mac_82598EB)
1573                newitr |= newitr << 16;
1574        else
1575                newitr |= IXGBE_EITR_CNT_WDIS;
1576
1577        /* save for next interrupt */
1578        que->eitr_setting = newitr;
1579
1580        /* Reset state */
1581        txr->bytes = 0;
1582        txr->packets = 0;
1583        rxr->bytes = 0;
1584        rxr->packets = 0;
1585
1586no_calc:
1587	if (more)
1588		taskqueue_enqueue(que->tq, &que->que_task);
1589	else
1590		ixgbe_enable_queue(adapter, que->msix);
1591	return;
1592}
1593
1594
1595static void
1596ixgbe_msix_link(void *arg)
1597{
1598	struct adapter	*adapter = arg;
1599	struct ixgbe_hw *hw = &adapter->hw;
1600	u32		reg_eicr;
1601
1602	++adapter->link_irq;
1603
1604	/* First get the cause */
1605	reg_eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
1606	/* Be sure the queue bits are not cleared */
1607	reg_eicr &= ~IXGBE_EICR_RTX_QUEUE;
1608	/* Clear interrupt with write */
1609	IXGBE_WRITE_REG(hw, IXGBE_EICR, reg_eicr);
1610
1611	/* Link status change */
1612	if (reg_eicr & IXGBE_EICR_LSC)
1613		taskqueue_enqueue(adapter->tq, &adapter->link_task);
1614
1615	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
1616#ifdef IXGBE_FDIR
1617		if (reg_eicr & IXGBE_EICR_FLOW_DIR) {
1618			/* This is probably overkill :) */
1619			if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1))
1620				return;
1621                	/* Disable the interrupt */
1622			IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_FLOW_DIR);
1623			taskqueue_enqueue(adapter->tq, &adapter->fdir_task);
1624		} else
1625#endif
1626		if (reg_eicr & IXGBE_EICR_ECC) {
1627                	device_printf(adapter->dev, "\nCRITICAL: ECC ERROR!! "
1628			    "Please Reboot!!\n");
1629			IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
1630		} else
1631
1632		if (reg_eicr & IXGBE_EICR_GPI_SDP1) {
1633                	/* Clear the interrupt */
1634                	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1635			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
1636        	} else if (reg_eicr & IXGBE_EICR_GPI_SDP2) {
1637                	/* Clear the interrupt */
1638                	IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP2);
1639			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
1640		}
1641        }
1642
1643	/* Check for fan failure */
1644	if ((hw->device_id == IXGBE_DEV_ID_82598AT) &&
1645	    (reg_eicr & IXGBE_EICR_GPI_SDP1)) {
1646                device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! "
1647		    "REPLACE IMMEDIATELY!!\n");
1648		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1);
1649	}
1650
1651	/* Check for over temp condition */
1652	if ((hw->mac.type == ixgbe_mac_X540) &&
1653	    (reg_eicr & IXGBE_EICR_TS)) {
1654                device_printf(adapter->dev, "\nCRITICAL: OVER TEMP!! "
1655		    "PHY IS SHUT DOWN!!\n");
1656                device_printf(adapter->dev, "System shutdown required\n");
1657		IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS);
1658	}
1659
1660	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_OTHER);
1661	return;
1662}
1663
1664/*********************************************************************
1665 *
1666 *  Media Ioctl callback
1667 *
1668 *  This routine is called whenever the user queries the status of
1669 *  the interface using ifconfig.
1670 *
1671 **********************************************************************/
1672static void
1673ixgbe_media_status(struct ifnet * ifp, struct ifmediareq * ifmr)
1674{
1675	struct adapter *adapter = ifp->if_softc;
1676
1677	INIT_DEBUGOUT("ixgbe_media_status: begin");
1678	IXGBE_CORE_LOCK(adapter);
1679	ixgbe_update_link_status(adapter);
1680
1681	ifmr->ifm_status = IFM_AVALID;
1682	ifmr->ifm_active = IFM_ETHER;
1683
1684	if (!adapter->link_active) {
1685		IXGBE_CORE_UNLOCK(adapter);
1686		return;
1687	}
1688
1689	ifmr->ifm_status |= IFM_ACTIVE;
1690
1691	switch (adapter->link_speed) {
1692		case IXGBE_LINK_SPEED_100_FULL:
1693			ifmr->ifm_active |= IFM_100_TX | IFM_FDX;
1694			break;
1695		case IXGBE_LINK_SPEED_1GB_FULL:
1696			ifmr->ifm_active |= IFM_1000_SX | IFM_FDX;
1697			break;
1698		case IXGBE_LINK_SPEED_10GB_FULL:
1699			ifmr->ifm_active |= adapter->optics | IFM_FDX;
1700			break;
1701	}
1702
1703	IXGBE_CORE_UNLOCK(adapter);
1704
1705	return;
1706}
1707
1708/*********************************************************************
1709 *
1710 *  Media Ioctl callback
1711 *
1712 *  This routine is called when the user changes speed/duplex using
1713 *  media/mediopt option with ifconfig.
1714 *
1715 **********************************************************************/
1716static int
1717ixgbe_media_change(struct ifnet * ifp)
1718{
1719	struct adapter *adapter = ifp->if_softc;
1720	struct ifmedia *ifm = &adapter->media;
1721
1722	INIT_DEBUGOUT("ixgbe_media_change: begin");
1723
1724	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1725		return (EINVAL);
1726
1727        switch (IFM_SUBTYPE(ifm->ifm_media)) {
1728        case IFM_AUTO:
1729                adapter->hw.phy.autoneg_advertised =
1730		    IXGBE_LINK_SPEED_100_FULL |
1731		    IXGBE_LINK_SPEED_1GB_FULL |
1732		    IXGBE_LINK_SPEED_10GB_FULL;
1733                break;
1734        default:
1735                device_printf(adapter->dev, "Only auto media type\n");
1736		return (EINVAL);
1737        }
1738
1739	return (0);
1740}
1741
1742/*********************************************************************
1743 *
1744 *  This routine maps the mbufs to tx descriptors, allowing the
1745 *  TX engine to transmit the packets.
1746 *  	- return 0 on success, positive on failure
1747 *
1748 **********************************************************************/
1749
1750static int
1751ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1752{
1753	struct adapter  *adapter = txr->adapter;
1754	u32		olinfo_status = 0, cmd_type_len;
1755	int             i, j, error, nsegs;
1756	int		first;
1757	bool		remap = TRUE;
1758	struct mbuf	*m_head;
1759	bus_dma_segment_t segs[adapter->num_segs];
1760	bus_dmamap_t	map;
1761	struct ixgbe_tx_buf *txbuf;
1762	union ixgbe_adv_tx_desc *txd = NULL;
1763
1764	m_head = *m_headp;
1765
1766	/* Basic descriptor defines */
1767        cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
1768	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
1769
1770	if (m_head->m_flags & M_VLANTAG)
1771        	cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
1772
1773        /*
1774         * Important to capture the first descriptor
1775         * used because it will contain the index of
1776         * the one we tell the hardware to report back
1777         */
1778        first = txr->next_avail_desc;
1779	txbuf = &txr->tx_buffers[first];
1780	map = txbuf->map;
1781
1782	/*
1783	 * Map the packet for DMA.
1784	 */
1785retry:
1786	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1787	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1788
1789	if (__predict_false(error)) {
1790		struct mbuf *m;
1791
1792		switch (error) {
1793		case EFBIG:
1794			/* Try it again? - one try */
1795			if (remap == TRUE) {
1796				remap = FALSE;
1797				m = m_defrag(*m_headp, M_NOWAIT);
1798				if (m == NULL) {
1799					adapter->mbuf_defrag_failed++;
1800					m_freem(*m_headp);
1801					*m_headp = NULL;
1802					return (ENOBUFS);
1803				}
1804				*m_headp = m;
1805				goto retry;
1806			} else
1807				return (error);
1808		case ENOMEM:
1809			txr->no_tx_dma_setup++;
1810			return (error);
1811		default:
1812			txr->no_tx_dma_setup++;
1813			m_freem(*m_headp);
1814			*m_headp = NULL;
1815			return (error);
1816		}
1817	}
1818
	/*
	** Make certain there are enough descriptors, keeping
	** slack for the offload context descriptor set up below.
	*/
1820	if (nsegs > txr->tx_avail - 2) {
1821		txr->no_desc_avail++;
1822		bus_dmamap_unload(txr->txtag, map);
1823		return (ENOBUFS);
1824	}
1825	m_head = *m_headp;
1826
1827	/*
1828	** Set up the appropriate offload context
1829	** this will consume the first descriptor
1830	*/
1831	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1832	if (__predict_false(error)) {
1833		if (error == ENOBUFS)
1834			*m_headp = NULL;
1835		return (error);
1836	}
1837
1838#ifdef IXGBE_FDIR
1839	/* Do the flow director magic */
1840	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
1841		++txr->atr_count;
1842		if (txr->atr_count >= atr_sample_rate) {
1843			ixgbe_atr(txr, m_head);
1844			txr->atr_count = 0;
1845		}
1846	}
1847#endif
1848
1849	i = txr->next_avail_desc;
1850	for (j = 0; j < nsegs; j++) {
1851		bus_size_t seglen;
1852		bus_addr_t segaddr;
1853
1854		txbuf = &txr->tx_buffers[i];
1855		txd = &txr->tx_base[i];
1856		seglen = segs[j].ds_len;
1857		segaddr = htole64(segs[j].ds_addr);
1858
1859		txd->read.buffer_addr = segaddr;
1860		txd->read.cmd_type_len = htole32(txr->txd_cmd |
1861		    cmd_type_len |seglen);
1862		txd->read.olinfo_status = htole32(olinfo_status);
1863
1864		if (++i == txr->num_desc)
1865			i = 0;
1866	}
1867
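	/* Last descriptor: mark end of packet and request a status report */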
1868	txd->read.cmd_type_len |=
1869	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
1870	txr->tx_avail -= nsegs;
1871	txr->next_avail_desc = i;
1872
1873	txbuf->m_head = m_head;
1874	/*
1875	** Here we swap the map so the last descriptor,
1876	** which gets the completion interrupt has the
1877	** real map, and the first descriptor gets the
1878	** unused map from this descriptor.
1879	*/
1880	txr->tx_buffers[first].map = txbuf->map;
1881	txbuf->map = map;
1882	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1883
1884        /* Set the EOP descriptor that will be marked done */
1885        txbuf = &txr->tx_buffers[first];
1886	txbuf->eop = txd;
1887
1888        bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1889            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1890	/*
1891	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1892	 * hardware that this frame is available to transmit.
1893	 */
1894	++txr->total_packets;
1895	IXGBE_WRITE_REG(&adapter->hw, IXGBE_TDT(txr->me), i);
1896
1897	return (0);
1898
1899}
1900
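/*********************************************************************
 *  Promiscuous mode setup
 *
 *  Sets the unicast/multicast promiscuous bits (UPE/MPE) in FCTRL
 *  based on the interface IFF_PROMISC and IFF_ALLMULTI flags.
 *
 **********************************************************************/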
1901static void
1902ixgbe_set_promisc(struct adapter *adapter)
1903{
1904	u_int32_t       reg_rctl;
1905	struct ifnet   *ifp = adapter->ifp;
1906	int		mcnt = 0;
1907
1908	reg_rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1909	reg_rctl &= (~IXGBE_FCTRL_UPE);
1910	if (ifp->if_flags & IFF_ALLMULTI)
1911		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1912	else {
1913		struct	ifmultiaddr *ifma;
1914#if __FreeBSD_version < 800000
1915		IF_ADDR_LOCK(ifp);
1916#else
1917		if_maddr_rlock(ifp);
1918#endif
1919		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1920			if (ifma->ifma_addr->sa_family != AF_LINK)
1921				continue;
1922			if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1923				break;
1924			mcnt++;
1925		}
1926#if __FreeBSD_version < 800000
1927		IF_ADDR_UNLOCK(ifp);
1928#else
1929		if_maddr_runlock(ifp);
1930#endif
1931	}
1932	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1933		reg_rctl &= (~IXGBE_FCTRL_MPE);
1934	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1935
1936	if (ifp->if_flags & IFF_PROMISC) {
1937		reg_rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1938		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1939	} else if (ifp->if_flags & IFF_ALLMULTI) {
1940		reg_rctl |= IXGBE_FCTRL_MPE;
1941		reg_rctl &= ~IXGBE_FCTRL_UPE;
1942		IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, reg_rctl);
1943	}
1944	return;
1945}
1946
1947
1948/*********************************************************************
1949 *  Multicast Update
1950 *
1951 *  This routine is called whenever multicast address list is updated.
1952 *
1953 **********************************************************************/
1954#define IXGBE_RAR_ENTRIES 16
1955
1956static void
1957ixgbe_set_multi(struct adapter *adapter)
1958{
1959	u32	fctrl;
1960	u8	*mta;
1961	u8	*update_ptr;
1962	struct	ifmultiaddr *ifma;
1963	int	mcnt = 0;
1964	struct ifnet   *ifp = adapter->ifp;
1965
1966	IOCTL_DEBUGOUT("ixgbe_set_multi: begin");
1967
1968	mta = adapter->mta;
1969	bzero(mta, sizeof(u8) * IXGBE_ETH_LENGTH_OF_ADDRESS *
1970	    MAX_NUM_MULTICAST_ADDRESSES);
1971
1972#if __FreeBSD_version < 800000
1973	IF_ADDR_LOCK(ifp);
1974#else
1975	if_maddr_rlock(ifp);
1976#endif
1977	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1978		if (ifma->ifma_addr->sa_family != AF_LINK)
1979			continue;
1980		if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1981			break;
1982		bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr),
1983		    &mta[mcnt * IXGBE_ETH_LENGTH_OF_ADDRESS],
1984		    IXGBE_ETH_LENGTH_OF_ADDRESS);
1985		mcnt++;
1986	}
1987#if __FreeBSD_version < 800000
1988	IF_ADDR_UNLOCK(ifp);
1989#else
1990	if_maddr_runlock(ifp);
1991#endif
1992
1993	fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL);
1994	fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1995	if (ifp->if_flags & IFF_PROMISC)
1996		fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
1997	else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES ||
1998	    ifp->if_flags & IFF_ALLMULTI) {
1999		fctrl |= IXGBE_FCTRL_MPE;
2000		fctrl &= ~IXGBE_FCTRL_UPE;
2001	} else
2002		fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
2003
2004	IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl);
2005
2006	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) {
2007		update_ptr = mta;
2008		ixgbe_update_mc_addr_list(&adapter->hw,
2009		    update_ptr, mcnt, ixgbe_mc_array_itr, TRUE);
2010	}
2011
2012	return;
2013}
2014
2015/*
 * This is an iterator function required by the multicast shared
 * code. It feeds the shared code routine the addresses in the
 * array built by ixgbe_set_multi(), one at a time.
2019 */
2020static u8 *
2021ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq)
2022{
2023	u8 *addr = *update_ptr;
2024	u8 *newptr;
2025	*vmdq = 0;
2026
2027	newptr = addr + IXGBE_ETH_LENGTH_OF_ADDRESS;
2028	*update_ptr = newptr;
2029	return addr;
2030}
2031
2032
2033/*********************************************************************
2034 *  Timer routine
2035 *
 *  This routine checks for link status, updates statistics,
2037 *  and runs the watchdog check.
2038 *
2039 **********************************************************************/
2040
2041static void
2042ixgbe_local_timer(void *arg)
2043{
2044	struct adapter	*adapter = arg;
2045	device_t	dev = adapter->dev;
2046	struct ix_queue *que = adapter->queues;
2047	struct tx_ring	*txr = adapter->tx_rings;
2048	int		hung = 0, paused = 0;
2049
2050	mtx_assert(&adapter->core_mtx, MA_OWNED);
2051
2052	/* Check for pluggable optics */
2053	if (adapter->sfp_probe)
2054		if (!ixgbe_sfp_probe(adapter))
2055			goto out; /* Nothing to do */
2056
2057	ixgbe_update_link_status(adapter);
2058	ixgbe_update_stats_counters(adapter);
2059
2060	/*
2061	 * If the interface has been paused
2062	 * then don't do the watchdog check
2063	 */
2064	if (IXGBE_READ_REG(&adapter->hw, IXGBE_TFCS) & IXGBE_TFCS_TXOFF)
2065		paused = 1;
2066
2067	/*
2068	** Check the TX queues status
2069	**      - watchdog only if all queues show hung
2070	*/
2071	for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2072		if ((txr->queue_status == IXGBE_QUEUE_HUNG) &&
2073		    (paused == 0))
2074			++hung;
2075		else if (txr->queue_status == IXGBE_QUEUE_WORKING)
2076			taskqueue_enqueue(que->tq, &txr->txq_task);
2077        }
	/* Only truly watchdog if all queues show hung */
2079        if (hung == adapter->num_queues)
2080                goto watchdog;
2081
2082out:
2083	callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter);
2084	return;
2085
2086watchdog:
2087	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2088	device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2089	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDH(txr->me)),
2090	    IXGBE_READ_REG(&adapter->hw, IXGBE_TDT(txr->me)));
	device_printf(dev, "TX(%d) desc avail = %d, "
	    "Next TX to Clean = %d\n",
2093	    txr->me, txr->tx_avail, txr->next_to_clean);
2094	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2095	adapter->watchdog_events++;
2096	ixgbe_init_locked(adapter);
2097}
2098
2099/*
2100** Note: this routine updates the OS on the link state
2101**	the real check of the hardware only happens with
2102**	a link interrupt.
2103*/
2104static void
2105ixgbe_update_link_status(struct adapter *adapter)
2106{
2107	struct ifnet	*ifp = adapter->ifp;
2108	device_t dev = adapter->dev;
2109
2110
	if (adapter->link_up) {
2112		if (adapter->link_active == FALSE) {
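			/* A link_speed of 128 (0x80) is IXGBE_LINK_SPEED_10GB_FULL */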
2113			if (bootverbose)
2114				device_printf(dev,"Link is up %d Gbps %s \n",
2115				    ((adapter->link_speed == 128)? 10:1),
2116				    "Full Duplex");
2117			adapter->link_active = TRUE;
2118			/* Update any Flow Control changes */
2119			ixgbe_fc_enable(&adapter->hw);
2120			if_link_state_change(ifp, LINK_STATE_UP);
2121		}
2122	} else { /* Link down */
2123		if (adapter->link_active == TRUE) {
2124			if (bootverbose)
2125				device_printf(dev,"Link is Down\n");
2126			if_link_state_change(ifp, LINK_STATE_DOWN);
2127			adapter->link_active = FALSE;
2128		}
2129	}
2130
2131	return;
2132}
2133
2134
2135/*********************************************************************
2136 *
2137 *  This routine disables all traffic on the adapter by issuing a
2138 *  global reset on the MAC and deallocates TX/RX buffers.
2139 *
2140 **********************************************************************/
2141
2142static void
2143ixgbe_stop(void *arg)
2144{
2145	struct ifnet   *ifp;
2146	struct adapter *adapter = arg;
2147	struct ixgbe_hw *hw = &adapter->hw;
2148	ifp = adapter->ifp;
2149
2150	mtx_assert(&adapter->core_mtx, MA_OWNED);
2151
2152	INIT_DEBUGOUT("ixgbe_stop: begin\n");
2153	ixgbe_disable_intr(adapter);
2154	callout_stop(&adapter->timer);
2155
2156	/* Let the stack know...*/
2157	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2158
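	/* Reset the hardware and run the adapter stop sequence */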
2159	ixgbe_reset_hw(hw);
2160	hw->adapter_stopped = FALSE;
2161	ixgbe_stop_adapter(hw);
2162	if (hw->mac.type == ixgbe_mac_82599EB)
2163		ixgbe_stop_mac_link_on_d3_82599(hw);
2164	/* Turn off the laser - noop with no optics */
2165	ixgbe_disable_tx_laser(hw);
2166
2167	/* Update the stack */
2168	adapter->link_up = FALSE;
2169       	ixgbe_update_link_status(adapter);
2170
2171	/* reprogram the RAR[0] in case user changed it. */
2172	ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV);
2173
2174	return;
2175}
2176
2177
2178/*********************************************************************
2179 *
2180 *  Determine hardware revision.
2181 *
2182 **********************************************************************/
2183static void
2184ixgbe_identify_hardware(struct adapter *adapter)
2185{
2186	device_t        dev = adapter->dev;
2187	struct ixgbe_hw *hw = &adapter->hw;
2188
2189	/* Save off the information about this board */
2190	hw->vendor_id = pci_get_vendor(dev);
2191	hw->device_id = pci_get_device(dev);
2192	hw->revision_id = pci_read_config(dev, PCIR_REVID, 1);
2193	hw->subsystem_vendor_id =
2194	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
2195	hw->subsystem_device_id =
2196	    pci_read_config(dev, PCIR_SUBDEV_0, 2);
2197
2198	/* We need this here to set the num_segs below */
2199	ixgbe_set_mac_type(hw);
2200
2201	/* Pick up the 82599 and VF settings */
2202	if (hw->mac.type != ixgbe_mac_82598EB) {
2203		hw->phy.smart_speed = ixgbe_smart_speed;
2204		adapter->num_segs = IXGBE_82599_SCATTER;
2205	} else
2206		adapter->num_segs = IXGBE_82598_SCATTER;
2207
2208	return;
2209}
2210
2211/*********************************************************************
2212 *
2213 *  Determine optic type
2214 *
2215 **********************************************************************/
2216static void
2217ixgbe_setup_optics(struct adapter *adapter)
2218{
2219	struct ixgbe_hw *hw = &adapter->hw;
2220	int		layer;
2221
2222	layer = ixgbe_get_supported_physical_layer(hw);
2223
2224	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) {
2225		adapter->optics = IFM_10G_T;
2226		return;
2227	}
2228
2229	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) {
2230		adapter->optics = IFM_1000_T;
2231		return;
2232	}
2233
2234	if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) {
2235		adapter->optics = IFM_1000_SX;
2236		return;
2237	}
2238
2239	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_LR |
2240	    IXGBE_PHYSICAL_LAYER_10GBASE_LRM)) {
2241		adapter->optics = IFM_10G_LR;
2242		return;
2243	}
2244
2245	if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) {
2246		adapter->optics = IFM_10G_SR;
2247		return;
2248	}
2249
2250	if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU) {
2251		adapter->optics = IFM_10G_TWINAX;
2252		return;
2253	}
2254
2255	if (layer & (IXGBE_PHYSICAL_LAYER_10GBASE_KX4 |
2256	    IXGBE_PHYSICAL_LAYER_10GBASE_CX4)) {
2257		adapter->optics = IFM_10G_CX4;
2258		return;
2259	}
2260
2261	/* If we get here just set the default */
2262	adapter->optics = IFM_ETHER | IFM_AUTO;
2263	return;
2264}
2265
2266/*********************************************************************
2267 *
2268 *  Setup the Legacy or MSI Interrupt handler
2269 *
2270 **********************************************************************/
2271static int
2272ixgbe_allocate_legacy(struct adapter *adapter)
2273{
2274	device_t	dev = adapter->dev;
2275	struct		ix_queue *que = adapter->queues;
2276#ifndef IXGBE_LEGACY_TX
2277	struct tx_ring		*txr = adapter->tx_rings;
2278#endif
2279	int		error, rid = 0;
2280
2281	/* MSI RID at 1 */
2282	if (adapter->msix == 1)
2283		rid = 1;
2284
2285	/* We allocate a single interrupt resource */
2286	adapter->res = bus_alloc_resource_any(dev,
2287            SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2288	if (adapter->res == NULL) {
2289		device_printf(dev, "Unable to allocate bus resource: "
2290		    "interrupt\n");
2291		return (ENXIO);
2292	}
2293
2294	/*
2295	 * Try allocating a fast interrupt and the associated deferred
2296	 * processing contexts.
2297	 */
2298#ifndef IXGBE_LEGACY_TX
2299	TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2300#endif
2301	TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2302	que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2303            taskqueue_thread_enqueue, &que->tq);
2304	taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq",
2305            device_get_nameunit(adapter->dev));
2306
2307	/* Tasklets for Link, SFP and Multispeed Fiber */
2308	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2309	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2310	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2311#ifdef IXGBE_FDIR
2312	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2313#endif
2314	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2315	    taskqueue_thread_enqueue, &adapter->tq);
2316	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2317	    device_get_nameunit(adapter->dev));
2318
2319	if ((error = bus_setup_intr(dev, adapter->res,
2320            INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq,
2321            que, &adapter->tag)) != 0) {
2322		device_printf(dev, "Failed to register fast interrupt "
2323		    "handler: %d\n", error);
2324		taskqueue_free(que->tq);
2325		taskqueue_free(adapter->tq);
2326		que->tq = NULL;
2327		adapter->tq = NULL;
2328		return (error);
2329	}
2330	/* For simplicity in the handlers */
2331	adapter->que_mask = IXGBE_EIMS_ENABLE_MASK;
2332
2333	return (0);
2334}
2335
2336
2337/*********************************************************************
2338 *
2339 *  Setup MSIX Interrupt resources and handlers
2340 *
2341 **********************************************************************/
2342static int
2343ixgbe_allocate_msix(struct adapter *adapter)
2344{
2345	device_t        dev = adapter->dev;
2346	struct 		ix_queue *que = adapter->queues;
2347	struct  	tx_ring *txr = adapter->tx_rings;
2348	int 		error, rid, vector = 0;
2349
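	/* Allocate one interrupt resource, handler and taskqueue per queue */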
2350	for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) {
2351		rid = vector + 1;
2352		que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
2353		    RF_SHAREABLE | RF_ACTIVE);
2354		if (que->res == NULL) {
2355			device_printf(dev,"Unable to allocate"
2356		    	    " bus resource: que interrupt [%d]\n", vector);
2357			return (ENXIO);
2358		}
2359		/* Set the handler function */
2360		error = bus_setup_intr(dev, que->res,
2361		    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2362		    ixgbe_msix_que, que, &que->tag);
2363		if (error) {
2364			que->res = NULL;
2365			device_printf(dev, "Failed to register QUE handler");
2366			return (error);
2367		}
2368#if __FreeBSD_version >= 800504
2369		bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2370#endif
2371		que->msix = vector;
2372        	adapter->que_mask |= (u64)(1 << que->msix);
2373		/*
2374		** Bind the msix vector, and thus the
2375		** ring to the corresponding cpu.
2376		*/
2377		if (adapter->num_queues > 1)
2378			bus_bind_intr(dev, que->res, i);
2379
2380#ifndef IXGBE_LEGACY_TX
2381		TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr);
2382#endif
2383		TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que);
2384		que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT,
2385		    taskqueue_thread_enqueue, &que->tq);
2386		taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2387		    device_get_nameunit(adapter->dev));
2388	}
2389
2390	/* and Link */
2391	rid = vector + 1;
2392	adapter->res = bus_alloc_resource_any(dev,
2393    	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2394	if (!adapter->res) {
2395		device_printf(dev,"Unable to allocate"
2396    	    " bus resource: Link interrupt [%d]\n", rid);
2397		return (ENXIO);
2398	}
2399	/* Set the link handler function */
2400	error = bus_setup_intr(dev, adapter->res,
2401	    INTR_TYPE_NET | INTR_MPSAFE, NULL,
2402	    ixgbe_msix_link, adapter, &adapter->tag);
2403	if (error) {
2404		adapter->res = NULL;
2405		device_printf(dev, "Failed to register LINK handler");
2406		return (error);
2407	}
2408#if __FreeBSD_version >= 800504
2409	bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2410#endif
2411	adapter->linkvec = vector;
2412	/* Tasklets for Link, SFP and Multispeed Fiber */
2413	TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter);
2414	TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter);
2415	TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter);
2416#ifdef IXGBE_FDIR
2417	TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter);
2418#endif
2419	adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT,
2420	    taskqueue_thread_enqueue, &adapter->tq);
2421	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq",
2422	    device_get_nameunit(adapter->dev));
2423
2424	return (0);
2425}
2426
2427/*
2428 * Setup Either MSI/X or MSI
2429 */
2430static int
2431ixgbe_setup_msix(struct adapter *adapter)
2432{
2433	device_t dev = adapter->dev;
2434	int rid, want, queues, msgs;
2435
	/* Override by tunable */
2437	if (ixgbe_enable_msix == 0)
2438		goto msi;
2439
2440	/* First try MSI/X */
2441	msgs = pci_msix_count(dev);
2442	if (msgs == 0)
2443		goto msi;
2444	rid = PCIR_BAR(MSIX_82598_BAR);
2445	adapter->msix_mem = bus_alloc_resource_any(dev,
2446	    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2447       	if (adapter->msix_mem == NULL) {
2448		rid += 4;	/* 82599 maps in higher BAR */
2449		adapter->msix_mem = bus_alloc_resource_any(dev,
2450		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
2451	}
2452       	if (adapter->msix_mem == NULL) {
2453		/* May not be enabled */
2454		device_printf(adapter->dev,
		    "Unable to map MSIX table\n");
2456		goto msi;
2457	}
2458
2459	/* Figure out a reasonable auto config value */
2460	queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2461
2462	if (ixgbe_num_queues != 0)
2463		queues = ixgbe_num_queues;
2464	/* Set max queues to 8 when autoconfiguring */
2465	else if ((ixgbe_num_queues == 0) && (queues > 8))
2466		queues = 8;
2467
2468	/* reflect correct sysctl value */
2469	ixgbe_num_queues = queues;
2470
2471	/*
2472	** Want one vector (RX/TX pair) per queue
2473	** plus an additional for Link.
2474	*/
2475	want = queues + 1;
2476	if (msgs >= want)
2477		msgs = want;
2478	else {
		device_printf(adapter->dev,
		    "MSIX Configuration Problem, "
		    "%d vectors available but %d wanted!\n",
		    msgs, want);
2483		goto msi;
2484	}
2485	if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2486               	device_printf(adapter->dev,
2487		    "Using MSIX interrupts with %d vectors\n", msgs);
2488		adapter->num_queues = queues;
2489		return (msgs);
2490	}
2491	/*
2492	** If MSIX alloc failed or provided us with
2493	** less than needed, free and fall through to MSI
2494	*/
2495	pci_release_msi(dev);
2496
2497msi:
2498       	if (adapter->msix_mem != NULL) {
2499		bus_release_resource(dev, SYS_RES_MEMORY,
2500		    rid, adapter->msix_mem);
2501		adapter->msix_mem = NULL;
2502	}
2503       	msgs = 1;
2504       	if (pci_alloc_msi(dev, &msgs) == 0) {
2505               	device_printf(adapter->dev,"Using an MSI interrupt\n");
2506		return (msgs);
2507	}
2508	device_printf(adapter->dev,"Using a Legacy interrupt\n");
2509	return (0);
2510}
2511
2512
2513static int
2514ixgbe_allocate_pci_resources(struct adapter *adapter)
2515{
2516	int             rid;
2517	device_t        dev = adapter->dev;
2518
2519	rid = PCIR_BAR(0);
2520	adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2521	    &rid, RF_ACTIVE);
2522
2523	if (!(adapter->pci_mem)) {
2524		device_printf(dev,"Unable to allocate bus resource: memory\n");
2525		return (ENXIO);
2526	}
2527
2528	adapter->osdep.mem_bus_space_tag =
2529		rman_get_bustag(adapter->pci_mem);
2530	adapter->osdep.mem_bus_space_handle =
2531		rman_get_bushandle(adapter->pci_mem);
2532	adapter->hw.hw_addr = (u8 *) &adapter->osdep.mem_bus_space_handle;
2533
2534	/* Legacy defaults */
2535	adapter->num_queues = 1;
2536	adapter->hw.back = &adapter->osdep;
2537
2538	/*
2539	** Now setup MSI or MSI/X, should
2540	** return us the number of supported
2541	** vectors. (Will be 1 for MSI)
2542	*/
2543	adapter->msix = ixgbe_setup_msix(adapter);
2544	return (0);
2545}
2546
2547static void
2548ixgbe_free_pci_resources(struct adapter * adapter)
2549{
2550	struct 		ix_queue *que = adapter->queues;
2551	device_t	dev = adapter->dev;
2552	int		rid, memrid;
2553
2554	if (adapter->hw.mac.type == ixgbe_mac_82598EB)
2555		memrid = PCIR_BAR(MSIX_82598_BAR);
2556	else
2557		memrid = PCIR_BAR(MSIX_82599_BAR);
2558
2559	/*
2560	** There is a slight possibility of a failure mode
2561	** in attach that will result in entering this function
2562	** before interrupt resources have been initialized, and
2563	** in that case we do not want to execute the loops below
2564	** We can detect this reliably by the state of the adapter
2565	** res pointer.
2566	*/
2567	if (adapter->res == NULL)
2568		goto mem;
2569
2570	/*
2571	**  Release all msix queue resources:
2572	*/
2573	for (int i = 0; i < adapter->num_queues; i++, que++) {
2574		rid = que->msix + 1;
2575		if (que->tag != NULL) {
2576			bus_teardown_intr(dev, que->res, que->tag);
2577			que->tag = NULL;
2578		}
2579		if (que->res != NULL)
2580			bus_release_resource(dev, SYS_RES_IRQ, rid, que->res);
2581	}
2582
2583
2584	/* Clean the Legacy or Link interrupt last */
2585	if (adapter->linkvec) /* we are doing MSIX */
2586		rid = adapter->linkvec + 1;
2587	else
		rid = (adapter->msix != 0) ? 1 : 0;
2589
2590	if (adapter->tag != NULL) {
2591		bus_teardown_intr(dev, adapter->res, adapter->tag);
2592		adapter->tag = NULL;
2593	}
2594	if (adapter->res != NULL)
2595		bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2596
2597mem:
2598	if (adapter->msix)
2599		pci_release_msi(dev);
2600
2601	if (adapter->msix_mem != NULL)
2602		bus_release_resource(dev, SYS_RES_MEMORY,
2603		    memrid, adapter->msix_mem);
2604
2605	if (adapter->pci_mem != NULL)
2606		bus_release_resource(dev, SYS_RES_MEMORY,
2607		    PCIR_BAR(0), adapter->pci_mem);
2608
2609	return;
2610}
2611
2612/*********************************************************************
2613 *
2614 *  Setup networking device structure and register an interface.
2615 *
2616 **********************************************************************/
2617static int
2618ixgbe_setup_interface(device_t dev, struct adapter *adapter)
2619{
2620	struct ixgbe_hw *hw = &adapter->hw;
2621	struct ifnet   *ifp;
2622
2623	INIT_DEBUGOUT("ixgbe_setup_interface: begin");
2624
2625	ifp = adapter->ifp = if_alloc(IFT_ETHER);
2626	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
2628		return (-1);
2629	}
2630	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
2631#if __FreeBSD_version < 1000025
2632	ifp->if_baudrate = 1000000000;
2633#else
2634	if_initbaudrate(ifp, IF_Gbps(10));
2635#endif
2636	ifp->if_init = ixgbe_init;
2637	ifp->if_softc = adapter;
2638	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2639	ifp->if_ioctl = ixgbe_ioctl;
2640#ifndef IXGBE_LEGACY_TX
2641	ifp->if_transmit = ixgbe_mq_start;
2642	ifp->if_qflush = ixgbe_qflush;
2643#else
2644	ifp->if_start = ixgbe_start;
2645	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2);
2646	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2;
2647	IFQ_SET_READY(&ifp->if_snd);
2648#endif
2649
2650	ether_ifattach(ifp, adapter->hw.mac.addr);
2651
2652	adapter->max_frame_size =
2653	    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
2654
2655	/*
2656	 * Tell the upper layer(s) we support long frames.
2657	 */
2658	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
2659
2660	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_TSO | IFCAP_VLAN_HWCSUM;
2661	ifp->if_capabilities |= IFCAP_JUMBO_MTU;
2662	ifp->if_capabilities |= IFCAP_LRO;
2663	ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
2664			     |  IFCAP_VLAN_HWTSO
2665			     |  IFCAP_VLAN_MTU;
2666	ifp->if_capenable = ifp->if_capabilities;
2667
2668	/*
	** Do not enable this by default: if vlans are created on
	** another pseudo device (e.g. lagg) then vlan events are not
	** passed through, which breaks operation, but with HW FILTER
	** off it works. If vlans are used directly on the ixgbe
	** interface this can be enabled to get full hardware tag
	** filtering.
2675	*/
2676	ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
2677
2678	/*
2679	 * Specify the media types supported by this adapter and register
2680	 * callbacks to update media and link information
2681	 */
2682	ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change,
2683		     ixgbe_media_status);
2684	ifmedia_add(&adapter->media, IFM_ETHER | adapter->optics, 0, NULL);
2685	ifmedia_set(&adapter->media, IFM_ETHER | adapter->optics);
2686	if (hw->device_id == IXGBE_DEV_ID_82598AT) {
2687		ifmedia_add(&adapter->media,
2688		    IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
2689		ifmedia_add(&adapter->media,
2690		    IFM_ETHER | IFM_1000_T, 0, NULL);
2691	}
2692	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2693	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
2694
2695	return (0);
2696}
2697
2698static void
2699ixgbe_config_link(struct adapter *adapter)
2700{
2701	struct ixgbe_hw *hw = &adapter->hw;
2702	u32	autoneg, err = 0;
2703	bool	sfp, negotiate;
2704
2705	sfp = ixgbe_is_sfp(hw);
2706
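	/*
	** With SFP modules the link setup is deferred to the SFP/MSF
	** tasklets; otherwise negotiate and set up the link directly.
	*/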
2707	if (sfp) {
2708		if (hw->phy.multispeed_fiber) {
2709			hw->mac.ops.setup_sfp(hw);
2710			ixgbe_enable_tx_laser(hw);
2711			taskqueue_enqueue(adapter->tq, &adapter->msf_task);
2712		} else
2713			taskqueue_enqueue(adapter->tq, &adapter->mod_task);
2714	} else {
2715		if (hw->mac.ops.check_link)
2716			err = ixgbe_check_link(hw, &adapter->link_speed,
2717			    &adapter->link_up, FALSE);
2718		if (err)
2719			goto out;
2720		autoneg = hw->phy.autoneg_advertised;
2721		if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
2722                	err  = hw->mac.ops.get_link_capabilities(hw,
2723			    &autoneg, &negotiate);
2724		if (err)
2725			goto out;
2726		if (hw->mac.ops.setup_link)
2727                	err = hw->mac.ops.setup_link(hw,
2728			    autoneg, adapter->link_up);
2729	}
2730out:
2731	return;
2732}
2733
2734/********************************************************************
2735 * Manage DMA'able memory.
2736 *******************************************************************/
2737static void
2738ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2739{
2740	if (error)
2741		return;
2742	*(bus_addr_t *) arg = segs->ds_addr;
2743	return;
2744}
2745
2746static int
2747ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2748		struct ixgbe_dma_alloc *dma, int mapflags)
2749{
2750	device_t dev = adapter->dev;
2751	int             r;
2752
2753	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
2754			       DBA_ALIGN, 0,	/* alignment, bounds */
2755			       BUS_SPACE_MAXADDR,	/* lowaddr */
2756			       BUS_SPACE_MAXADDR,	/* highaddr */
2757			       NULL, NULL,	/* filter, filterarg */
2758			       size,	/* maxsize */
2759			       1,	/* nsegments */
2760			       size,	/* maxsegsize */
2761			       BUS_DMA_ALLOCNOW,	/* flags */
2762			       NULL,	/* lockfunc */
2763			       NULL,	/* lockfuncarg */
2764			       &dma->dma_tag);
2765	if (r != 0) {
2766		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2767		       "error %u\n", r);
2768		goto fail_0;
2769	}
2770	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2771			     BUS_DMA_NOWAIT, &dma->dma_map);
2772	if (r != 0) {
2773		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2774		       "error %u\n", r);
2775		goto fail_1;
2776	}
2777	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2778			    size,
2779			    ixgbe_dmamap_cb,
2780			    &dma->dma_paddr,
2781			    mapflags | BUS_DMA_NOWAIT);
2782	if (r != 0) {
2783		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2784		       "error %u\n", r);
2785		goto fail_2;
2786	}
2787	dma->dma_size = size;
2788	return (0);
2789fail_2:
2790	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2791fail_1:
2792	bus_dma_tag_destroy(dma->dma_tag);
2793fail_0:
2794	dma->dma_map = NULL;
2795	dma->dma_tag = NULL;
2796	return (r);
2797}
2798
2799static void
2800ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2801{
2802	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2803	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2804	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2805	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2806	bus_dma_tag_destroy(dma->dma_tag);
2807}
2808
2809
2810/*********************************************************************
2811 *
2812 *  Allocate memory for the transmit and receive rings, and then
2813 *  the descriptors associated with each, called only once at attach.
2814 *
2815 **********************************************************************/
2816static int
2817ixgbe_allocate_queues(struct adapter *adapter)
2818{
2819	device_t	dev = adapter->dev;
2820	struct ix_queue	*que;
2821	struct tx_ring	*txr;
2822	struct rx_ring	*rxr;
2823	int rsize, tsize, error = IXGBE_SUCCESS;
2824	int txconf = 0, rxconf = 0;
2825
2826        /* First allocate the top level queue structs */
2827        if (!(adapter->queues =
2828            (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2829            adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2830                device_printf(dev, "Unable to allocate queue memory\n");
2831                error = ENOMEM;
2832                goto fail;
2833        }
2834
	/* Next allocate the TX ring struct memory */
2836	if (!(adapter->tx_rings =
2837	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2838	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2839		device_printf(dev, "Unable to allocate TX ring memory\n");
2840		error = ENOMEM;
2841		goto tx_fail;
2842	}
2843
2844	/* Next allocate the RX */
2845	if (!(adapter->rx_rings =
2846	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2847	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2848		device_printf(dev, "Unable to allocate RX ring memory\n");
2849		error = ENOMEM;
2850		goto rx_fail;
2851	}
2852
2853	/* For the ring itself */
2854	tsize = roundup2(adapter->num_tx_desc *
2855	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2856
2857	/*
2858	 * Now set up the TX queues, txconf is needed to handle the
2859	 * possibility that things fail midcourse and we need to
2860	 * undo memory gracefully
2861	 */
2862	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2863		/* Set up some basics */
2864		txr = &adapter->tx_rings[i];
2865		txr->adapter = adapter;
2866		txr->me = i;
2867		txr->num_desc = adapter->num_tx_desc;
2868
2869		/* Initialize the TX side lock */
2870		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2871		    device_get_nameunit(dev), txr->me);
2872		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2873
2874		if (ixgbe_dma_malloc(adapter, tsize,
2875			&txr->txdma, BUS_DMA_NOWAIT)) {
2876			device_printf(dev,
2877			    "Unable to allocate TX Descriptor memory\n");
2878			error = ENOMEM;
2879			goto err_tx_desc;
2880		}
2881		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2882		bzero((void *)txr->tx_base, tsize);
2883
2884        	/* Now allocate transmit buffers for the ring */
2885        	if (ixgbe_allocate_transmit_buffers(txr)) {
2886			device_printf(dev,
2887			    "Critical Failure setting up transmit buffers\n");
2888			error = ENOMEM;
2889			goto err_tx_desc;
2890        	}
2891#ifndef IXGBE_LEGACY_TX
2892		/* Allocate a buf ring */
2893		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2894		    M_WAITOK, &txr->tx_mtx);
2895		if (txr->br == NULL) {
2896			device_printf(dev,
2897			    "Critical Failure setting up buf ring\n");
2898			error = ENOMEM;
2899			goto err_tx_desc;
2900        	}
2901#endif
2902	}
2903
2904	/*
2905	 * Next the RX queues...
2906	 */
2907	rsize = roundup2(adapter->num_rx_desc *
2908	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2909	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2910		rxr = &adapter->rx_rings[i];
2911		/* Set up some basics */
2912		rxr->adapter = adapter;
2913		rxr->me = i;
2914		rxr->num_desc = adapter->num_rx_desc;
2915
2916		/* Initialize the RX side lock */
2917		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2918		    device_get_nameunit(dev), rxr->me);
2919		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2920
2921		if (ixgbe_dma_malloc(adapter, rsize,
2922			&rxr->rxdma, BUS_DMA_NOWAIT)) {
2923			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
2925			error = ENOMEM;
2926			goto err_rx_desc;
2927		}
2928		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2929		bzero((void *)rxr->rx_base, rsize);
2930
2931        	/* Allocate receive buffers for the ring*/
2932		if (ixgbe_allocate_receive_buffers(rxr)) {
2933			device_printf(dev,
2934			    "Critical Failure setting up receive buffers\n");
2935			error = ENOMEM;
2936			goto err_rx_desc;
2937		}
2938	}
2939
2940	/*
2941	** Finally set up the queue holding structs
2942	*/
2943	for (int i = 0; i < adapter->num_queues; i++) {
2944		que = &adapter->queues[i];
2945		que->adapter = adapter;
2946		que->txr = &adapter->tx_rings[i];
2947		que->rxr = &adapter->rx_rings[i];
2948	}
2949
2950	return (0);
2951
2952err_rx_desc:
2953	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2954		ixgbe_dma_free(adapter, &rxr->rxdma);
2955err_tx_desc:
2956	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2957		ixgbe_dma_free(adapter, &txr->txdma);
2958	free(adapter->rx_rings, M_DEVBUF);
2959rx_fail:
2960	free(adapter->tx_rings, M_DEVBUF);
2961tx_fail:
2962	free(adapter->queues, M_DEVBUF);
2963fail:
2964	return (error);
2965}
2966
2967/*********************************************************************
2968 *
2969 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2970 *  the information needed to transmit a packet on the wire. This is
2971 *  called only once at attach, setup is done every reset.
2972 *
2973 **********************************************************************/
2974static int
2975ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
2976{
2977	struct adapter *adapter = txr->adapter;
2978	device_t dev = adapter->dev;
2979	struct ixgbe_tx_buf *txbuf;
2980	int error, i;
2981
2982	/*
2983	 * Setup DMA descriptor areas.
2984	 */
2985	if ((error = bus_dma_tag_create(
2986			       bus_get_dma_tag(adapter->dev),	/* parent */
2987			       1, 0,		/* alignment, bounds */
2988			       BUS_SPACE_MAXADDR,	/* lowaddr */
2989			       BUS_SPACE_MAXADDR,	/* highaddr */
2990			       NULL, NULL,		/* filter, filterarg */
2991			       IXGBE_TSO_SIZE,		/* maxsize */
2992			       adapter->num_segs,	/* nsegments */
2993			       PAGE_SIZE,		/* maxsegsize */
2994			       0,			/* flags */
2995			       NULL,			/* lockfunc */
2996			       NULL,			/* lockfuncarg */
2997			       &txr->txtag))) {
2998		device_printf(dev,"Unable to allocate TX DMA tag\n");
2999		goto fail;
3000	}
3001
3002	if (!(txr->tx_buffers =
3003	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
3004	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3005		device_printf(dev, "Unable to allocate tx_buffer memory\n");
3006		error = ENOMEM;
3007		goto fail;
3008	}
3009
3010        /* Create the descriptor buffer dma maps */
3011	txbuf = txr->tx_buffers;
3012	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3013		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3014		if (error != 0) {
3015			device_printf(dev, "Unable to create TX DMA map\n");
3016			goto fail;
3017		}
3018	}
3019
3020	return 0;
3021fail:
3022	/* We free all, it handles case where we are in the middle */
3023	ixgbe_free_transmit_structures(adapter);
3024	return (error);
3025}
3026
3027/*********************************************************************
3028 *
3029 *  Initialize a transmit ring.
3030 *
3031 **********************************************************************/
3032static void
3033ixgbe_setup_transmit_ring(struct tx_ring *txr)
3034{
3035	struct adapter *adapter = txr->adapter;
3036	struct ixgbe_tx_buf *txbuf;
3037	int i;
3038#ifdef DEV_NETMAP
3039	struct netmap_adapter *na = NA(adapter->ifp);
3040	struct netmap_slot *slot;
3041#endif /* DEV_NETMAP */
3042
3043	/* Clear the old ring contents */
3044	IXGBE_TX_LOCK(txr);
3045#ifdef DEV_NETMAP
3046	/*
3047	 * (under lock): if in netmap mode, do some consistency
3048	 * checks and set slot to entry 0 of the netmap ring.
3049	 */
3050	slot = netmap_reset(na, NR_TX, txr->me, 0);
3051#endif /* DEV_NETMAP */
3052	bzero((void *)txr->tx_base,
3053	      (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
3054	/* Reset indices */
3055	txr->next_avail_desc = 0;
3056	txr->next_to_clean = 0;
3057
3058	/* Free any existing tx buffers. */
3059        txbuf = txr->tx_buffers;
3060	for (i = 0; i < txr->num_desc; i++, txbuf++) {
3061		if (txbuf->m_head != NULL) {
3062			bus_dmamap_sync(txr->txtag, txbuf->map,
3063			    BUS_DMASYNC_POSTWRITE);
3064			bus_dmamap_unload(txr->txtag, txbuf->map);
3065			m_freem(txbuf->m_head);
3066			txbuf->m_head = NULL;
3067		}
3068#ifdef DEV_NETMAP
3069		/*
3070		 * In netmap mode, set the map for the packet buffer.
3071		 * NOTE: Some drivers (not this one) also need to set
3072		 * the physical buffer address in the NIC ring.
3073		 * Slots in the netmap ring (indexed by "si") are
3074		 * kring->nkr_hwofs positions "ahead" wrt the
3075		 * corresponding slot in the NIC ring. In some drivers
3076		 * (not here) nkr_hwofs can be negative. Function
3077		 * netmap_idx_n2k() handles wraparounds properly.
3078		 */
3079		if (slot) {
3080			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3081			netmap_load_map(txr->txtag, txbuf->map, NMB(slot + si));
3082		}
3083#endif /* DEV_NETMAP */
3084		/* Clear the EOP descriptor pointer */
3085		txbuf->eop = NULL;
3086        }
3087
3088#ifdef IXGBE_FDIR
3089	/* Set the rate at which we sample packets */
3090	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
3091		txr->atr_sample = atr_sample_rate;
3092#endif
3093
3094	/* Set number of descriptors available */
3095	txr->tx_avail = adapter->num_tx_desc;
3096
3097	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3098	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3099	IXGBE_TX_UNLOCK(txr);
3100}
3101
3102/*********************************************************************
3103 *
3104 *  Initialize all transmit rings.
3105 *
3106 **********************************************************************/
3107static int
3108ixgbe_setup_transmit_structures(struct adapter *adapter)
3109{
3110	struct tx_ring *txr = adapter->tx_rings;
3111
3112	for (int i = 0; i < adapter->num_queues; i++, txr++)
3113		ixgbe_setup_transmit_ring(txr);
3114
3115	return (0);
3116}
3117
3118/*********************************************************************
3119 *
3120 *  Enable transmit unit.
3121 *
3122 **********************************************************************/
3123static void
3124ixgbe_initialize_transmit_units(struct adapter *adapter)
3125{
3126	struct tx_ring	*txr = adapter->tx_rings;
3127	struct ixgbe_hw	*hw = &adapter->hw;
3128
3129	/* Setup the Base and Length of the Tx Descriptor Ring */
3130
3131	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3132		u64	tdba = txr->txdma.dma_paddr;
3133		u32	txctrl;
3134
3135		IXGBE_WRITE_REG(hw, IXGBE_TDBAL(i),
3136		       (tdba & 0x00000000ffffffffULL));
3137		IXGBE_WRITE_REG(hw, IXGBE_TDBAH(i), (tdba >> 32));
3138		IXGBE_WRITE_REG(hw, IXGBE_TDLEN(i),
3139		    adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc));
3140
3141		/* Setup the HW Tx Head and Tail descriptor pointers */
3142		IXGBE_WRITE_REG(hw, IXGBE_TDH(i), 0);
3143		IXGBE_WRITE_REG(hw, IXGBE_TDT(i), 0);
3144
3145		/* Setup Transmit Descriptor Cmd Settings */
3146		txr->txd_cmd = IXGBE_TXD_CMD_IFCS;
3147		txr->queue_status = IXGBE_QUEUE_IDLE;
3148
3149		/* Set the processing limit */
3150		txr->process_limit = ixgbe_tx_process_limit;
3151
3152		/* Disable Head Writeback */
3153		switch (hw->mac.type) {
3154		case ixgbe_mac_82598EB:
3155			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i));
3156			break;
3157		case ixgbe_mac_82599EB:
3158		case ixgbe_mac_X540:
3159		default:
3160			txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i));
3161			break;
3162                }
3163		txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN;
3164		switch (hw->mac.type) {
3165		case ixgbe_mac_82598EB:
3166			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), txctrl);
3167			break;
3168		case ixgbe_mac_82599EB:
3169		case ixgbe_mac_X540:
3170		default:
3171			IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), txctrl);
3172			break;
3173		}
3174
3175	}
3176
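	/*
	** On 82599 and later the DMA TX engine must be enabled, and
	** the arbiter disabled while MTQC is programmed.
	*/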
3177	if (hw->mac.type != ixgbe_mac_82598EB) {
3178		u32 dmatxctl, rttdcs;
3179		dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);
3180		dmatxctl |= IXGBE_DMATXCTL_TE;
3181		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
3182		/* Disable arbiter to set MTQC */
3183		rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS);
3184		rttdcs |= IXGBE_RTTDCS_ARBDIS;
3185		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3186		IXGBE_WRITE_REG(hw, IXGBE_MTQC, IXGBE_MTQC_64Q_1PB);
3187		rttdcs &= ~IXGBE_RTTDCS_ARBDIS;
3188		IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs);
3189	}
3190
3191	return;
3192}
3193
3194/*********************************************************************
3195 *
3196 *  Free all transmit rings.
3197 *
3198 **********************************************************************/
3199static void
3200ixgbe_free_transmit_structures(struct adapter *adapter)
3201{
3202	struct tx_ring *txr = adapter->tx_rings;
3203
3204	for (int i = 0; i < adapter->num_queues; i++, txr++) {
3205		IXGBE_TX_LOCK(txr);
3206		ixgbe_free_transmit_buffers(txr);
3207		ixgbe_dma_free(adapter, &txr->txdma);
3208		IXGBE_TX_UNLOCK(txr);
3209		IXGBE_TX_LOCK_DESTROY(txr);
3210	}
3211	free(adapter->tx_rings, M_DEVBUF);
3212}
3213
3214/*********************************************************************
3215 *
3216 *  Free transmit ring related data structures.
3217 *
3218 **********************************************************************/
3219static void
3220ixgbe_free_transmit_buffers(struct tx_ring *txr)
3221{
3222	struct adapter *adapter = txr->adapter;
3223	struct ixgbe_tx_buf *tx_buffer;
3224	int             i;
3225
3226	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
3227
3228	if (txr->tx_buffers == NULL)
3229		return;
3230
3231	tx_buffer = txr->tx_buffers;
3232	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3233		if (tx_buffer->m_head != NULL) {
3234			bus_dmamap_sync(txr->txtag, tx_buffer->map,
3235			    BUS_DMASYNC_POSTWRITE);
3236			bus_dmamap_unload(txr->txtag,
3237			    tx_buffer->map);
3238			m_freem(tx_buffer->m_head);
3239			tx_buffer->m_head = NULL;
3240			if (tx_buffer->map != NULL) {
3241				bus_dmamap_destroy(txr->txtag,
3242				    tx_buffer->map);
3243				tx_buffer->map = NULL;
3244			}
3245		} else if (tx_buffer->map != NULL) {
3246			bus_dmamap_unload(txr->txtag,
3247			    tx_buffer->map);
3248			bus_dmamap_destroy(txr->txtag,
3249			    tx_buffer->map);
3250			tx_buffer->map = NULL;
3251		}
3252	}
#ifndef IXGBE_LEGACY_TX
3254	if (txr->br != NULL)
3255		buf_ring_free(txr->br, M_DEVBUF);
3256#endif
3257	if (txr->tx_buffers != NULL) {
3258		free(txr->tx_buffers, M_DEVBUF);
3259		txr->tx_buffers = NULL;
3260	}
3261	if (txr->txtag != NULL) {
3262		bus_dma_tag_destroy(txr->txtag);
3263		txr->txtag = NULL;
3264	}
3265	return;
3266}
3267
3268/*********************************************************************
3269 *
3270 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
3271 *
3272 **********************************************************************/
3273
3274static int
3275ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3276    u32 *cmd_type_len, u32 *olinfo_status)
3277{
3278	struct ixgbe_adv_tx_context_desc *TXD;
3279	struct ether_vlan_header *eh;
3280	struct ip *ip;
3281	struct ip6_hdr *ip6;
3282	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3283	int	ehdrlen, ip_hlen = 0;
3284	u16	etype;
3285	u8	ipproto = 0;
3286	int	offload = TRUE;
3287	int	ctxd = txr->next_avail_desc;
3288	u16	vtag = 0;
3289
3290	/* First check if TSO is to be used */
3291	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3292		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3293
3294	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3295		offload = FALSE;
3296
3297	/* Indicate the whole packet as payload when not doing TSO */
3298       	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
3299
3300	/* Now ready a context descriptor */
3301	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3302
3303	/*
3304	** In advanced descriptors the vlan tag must
3305	** be placed into the context descriptor. Hence
3306	** we need to make one even if not doing offloads.
3307	*/
3308	if (mp->m_flags & M_VLANTAG) {
3309		vtag = htole16(mp->m_pkthdr.ether_vtag);
3310		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3311	} else if (offload == FALSE) /* ... no offload to do */
3312		return (0);
3313
3314	/*
3315	 * Determine where frame payload starts.
3316	 * Jump over vlan headers if already present,
3317	 * helpful for QinQ too.
3318	 */
3319	eh = mtod(mp, struct ether_vlan_header *);
3320	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3321		etype = ntohs(eh->evl_proto);
3322		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3323	} else {
3324		etype = ntohs(eh->evl_encap_proto);
3325		ehdrlen = ETHER_HDR_LEN;
3326	}
3327
3328	/* Set the ether header length */
3329	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3330
3331	switch (etype) {
3332		case ETHERTYPE_IP:
3333			ip = (struct ip *)(mp->m_data + ehdrlen);
3334			ip_hlen = ip->ip_hl << 2;
3335			ipproto = ip->ip_p;
3336			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3337			break;
3338		case ETHERTYPE_IPV6:
3339			ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3340			ip_hlen = sizeof(struct ip6_hdr);
3341			/* XXX-BZ this will go badly in case of ext hdrs. */
3342			ipproto = ip6->ip6_nxt;
3343			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3344			break;
3345		default:
3346			offload = FALSE;
3347			break;
3348	}
3349
3350	vlan_macip_lens |= ip_hlen;
3351	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3352
3353	switch (ipproto) {
3354		case IPPROTO_TCP:
3355			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3356				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3357			break;
3358
3359		case IPPROTO_UDP:
3360			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3361				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
3362			break;
3363
3364#if __FreeBSD_version >= 800000
3365		case IPPROTO_SCTP:
3366			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3367				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
3368			break;
3369#endif
3370		default:
3371			offload = FALSE;
3372			break;
3373	}
3374
3375	if (offload) /* For the TX descriptor setup */
3376		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3377
3378	/* Now copy bits into descriptor */
3379	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3380	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3381	TXD->seqnum_seed = htole32(0);
3382	TXD->mss_l4len_idx = htole32(0);
3383
3384	/* We've consumed the first desc, adjust counters */
3385	if (++ctxd == txr->num_desc)
3386		ctxd = 0;
3387	txr->next_avail_desc = ctxd;
3388	--txr->tx_avail;
3389
3390        return (0);
3391}
3392
3393/**********************************************************************
3394 *
3395 *  Setup work for hardware segmentation offload (TSO) on
3396 *  adapters using advanced tx descriptors
3397 *
3398 **********************************************************************/
3399static int
3400ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3401    u32 *cmd_type_len, u32 *olinfo_status)
3402{
3403	struct ixgbe_adv_tx_context_desc *TXD;
3404	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3405	u32 mss_l4len_idx = 0, paylen;
3406	u16 vtag = 0, eh_type;
3407	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3408	struct ether_vlan_header *eh;
3409#ifdef INET6
3410	struct ip6_hdr *ip6;
3411#endif
3412#ifdef INET
3413	struct ip *ip;
3414#endif
3415	struct tcphdr *th;
3416
3417
3418	/*
3419	 * Determine where frame payload starts.
3420	 * Jump over vlan headers if already present
3421	 */
3422	eh = mtod(mp, struct ether_vlan_header *);
3423	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3424		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3425		eh_type = eh->evl_proto;
3426	} else {
3427		ehdrlen = ETHER_HDR_LEN;
3428		eh_type = eh->evl_encap_proto;
3429	}
3430
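	/*
	 * Seed the TCP checksum with the pseudo-header sum (excluding
	 * length), as the hardware expects when performing TSO.
	 */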
3431	switch (ntohs(eh_type)) {
3432#ifdef INET6
3433	case ETHERTYPE_IPV6:
3434		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3435		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
3436		if (ip6->ip6_nxt != IPPROTO_TCP)
3437			return (ENXIO);
3438		ip_hlen = sizeof(struct ip6_hdr);
3439		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3440		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3441		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3442		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
3443		break;
3444#endif
3445#ifdef INET
3446	case ETHERTYPE_IP:
3447		ip = (struct ip *)(mp->m_data + ehdrlen);
3448		if (ip->ip_p != IPPROTO_TCP)
3449			return (ENXIO);
3450		ip->ip_sum = 0;
3451		ip_hlen = ip->ip_hl << 2;
3452		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3453		th->th_sum = in_pseudo(ip->ip_src.s_addr,
3454		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3455		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
3456		/* Tell transmit desc to also do IPv4 checksum. */
3457		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
3458		break;
3459#endif
3460	default:
3461		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3462		    __func__, ntohs(eh_type));
3463		break;
3464	}
3465
3466	ctxd = txr->next_avail_desc;
3467	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
3468
3469	tcp_hlen = th->th_off << 2;
3470
3471	/* This is used in the transmit desc in encap */
3472	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3473
3474	/* VLAN MACLEN IPLEN */
3475	if (mp->m_flags & M_VLANTAG) {
3476		vtag = htole16(mp->m_pkthdr.ether_vtag);
3477                vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
3478	}
3479
3480	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
3481	vlan_macip_lens |= ip_hlen;
3482	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3483
3484	/* ADV DTYPE TUCMD */
3485	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
3486	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
3487	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3488
3489	/* MSS L4LEN IDX */
3490	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
3491	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
3492	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3493
3494	TXD->seqnum_seed = htole32(0);
3495
3496	if (++ctxd == txr->num_desc)
3497		ctxd = 0;
3498
3499	txr->tx_avail--;
3500	txr->next_avail_desc = ctxd;
3501	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
3502	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
3503	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
3504	++txr->tso_tx;
3505	return (0);
3506}
3507
3508#ifdef IXGBE_FDIR
3509/*
3510** This routine parses packet headers so that Flow
3511** Director can make a hashed filter table entry
3512** allowing traffic flows to be identified and kept
3513** on the same cpu.  This would be a performance
3514** hit, but we only do it at IXGBE_FDIR_RATE of
3515** packets.
3516*/
3517static void
3518ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
3519{
3520	struct adapter			*adapter = txr->adapter;
3521	struct ix_queue			*que;
3522	struct ip			*ip;
3523	struct tcphdr			*th;
3524	struct udphdr			*uh;
3525	struct ether_vlan_header	*eh;
3526	union ixgbe_atr_hash_dword	input = {.dword = 0};
3527	union ixgbe_atr_hash_dword	common = {.dword = 0};
3528	int  				ehdrlen, ip_hlen;
3529	u16				etype;
3530
3531	eh = mtod(mp, struct ether_vlan_header *);
3532	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3533		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3534		etype = eh->evl_proto;
3535	} else {
3536		ehdrlen = ETHER_HDR_LEN;
3537		etype = eh->evl_encap_proto;
3538	}
3539
3540	/* Only handling IPv4 */
3541	if (etype != htons(ETHERTYPE_IP))
3542		return;
3543
3544	ip = (struct ip *)(mp->m_data + ehdrlen);
3545	ip_hlen = ip->ip_hl << 2;
3546
3547	/* check if we're UDP or TCP */
3548	switch (ip->ip_p) {
3549	case IPPROTO_TCP:
3550		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3551		/* src and dst are inverted */
3552		common.port.dst ^= th->th_sport;
3553		common.port.src ^= th->th_dport;
3554		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
3555		break;
3556	case IPPROTO_UDP:
3557		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
3558		/* src and dst are inverted */
3559		common.port.dst ^= uh->uh_sport;
3560		common.port.src ^= uh->uh_dport;
3561		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
3562		break;
3563	default:
3564		return;
3565	}
3566
3567	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
3568	if (mp->m_pkthdr.ether_vtag)
3569		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
3570	else
3571		common.flex_bytes ^= etype;
3572	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
3573
3574	que = &adapter->queues[txr->me];
3575	/*
3576	** This assumes the Rx queue and Tx
3577	** queue are bound to the same CPU
3578	*/
3579	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
3580	    input, common, que->msix);
3581}
3582#endif /* IXGBE_FDIR */
3583
3584/**********************************************************************
3585 *
3586 *  Examine each tx_buffer in the used queue. If the hardware is done
3587 *  processing the packet then free associated resources. The
3588 *  tx_buffer is put back on the free queue.
3589 *
3590 **********************************************************************/
3591static void
3592ixgbe_txeof(struct tx_ring *txr)
3593{
3594	struct adapter		*adapter = txr->adapter;
3595	struct ifnet		*ifp = adapter->ifp;
3596	u32			work, processed = 0;
3597	u16			limit = txr->process_limit;
3598	struct ixgbe_tx_buf	*buf;
3599	union ixgbe_adv_tx_desc *txd;
3600
3601	mtx_assert(&txr->tx_mtx, MA_OWNED);
3602
3603#ifdef DEV_NETMAP
3604	if (ifp->if_capenable & IFCAP_NETMAP) {
3605		struct netmap_adapter *na = NA(ifp);
3606		struct netmap_kring *kring = &na->tx_rings[txr->me];
3607		txd = txr->tx_base;
3608		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3609		    BUS_DMASYNC_POSTREAD);
3610		/*
3611		 * In netmap mode, all the work is done in the context
3612		 * of the client thread. Interrupt handlers only wake up
3613		 * clients, which may be sleeping on individual rings
3614		 * or on a global resource for all rings.
3615		 * To implement tx interrupt mitigation, we wake up the client
3616		 * thread roughly every half ring, even if the NIC interrupts
3617		 * more frequently. This is implemented as follows:
3618		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
3619		 *   the slot that should wake up the thread (nkr_num_slots
3620		 *   means the user thread should not be woken up);
3621		 * - the driver ignores tx interrupts unless netmap_mitigate=0
3622		 *   or the slot has the DD bit set.
3623		 */
3624		if (!netmap_mitigate ||
3625		    (kring->nr_kflags < kring->nkr_num_slots &&
3626		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
3627			netmap_tx_irq(ifp, txr->me);
3628		}
3629		return;
3630	}
3631#endif /* DEV_NETMAP */
3632
3633	if (txr->tx_avail == txr->num_desc) {
3634		txr->queue_status = IXGBE_QUEUE_IDLE;
3635		return;
3636	}
3637
3638	/* Get work starting point */
3639	work = txr->next_to_clean;
3640	buf = &txr->tx_buffers[work];
3641	txd = &txr->tx_base[work];
3642	work -= txr->num_desc; /* The distance to ring end */
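	/*
	 * 'work' is biased downward by num_desc (it is unsigned, so this
	 * wraps): e.g. with 1024 descriptors and next_to_clean == 1000 it
	 * holds the equivalent of -24, and the "!work" checks below spot
	 * the ring wrap exactly when the index passes the last descriptor.
	 */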
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
3645
3646	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
3648		if (eop == NULL) /* No work */
3649			break;
3650
3651		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
3652			break;	/* I/O not complete */
3653
3654		if (buf->m_head) {
3655			txr->bytes +=
3656			    buf->m_head->m_pkthdr.len;
3657			bus_dmamap_sync(txr->txtag,
3658			    buf->map,
3659			    BUS_DMASYNC_POSTWRITE);
3660			bus_dmamap_unload(txr->txtag,
3661			    buf->map);
3662			m_freem(buf->m_head);
3663			buf->m_head = NULL;
3664			buf->map = NULL;
3665		}
3666		buf->eop = NULL;
3667		++txr->tx_avail;
3668
		/* If this was a multi-segment packet, clean the whole range */
3670		while (txd != eop) {
3671			++txd;
3672			++buf;
3673			++work;
3674			/* wrap the ring? */
3675			if (__predict_false(!work)) {
3676				work -= txr->num_desc;
3677				buf = txr->tx_buffers;
3678				txd = txr->tx_base;
3679			}
3680			if (buf->m_head) {
3681				txr->bytes +=
3682				    buf->m_head->m_pkthdr.len;
3683				bus_dmamap_sync(txr->txtag,
3684				    buf->map,
3685				    BUS_DMASYNC_POSTWRITE);
3686				bus_dmamap_unload(txr->txtag,
3687				    buf->map);
3688				m_freem(buf->m_head);
3689				buf->m_head = NULL;
3690				buf->map = NULL;
3691			}
3692			++txr->tx_avail;
3693			buf->eop = NULL;
3694
3695		}
3696		++txr->packets;
3697		++processed;
3698		++ifp->if_opackets;
3699		txr->watchdog_time = ticks;
3700
3701		/* Try the next packet */
3702		++txd;
3703		++buf;
3704		++work;
3705		/* reset with a wrap */
3706		if (__predict_false(!work)) {
3707			work -= txr->num_desc;
3708			buf = txr->tx_buffers;
3709			txd = txr->tx_base;
3710		}
3711		prefetch(txd);
3712	} while (__predict_true(--limit));
3713
3714	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3715	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3716
3717	work += txr->num_desc;
3718	txr->next_to_clean = work;
3719
3720	/*
3721	** Watchdog calculation, we know there's
3722	** work outstanding or the first return
3723	** would have been taken, so none processed
3724	** for too long indicates a hang.
3725	*/
3726	if ((!processed) && ((ticks - txr->watchdog_time) > IXGBE_WATCHDOG))
3727		txr->queue_status = IXGBE_QUEUE_HUNG;
3728
3729	if (txr->tx_avail == txr->num_desc)
3730		txr->queue_status = IXGBE_QUEUE_IDLE;
3731
3732	return;
3733}
3734
3735/*********************************************************************
3736 *
3737 *  Refresh mbuf buffers for RX descriptor rings
3738 *   - now keeps its own state so discards due to resource
3739 *     exhaustion are unnecessary, if an mbuf cannot be obtained
3740 *     it just returns, keeping its placeholder, thus it can simply
3741 *     be recalled to try again.
3742 *
3743 **********************************************************************/
3744static void
3745ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
3746{
3747	struct adapter		*adapter = rxr->adapter;
3748	bus_dma_segment_t	seg[1];
3749	struct ixgbe_rx_buf	*rxbuf;
3750	struct mbuf		*mp;
3751	int			i, j, nsegs, error;
3752	bool			refreshed = FALSE;
3753
3754	i = j = rxr->next_to_refresh;
3755	/* Control the loop with one beyond */
3756	if (++j == rxr->num_desc)
3757		j = 0;
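	/*
	 * 'i' is the slot being refreshed and 'j' runs one slot ahead,
	 * acting as the loop sentinel against 'limit'; the net effect is
	 * that refreshing stops one descriptor short of the caller's
	 * position, leaving slack before the tail is advanced below.
	 */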
3758
3759	while (j != limit) {
3760		rxbuf = &rxr->rx_buffers[i];
3761		if (rxbuf->buf == NULL) {
3762			mp = m_getjcl(M_NOWAIT, MT_DATA,
3763			    M_PKTHDR, rxr->mbuf_sz);
3764			if (mp == NULL)
3765				goto update;
3766			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3767				m_adj(mp, ETHER_ALIGN);
3768		} else
3769			mp = rxbuf->buf;
3770
3771		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
3772
3773		/* If we're dealing with an mbuf that was copied rather
3774		 * than replaced, there's no need to go through busdma.
3775		 */
3776		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
3777			/* Get the memory mapping */
3778			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
3779			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
3780			if (error != 0) {
3781				printf("Refresh mbufs: payload dmamap load"
3782				    " failure - %d\n", error);
3783				m_free(mp);
3784				rxbuf->buf = NULL;
3785				goto update;
3786			}
3787			rxbuf->buf = mp;
3788			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3789			    BUS_DMASYNC_PREREAD);
3790			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
3791			    htole64(seg[0].ds_addr);
3792		} else {
3793			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
3794			rxbuf->flags &= ~IXGBE_RX_COPY;
3795		}
3796
3797		refreshed = TRUE;
3798		/* Next is precalculated */
3799		i = j;
3800		rxr->next_to_refresh = i;
3801		if (++j == rxr->num_desc)
3802			j = 0;
3803	}
3804update:
3805	if (refreshed) /* Update hardware tail index */
3806		IXGBE_WRITE_REG(&adapter->hw,
3807		    IXGBE_RDT(rxr->me), rxr->next_to_refresh);
3808	return;
3809}
3810
3811/*********************************************************************
3812 *
3813 *  Allocate memory for rx_buffer structures. Since we use one
3814 *  rx_buffer per received packet, the maximum number of rx_buffer's
3815 *  that we'll need is equal to the number of receive descriptors
3816 *  that we've allocated.
3817 *
3818 **********************************************************************/
3819static int
3820ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
3821{
3822	struct	adapter 	*adapter = rxr->adapter;
3823	device_t 		dev = adapter->dev;
3824	struct ixgbe_rx_buf 	*rxbuf;
3825	int             	i, bsize, error;
3826
3827	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
3828	if (!(rxr->rx_buffers =
3829	    (struct ixgbe_rx_buf *) malloc(bsize,
3830	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
3831		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3832		error = ENOMEM;
3833		goto fail;
3834	}
3835
3836	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
3837				   1, 0,	/* alignment, bounds */
3838				   BUS_SPACE_MAXADDR,	/* lowaddr */
3839				   BUS_SPACE_MAXADDR,	/* highaddr */
3840				   NULL, NULL,		/* filter, filterarg */
3841				   MJUM16BYTES,		/* maxsize */
3842				   1,			/* nsegments */
3843				   MJUM16BYTES,		/* maxsegsize */
3844				   0,			/* flags */
3845				   NULL,		/* lockfunc */
3846				   NULL,		/* lockfuncarg */
3847				   &rxr->ptag))) {
3848		device_printf(dev, "Unable to create RX DMA tag\n");
3849		goto fail;
3850	}
3851
	for (i = 0; i < rxr->num_desc; i++) {
3853		rxbuf = &rxr->rx_buffers[i];
3854		error = bus_dmamap_create(rxr->ptag,
3855		    BUS_DMA_NOWAIT, &rxbuf->pmap);
3856		if (error) {
3857			device_printf(dev, "Unable to create RX dma map\n");
3858			goto fail;
3859		}
3860	}
3861
3862	return (0);
3863
3864fail:
3865	/* Frees all, but can handle partial completion */
3866	ixgbe_free_receive_structures(adapter);
3867	return (error);
3868}
3869
3870/*
3871** Used to detect a descriptor that has
3872** been merged by Hardware RSC.
3873*/
3874static inline u32
3875ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
3876{
3877	return (le32toh(rx->wb.lower.lo_dword.data) &
3878	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
3879}
3880
3881/*********************************************************************
3882 *
3883 *  Initialize Hardware RSC (LRO) feature on 82599
3884 *  for an RX ring, this is toggled by the LRO capability
3885 *  even though it is transparent to the stack.
3886 *
 *  NOTE: since this HW feature only works with IPv4, and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
3890 *
3891 **********************************************************************/
3892static void
3893ixgbe_setup_hw_rsc(struct rx_ring *rxr)
3894{
3895	struct	adapter 	*adapter = rxr->adapter;
3896	struct	ixgbe_hw	*hw = &adapter->hw;
3897	u32			rscctrl, rdrxctl;
3898
	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		/* Write the cleared enable bit back so RSC is really off */
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}
3905
3906	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
3907	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
3908#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
3909	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
3910#endif /* DEV_NETMAP */
3911	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
3912	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
3913	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
3914
3915	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
3916	rscctrl |= IXGBE_RSCCTL_RSCEN;
3917	/*
3918	** Limit the total number of descriptors that
3919	** can be combined, so it does not exceed 64K
3920	*/
3921	if (rxr->mbuf_sz == MCLBYTES)
3922		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
3923	else if (rxr->mbuf_sz == MJUMPAGESIZE)
3924		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
3925	else if (rxr->mbuf_sz == MJUM9BYTES)
3926		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
3927	else  /* Using 16K cluster */
3928		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
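	/*
	 * For example, with 2K (MCLBYTES) clusters MAXDESC_16 caps a
	 * coalesced frame at roughly 16 * 2KB = 32KB, while a single 16K
	 * cluster descriptor is already a sizeable chunk of the budget,
	 * keeping the combined length safely under 64KB in every case.
	 */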
3929
3930	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
3931
3932	/* Enable TCP header recognition */
3933	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
3934	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
3935	    IXGBE_PSRTYPE_TCPHDR));
3936
3937	/* Disable RSC for ACK packets */
3938	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
3939	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
3940
3941	rxr->hw_rsc = TRUE;
3942}
3943
3944
3945static void
3946ixgbe_free_receive_ring(struct rx_ring *rxr)
3947{
3948	struct ixgbe_rx_buf       *rxbuf;
3949	int i;
3950
3951	for (i = 0; i < rxr->num_desc; i++) {
3952		rxbuf = &rxr->rx_buffers[i];
3953		if (rxbuf->buf != NULL) {
3954			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
3955			    BUS_DMASYNC_POSTREAD);
3956			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
3957			rxbuf->buf->m_flags |= M_PKTHDR;
3958			m_freem(rxbuf->buf);
3959			rxbuf->buf = NULL;
3960			rxbuf->flags = 0;
3961		}
3962	}
3963}
3964
3965
3966/*********************************************************************
3967 *
3968 *  Initialize a receive ring and its buffers.
3969 *
3970 **********************************************************************/
3971static int
3972ixgbe_setup_receive_ring(struct rx_ring *rxr)
3973{
3974	struct	adapter 	*adapter;
3975	struct ifnet		*ifp;
3976	device_t		dev;
3977	struct ixgbe_rx_buf	*rxbuf;
3978	bus_dma_segment_t	seg[1];
3979	struct lro_ctrl		*lro = &rxr->lro;
3980	int			rsize, nsegs, error = 0;
3981#ifdef DEV_NETMAP
3982	struct netmap_adapter *na = NA(rxr->adapter->ifp);
3983	struct netmap_slot *slot;
3984#endif /* DEV_NETMAP */
3985
3986	adapter = rxr->adapter;
3987	ifp = adapter->ifp;
3988	dev = adapter->dev;
3989
3990	/* Clear the ring contents */
3991	IXGBE_RX_LOCK(rxr);
3992#ifdef DEV_NETMAP
3993	/* same as in ixgbe_setup_transmit_ring() */
3994	slot = netmap_reset(na, NR_RX, rxr->me, 0);
3995#endif /* DEV_NETMAP */
3996	rsize = roundup2(adapter->num_rx_desc *
3997	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
3998	bzero((void *)rxr->rx_base, rsize);
3999	/* Cache the size */
4000	rxr->mbuf_sz = adapter->rx_mbuf_sz;
4001
4002	/* Free current RX buffer structs and their mbufs */
4003	ixgbe_free_receive_ring(rxr);
4004
4005	/* Now replenish the mbufs */
4006	for (int j = 0; j != rxr->num_desc; ++j) {
4007		struct mbuf	*mp;
4008
4009		rxbuf = &rxr->rx_buffers[j];
4010#ifdef DEV_NETMAP
4011		/*
4012		 * In netmap mode, fill the map and set the buffer
4013		 * address in the NIC ring, considering the offset
4014		 * between the netmap and NIC rings (see comment in
4015		 * ixgbe_setup_transmit_ring() ). No need to allocate
4016		 * an mbuf, so end the block with a continue;
4017		 */
4018		if (slot) {
4019			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4020			uint64_t paddr;
4021			void *addr;
4022
4023			addr = PNMB(slot + sj, &paddr);
4024			netmap_load_map(rxr->ptag, rxbuf->pmap, addr);
4025			/* Update descriptor and the cached value */
4026			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4027			rxbuf->addr = htole64(paddr);
4028			continue;
4029		}
4030#endif /* DEV_NETMAP */
4031		rxbuf->flags = 0;
4032		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
4033		    M_PKTHDR, adapter->rx_mbuf_sz);
4034		if (rxbuf->buf == NULL) {
4035			error = ENOBUFS;
			goto fail;
4037		}
4038		mp = rxbuf->buf;
4039		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
4040		/* Get the memory mapping */
4041		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4042		    rxbuf->pmap, mp, seg,
4043		    &nsegs, BUS_DMA_NOWAIT);
4044		if (error != 0)
			goto fail;
4046		bus_dmamap_sync(rxr->ptag,
4047		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
4048		/* Update the descriptor and the cached value */
4049		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
4050		rxbuf->addr = htole64(seg[0].ds_addr);
4051	}
4052
4053
4054	/* Setup our descriptor indices */
4055	rxr->next_to_check = 0;
4056	rxr->next_to_refresh = 0;
4057	rxr->lro_enabled = FALSE;
4058	rxr->rx_copies = 0;
4059	rxr->rx_bytes = 0;
4060	rxr->discard = FALSE;
4061	rxr->vtag_strip = FALSE;
4062
4063	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4064	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4065
4066	/*
4067	** Now set up the LRO interface:
4068	*/
4069	if (ixgbe_rsc_enable)
4070		ixgbe_setup_hw_rsc(rxr);
4071	else if (ifp->if_capenable & IFCAP_LRO) {
4072		int err = tcp_lro_init(lro);
4073		if (err) {
4074			device_printf(dev, "LRO Initialization failed!\n");
4075			goto fail;
4076		}
4077		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
4078		rxr->lro_enabled = TRUE;
4079		lro->ifp = adapter->ifp;
4080	}
4081
4082	IXGBE_RX_UNLOCK(rxr);
4083	return (0);
4084
4085fail:
4086	ixgbe_free_receive_ring(rxr);
4087	IXGBE_RX_UNLOCK(rxr);
4088	return (error);
4089}
4090
4091/*********************************************************************
4092 *
4093 *  Initialize all receive rings.
4094 *
4095 **********************************************************************/
4096static int
4097ixgbe_setup_receive_structures(struct adapter *adapter)
4098{
4099	struct rx_ring *rxr = adapter->rx_rings;
4100	int j;
4101
4102	for (j = 0; j < adapter->num_queues; j++, rxr++)
4103		if (ixgbe_setup_receive_ring(rxr))
4104			goto fail;
4105
4106	return (0);
4107fail:
4108	/*
4109	 * Free RX buffers allocated so far, we will only handle
4110	 * the rings that completed, the failing case will have
4111	 * cleaned up for itself. 'j' failed, so its the terminus.
4112	 */
4113	for (int i = 0; i < j; ++i) {
4114		rxr = &adapter->rx_rings[i];
4115		ixgbe_free_receive_ring(rxr);
4116	}
4117
4118	return (ENOBUFS);
4119}
4120
4121/*********************************************************************
4122 *
4123 *  Setup receive registers and features.
4124 *
4125 **********************************************************************/
4126#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2
4127
4128#define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1)
4129
4130static void
4131ixgbe_initialize_receive_units(struct adapter *adapter)
4132{
4133	struct	rx_ring	*rxr = adapter->rx_rings;
4134	struct ixgbe_hw	*hw = &adapter->hw;
4135	struct ifnet   *ifp = adapter->ifp;
4136	u32		bufsz, rxctrl, fctrl, srrctl, rxcsum;
4137	u32		reta, mrqc = 0, hlreg, random[10];
4138
4139
4140	/*
4141	 * Make sure receives are disabled while
4142	 * setting up the descriptor ring
4143	 */
4144	rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL);
4145	IXGBE_WRITE_REG(hw, IXGBE_RXCTRL,
4146	    rxctrl & ~IXGBE_RXCTRL_RXEN);
4147
4148	/* Enable broadcasts */
4149	fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
4150	fctrl |= IXGBE_FCTRL_BAM;
4151	fctrl |= IXGBE_FCTRL_DPF;
4152	fctrl |= IXGBE_FCTRL_PMCF;
4153	IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
4154
4155	/* Set for Jumbo Frames? */
4156	hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0);
4157	if (ifp->if_mtu > ETHERMTU)
4158		hlreg |= IXGBE_HLREG0_JUMBOEN;
4159	else
4160		hlreg &= ~IXGBE_HLREG0_JUMBOEN;
4161#ifdef DEV_NETMAP
	/* crcstrip is conditional in netmap (in RDRXCTL too?) */
4163	if (ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
4164		hlreg &= ~IXGBE_HLREG0_RXCRCSTRP;
4165	else
4166		hlreg |= IXGBE_HLREG0_RXCRCSTRP;
4167#endif /* DEV_NETMAP */
4168	IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg);
4169
4170	bufsz = (adapter->rx_mbuf_sz +
4171	    BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
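	/*
	 * bufsz is expressed in 1KB units for SRRCTL (assuming
	 * IXGBE_SRRCTL_BSIZEPKT_SHIFT is 10): e.g. a 2048-byte mbuf
	 * cluster gives (2048 + 1023) >> 10 = 2, i.e. a 2KB receive
	 * buffer programmed per queue below.
	 */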
4172
4173	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4174		u64 rdba = rxr->rxdma.dma_paddr;
4175
4176		/* Setup the Base and Length of the Rx Descriptor Ring */
4177		IXGBE_WRITE_REG(hw, IXGBE_RDBAL(i),
4178			       (rdba & 0x00000000ffffffffULL));
4179		IXGBE_WRITE_REG(hw, IXGBE_RDBAH(i), (rdba >> 32));
4180		IXGBE_WRITE_REG(hw, IXGBE_RDLEN(i),
4181		    adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc));
4182
4183		/* Set up the SRRCTL register */
4184		srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
4185		srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK;
4186		srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK;
4187		srrctl |= bufsz;
4188		srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
4189		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
4190
4191		/* Setup the HW Rx Head and Tail Descriptor Pointers */
4192		IXGBE_WRITE_REG(hw, IXGBE_RDH(i), 0);
4193		IXGBE_WRITE_REG(hw, IXGBE_RDT(i), 0);
4194
4195		/* Set the processing limit */
4196		rxr->process_limit = ixgbe_rx_process_limit;
4197	}
4198
4199	if (adapter->hw.mac.type != ixgbe_mac_82598EB) {
4200		u32 psrtype = IXGBE_PSRTYPE_TCPHDR |
4201			      IXGBE_PSRTYPE_UDPHDR |
4202			      IXGBE_PSRTYPE_IPV4HDR |
4203			      IXGBE_PSRTYPE_IPV6HDR;
4204		IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype);
4205	}
4206
4207	rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM);
4208
4209	/* Setup RSS */
4210	if (adapter->num_queues > 1) {
4211		int i, j;
4212		reta = 0;
4213
4214		/* set up random bits */
4215		arc4rand(&random, sizeof(random), 0);
4216
4217		/* Set up the redirection table */
4218		for (i = 0, j = 0; i < 128; i++, j++) {
4219			if (j == adapter->num_queues) j = 0;
4220			reta = (reta << 8) | (j * 0x11);
4221			if ((i & 3) == 3)
4222				IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta);
4223		}
4224
4225		/* Now fill our hash function seeds */
4226		for (int i = 0; i < 10; i++)
4227			IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), random[i]);
4228
4229		/* Perform hash on these packet types */
4230		mrqc = IXGBE_MRQC_RSSEN
4231		     | IXGBE_MRQC_RSS_FIELD_IPV4
4232		     | IXGBE_MRQC_RSS_FIELD_IPV4_TCP
4233		     | IXGBE_MRQC_RSS_FIELD_IPV4_UDP
4234		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP
4235		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX
4236		     | IXGBE_MRQC_RSS_FIELD_IPV6
4237		     | IXGBE_MRQC_RSS_FIELD_IPV6_TCP
4238		     | IXGBE_MRQC_RSS_FIELD_IPV6_UDP
4239		     | IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP;
4240		IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
4241
4242		/* RSS and RX IPP Checksum are mutually exclusive */
4243		rxcsum |= IXGBE_RXCSUM_PCSD;
4244	}
4245
4246	if (ifp->if_capenable & IFCAP_RXCSUM)
4247		rxcsum |= IXGBE_RXCSUM_PCSD;
4248
4249	if (!(rxcsum & IXGBE_RXCSUM_PCSD))
4250		rxcsum |= IXGBE_RXCSUM_IPPCSE;
4251
4252	IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum);
4253
4254	return;
4255}
4256
4257/*********************************************************************
4258 *
4259 *  Free all receive rings.
4260 *
4261 **********************************************************************/
4262static void
4263ixgbe_free_receive_structures(struct adapter *adapter)
4264{
4265	struct rx_ring *rxr = adapter->rx_rings;
4266
4267	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
4268
4269	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4270		struct lro_ctrl		*lro = &rxr->lro;
4271		ixgbe_free_receive_buffers(rxr);
4272		/* Free LRO memory */
4273		tcp_lro_free(lro);
4274		/* Free the ring memory as well */
4275		ixgbe_dma_free(adapter, &rxr->rxdma);
4276	}
4277
4278	free(adapter->rx_rings, M_DEVBUF);
4279}
4280
4281
4282/*********************************************************************
4283 *
4284 *  Free receive ring data structures
4285 *
4286 **********************************************************************/
4287static void
4288ixgbe_free_receive_buffers(struct rx_ring *rxr)
4289{
4290	struct adapter		*adapter = rxr->adapter;
4291	struct ixgbe_rx_buf	*rxbuf;
4292
4293	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
4294
4295	/* Cleanup any existing buffers */
4296	if (rxr->rx_buffers != NULL) {
4297		for (int i = 0; i < adapter->num_rx_desc; i++) {
4298			rxbuf = &rxr->rx_buffers[i];
4299			if (rxbuf->buf != NULL) {
4300				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4301				    BUS_DMASYNC_POSTREAD);
4302				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4303				rxbuf->buf->m_flags |= M_PKTHDR;
4304				m_freem(rxbuf->buf);
4305			}
4306			rxbuf->buf = NULL;
4307			if (rxbuf->pmap != NULL) {
4308				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4309				rxbuf->pmap = NULL;
4310			}
4311		}
4312		if (rxr->rx_buffers != NULL) {
4313			free(rxr->rx_buffers, M_DEVBUF);
4314			rxr->rx_buffers = NULL;
4315		}
4316	}
4317
4318	if (rxr->ptag != NULL) {
4319		bus_dma_tag_destroy(rxr->ptag);
4320		rxr->ptag = NULL;
4321	}
4322
4323	return;
4324}
4325
4326static __inline void
4327ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4328{
4329
4330        /*
4331         * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
4332         * should be computed by hardware. Also it should not have VLAN tag in
4333         * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
4334         */
4335        if (rxr->lro_enabled &&
4336            (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4337            (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4338            ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4339            (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
4340            (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
4341            (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
4342            (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4343            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4344                /*
4345                 * Send to the stack if:
4346                 **  - LRO not enabled, or
4347                 **  - no LRO resources, or
4348                 **  - lro enqueue fails
4349                 */
4350                if (rxr->lro.lro_cnt != 0)
4351                        if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4352                                return;
4353        }
4354	IXGBE_RX_UNLOCK(rxr);
4355        (*ifp->if_input)(ifp, m);
4356	IXGBE_RX_LOCK(rxr);
4357}
4358
4359static __inline void
4360ixgbe_rx_discard(struct rx_ring *rxr, int i)
4361{
4362	struct ixgbe_rx_buf	*rbuf;
4363
4364	rbuf = &rxr->rx_buffers[i];
4365
4366        if (rbuf->fmp != NULL) {/* Partial chain ? */
4367		rbuf->fmp->m_flags |= M_PKTHDR;
4368                m_freem(rbuf->fmp);
4369                rbuf->fmp = NULL;
4370	}
4371
4372	/*
4373	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
4375	** to just free the existing mbufs and take
4376	** the normal refresh path to get new buffers
4377	** and mapping.
4378	*/
4379	if (rbuf->buf) {
4380		m_free(rbuf->buf);
4381		rbuf->buf = NULL;
4382	}
4383
4384	rbuf->flags = 0;
4385
4386	return;
4387}
4388
4389
4390/*********************************************************************
4391 *
4392 *  This routine executes in interrupt context. It replenishes
4393 *  the mbufs in the descriptor and sends data which has been
4394 *  dma'ed into host memory to upper layer.
4395 *
4396 *  We loop at most count times if count is > 0, or until done if
4397 *  count < 0.
4398 *
4399 *  Return TRUE for more work, FALSE for all clean.
4400 *********************************************************************/
4401static bool
4402ixgbe_rxeof(struct ix_queue *que)
4403{
4404	struct adapter		*adapter = que->adapter;
4405	struct rx_ring		*rxr = que->rxr;
4406	struct ifnet		*ifp = adapter->ifp;
4407	struct lro_ctrl		*lro = &rxr->lro;
4408	struct lro_entry	*queued;
4409	int			i, nextp, processed = 0;
4410	u32			staterr = 0;
4411	u16			count = rxr->process_limit;
4412	union ixgbe_adv_rx_desc	*cur;
4413	struct ixgbe_rx_buf	*rbuf, *nbuf;
4414
4415	IXGBE_RX_LOCK(rxr);
4416
4417#ifdef DEV_NETMAP
4418	/* Same as the txeof routine: wakeup clients on intr. */
4419	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4420		IXGBE_RX_UNLOCK(rxr);
4421		return (FALSE);
4422	}
4423#endif /* DEV_NETMAP */
4424
4425	for (i = rxr->next_to_check; count != 0;) {
4426		struct mbuf	*sendmp, *mp;
4427		u32		rsc, ptype;
4428		u16		len;
4429		u16		vtag = 0;
4430		bool		eop;
4431
4432		/* Sync the ring. */
4433		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4434		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4435
4436		cur = &rxr->rx_base[i];
4437		staterr = le32toh(cur->wb.upper.status_error);
4438
4439		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
4440			break;
4441		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4442			break;
4443
4444		count--;
4445		sendmp = NULL;
4446		nbuf = NULL;
4447		rsc = 0;
4448		cur->wb.upper.status_error = 0;
4449		rbuf = &rxr->rx_buffers[i];
4450		mp = rbuf->buf;
4451
4452		len = le16toh(cur->wb.upper.length);
4453		ptype = le32toh(cur->wb.lower.lo_dword.data) &
4454		    IXGBE_RXDADV_PKTTYPE_MASK;
4455		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
4456
4457		/* Make sure bad packets are discarded */
4458		if (((staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) ||
4459		    (rxr->discard)) {
4460			rxr->rx_discarded++;
4461			if (eop)
4462				rxr->discard = FALSE;
4463			else
4464				rxr->discard = TRUE;
4465			ixgbe_rx_discard(rxr, i);
4466			goto next_desc;
4467		}
4468
4469		/*
4470		** On 82599 which supports a hardware
4471		** LRO (called HW RSC), packets need
4472		** not be fragmented across sequential
4473		** descriptors, rather the next descriptor
4474		** is indicated in bits of the descriptor.
4475		** This also means that we might proceses
4476		** more than one packet at a time, something
4477		** that has never been true before, it
4478		** required eliminating global chain pointers
4479		** in favor of what we are doing here.  -jfv
4480		*/
4481		if (!eop) {
4482			/*
4483			** Figure out the next descriptor
4484			** of this frame.
4485			*/
4486			if (rxr->hw_rsc == TRUE) {
4487				rsc = ixgbe_rsc_count(cur);
4488				rxr->rsc_num += (rsc - 1);
4489			}
4490			if (rsc) { /* Get hardware index */
4491				nextp = ((staterr &
4492				    IXGBE_RXDADV_NEXTP_MASK) >>
4493				    IXGBE_RXDADV_NEXTP_SHIFT);
4494			} else { /* Just sequential */
4495				nextp = i + 1;
4496				if (nextp == adapter->num_rx_desc)
4497					nextp = 0;
4498			}
4499			nbuf = &rxr->rx_buffers[nextp];
4500			prefetch(nbuf);
4501		}
4502		/*
4503		** Rather than using the fmp/lmp global pointers
4504		** we now keep the head of a packet chain in the
4505		** buffer struct and pass this along from one
4506		** descriptor to the next, until we get EOP.
4507		*/
4508		mp->m_len = len;
4509		/*
4510		** See if there is a stored head
4511		** that determines what we are
4512		*/
4513		sendmp = rbuf->fmp;
4514		if (sendmp != NULL) {  /* secondary frag */
4515			rbuf->buf = rbuf->fmp = NULL;
4516			mp->m_flags &= ~M_PKTHDR;
4517			sendmp->m_pkthdr.len += mp->m_len;
4518		} else {
4519			/*
4520			 * Optimize.  This might be a small packet,
4521			 * maybe just a TCP ACK.  Do a fast copy that
4522			 * is cache aligned into a new mbuf, and
4523			 * leave the old mbuf+cluster for re-use.
4524			 */
4525			if (eop && len <= IXGBE_RX_COPY_LEN) {
4526				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
4527				if (sendmp != NULL) {
4528					sendmp->m_data +=
4529					    IXGBE_RX_COPY_ALIGN;
4530					ixgbe_bcopy(mp->m_data,
4531					    sendmp->m_data, len);
4532					sendmp->m_len = len;
4533					rxr->rx_copies++;
4534					rbuf->flags |= IXGBE_RX_COPY;
4535				}
4536			}
4537			if (sendmp == NULL) {
4538				rbuf->buf = rbuf->fmp = NULL;
4539				sendmp = mp;
4540			}
4541
4542			/* first desc of a non-ps chain */
4543			sendmp->m_flags |= M_PKTHDR;
4544			sendmp->m_pkthdr.len = mp->m_len;
4545		}
4546		++processed;
4547
4548		/* Pass the head pointer on */
4549		if (eop == 0) {
4550			nbuf->fmp = sendmp;
4551			sendmp = NULL;
4552			mp->m_next = nbuf->buf;
4553		} else { /* Sending this frame */
4554			sendmp->m_pkthdr.rcvif = ifp;
4555			ifp->if_ipackets++;
4556			rxr->rx_packets++;
4557			/* capture data for AIM */
4558			rxr->bytes += sendmp->m_pkthdr.len;
4559			rxr->rx_bytes += sendmp->m_pkthdr.len;
4560			/* Process vlan info */
4561			if ((rxr->vtag_strip) &&
4562			    (staterr & IXGBE_RXD_STAT_VP))
4563				vtag = le16toh(cur->wb.upper.vlan);
4564			if (vtag) {
4565				sendmp->m_pkthdr.ether_vtag = vtag;
4566				sendmp->m_flags |= M_VLANTAG;
4567			}
4568			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
4569				ixgbe_rx_checksum(staterr, sendmp, ptype);
4570#if __FreeBSD_version >= 800000
4571			sendmp->m_pkthdr.flowid = que->msix;
4572			sendmp->m_flags |= M_FLOWID;
4573#endif
4574		}
4575next_desc:
4576		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4577		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4578
4579		/* Advance our pointers to the next descriptor. */
4580		if (++i == rxr->num_desc)
4581			i = 0;
4582
4583		/* Now send to the stack or do LRO */
4584		if (sendmp != NULL) {
4585			rxr->next_to_check = i;
4586			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
4587			i = rxr->next_to_check;
4588		}
4589
		/* Every 8 descriptors we go to refresh mbufs */
4591		if (processed == 8) {
4592			ixgbe_refresh_mbufs(rxr, i);
4593			processed = 0;
4594		}
4595	}
4596
4597	/* Refresh any remaining buf structs */
4598	if (ixgbe_rx_unrefreshed(rxr))
4599		ixgbe_refresh_mbufs(rxr, i);
4600
4601	rxr->next_to_check = i;
4602
4603	/*
4604	 * Flush any outstanding LRO work
4605	 */
4606	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
4607		SLIST_REMOVE_HEAD(&lro->lro_active, next);
4608		tcp_lro_flush(lro, queued);
4609	}
4610
4611	IXGBE_RX_UNLOCK(rxr);
4612
4613	/*
4614	** Still have cleaning to do?
4615	*/
4616	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
4617		return (TRUE);
4618	else
4619		return (FALSE);
4620}
4621
4622
4623/*********************************************************************
4624 *
4625 *  Verify that the hardware indicated that the checksum is valid.
4626 *  Inform the stack about the status of checksum so that stack
4627 *  doesn't spend time verifying the checksum.
4628 *
4629 *********************************************************************/
4630static void
4631ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
4632{
4633	u16	status = (u16) staterr;
4634	u8	errors = (u8) (staterr >> 24);
4635	bool	sctp = FALSE;
4636
4637	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
4638	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
4639		sctp = TRUE;
4640
4641	if (status & IXGBE_RXD_STAT_IPCS) {
4642		if (!(errors & IXGBE_RXD_ERR_IPE)) {
4643			/* IP Checksum Good */
4644			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
4645			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
4646
4647		} else
4648			mp->m_pkthdr.csum_flags = 0;
4649	}
4650	if (status & IXGBE_RXD_STAT_L4CS) {
4651		u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
4652#if __FreeBSD_version >= 800000
4653		if (sctp)
4654			type = CSUM_SCTP_VALID;
4655#endif
4656		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
4657			mp->m_pkthdr.csum_flags |= type;
4658			if (!sctp)
4659				mp->m_pkthdr.csum_data = htons(0xffff);
4660		}
4661	}
4662	return;
4663}
4664
4665
4666/*
4667** This routine is run via an vlan config EVENT,
4668** it enables us to use the HW Filter table since
4669** we can get the vlan id. This just creates the
4670** entry in the soft version of the VFTA, init will
4671** repopulate the real table.
4672*/
4673static void
4674ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4675{
4676	struct adapter	*adapter = ifp->if_softc;
4677	u16		index, bit;
4678
4679	if (ifp->if_softc !=  arg)   /* Not our event */
4680		return;
4681
4682	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
4683		return;
4684
4685	IXGBE_CORE_LOCK(adapter);
4686	index = (vtag >> 5) & 0x7F;
4687	bit = vtag & 0x1F;
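	/*
	 * Example: vtag 1000 maps to index (1000 >> 5) & 0x7F = 31 and
	 * bit 1000 & 0x1F = 8, i.e. bit 8 of shadow_vfta[31].
	 */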
4688	adapter->shadow_vfta[index] |= (1 << bit);
4689	++adapter->num_vlans;
4690	ixgbe_init_locked(adapter);
4691	IXGBE_CORE_UNLOCK(adapter);
4692}
4693
4694/*
4695** This routine is run via an vlan
4696** unconfig EVENT, remove our entry
4697** in the soft vfta.
4698*/
4699static void
4700ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
4701{
4702	struct adapter	*adapter = ifp->if_softc;
4703	u16		index, bit;
4704
4705	if (ifp->if_softc !=  arg)
4706		return;
4707
4708	if ((vtag == 0) || (vtag > 4095))	/* Invalid */
4709		return;
4710
4711	IXGBE_CORE_LOCK(adapter);
4712	index = (vtag >> 5) & 0x7F;
4713	bit = vtag & 0x1F;
4714	adapter->shadow_vfta[index] &= ~(1 << bit);
4715	--adapter->num_vlans;
4716	/* Re-init to load the changes */
4717	ixgbe_init_locked(adapter);
4718	IXGBE_CORE_UNLOCK(adapter);
4719}
4720
4721static void
4722ixgbe_setup_vlan_hw_support(struct adapter *adapter)
4723{
4724	struct ifnet 	*ifp = adapter->ifp;
4725	struct ixgbe_hw *hw = &adapter->hw;
4726	struct rx_ring	*rxr;
4727	u32		ctrl;
4728
4729
4730	/*
4731	** We get here thru init_locked, meaning
4732	** a soft reset, this has already cleared
4733	** the VFTA and other state, so if there
4734	** have been no vlan's registered do nothing.
4735	*/
4736	if (adapter->num_vlans == 0)
4737		return;
4738
4739	/*
4740	** A soft reset zero's out the VFTA, so
4741	** we need to repopulate it now.
4742	*/
4743	for (int i = 0; i < IXGBE_VFTA_SIZE; i++)
4744		if (adapter->shadow_vfta[i] != 0)
4745			IXGBE_WRITE_REG(hw, IXGBE_VFTA(i),
4746			    adapter->shadow_vfta[i]);
4747
4748	ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
4749	/* Enable the Filter Table if enabled */
4750	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
4751		ctrl &= ~IXGBE_VLNCTRL_CFIEN;
4752		ctrl |= IXGBE_VLNCTRL_VFE;
4753	}
4754	if (hw->mac.type == ixgbe_mac_82598EB)
4755		ctrl |= IXGBE_VLNCTRL_VME;
4756	IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl);
4757
4758	/* Setup the queues for vlans */
4759	for (int i = 0; i < adapter->num_queues; i++) {
4760		rxr = &adapter->rx_rings[i];
4761		/* On 82599 the VLAN enable is per/queue in RXDCTL */
4762		if (hw->mac.type != ixgbe_mac_82598EB) {
4763			ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
4764			ctrl |= IXGBE_RXDCTL_VME;
4765			IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl);
4766		}
4767		rxr->vtag_strip = TRUE;
4768	}
4769}
4770
4771static void
4772ixgbe_enable_intr(struct adapter *adapter)
4773{
4774	struct ixgbe_hw	*hw = &adapter->hw;
4775	struct ix_queue	*que = adapter->queues;
4776	u32		mask, fwsm;
4777
4778	mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
4779	/* Enable Fan Failure detection */
4780	if (hw->device_id == IXGBE_DEV_ID_82598AT)
4781		    mask |= IXGBE_EIMS_GPI_SDP1;
4782
4783	switch (adapter->hw.mac.type) {
4784		case ixgbe_mac_82599EB:
4785			mask |= IXGBE_EIMS_ECC;
4786			mask |= IXGBE_EIMS_GPI_SDP0;
4787			mask |= IXGBE_EIMS_GPI_SDP1;
4788			mask |= IXGBE_EIMS_GPI_SDP2;
4789#ifdef IXGBE_FDIR
4790			mask |= IXGBE_EIMS_FLOW_DIR;
4791#endif
4792			break;
4793		case ixgbe_mac_X540:
4794			mask |= IXGBE_EIMS_ECC;
4795			/* Detect if Thermal Sensor is enabled */
4796			fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM);
4797			if (fwsm & IXGBE_FWSM_TS_ENABLED)
4798				mask |= IXGBE_EIMS_TS;
4799#ifdef IXGBE_FDIR
4800			mask |= IXGBE_EIMS_FLOW_DIR;
4801#endif
4802		/* falls through */
4803		default:
4804			break;
4805	}
4806
4807	IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
4808
4809	/* With RSS we use auto clear */
4810	if (adapter->msix_mem) {
4811		mask = IXGBE_EIMS_ENABLE_MASK;
4812		/* Don't autoclear Link */
4813		mask &= ~IXGBE_EIMS_OTHER;
4814		mask &= ~IXGBE_EIMS_LSC;
4815		IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask);
4816	}
4817
4818	/*
4819	** Now enable all queues, this is done separately to
4820	** allow for handling the extended (beyond 32) MSIX
4821	** vectors that can be used by 82599
4822	*/
4823        for (int i = 0; i < adapter->num_queues; i++, que++)
4824                ixgbe_enable_queue(adapter, que->msix);
4825
4826	IXGBE_WRITE_FLUSH(hw);
4827
4828	return;
4829}
4830
4831static void
4832ixgbe_disable_intr(struct adapter *adapter)
4833{
4834	if (adapter->msix_mem)
4835		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0);
4836	if (adapter->hw.mac.type == ixgbe_mac_82598EB) {
4837		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0);
4838	} else {
4839		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
4840		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
4841		IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0);
4842	}
4843	IXGBE_WRITE_FLUSH(&adapter->hw);
4844	return;
4845}
4846
4847u16
4848ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
4849{
4850	u16 value;
4851
4852	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
4853	    reg, 2);
4854
4855	return (value);
4856}
4857
4858void
4859ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
4860{
4861	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
4862	    reg, value, 2);
4863
4864	return;
4865}
4866
4867/*
4868** Get the width and transaction speed of
4869** the slot this adapter is plugged into.
4870*/
4871static void
4872ixgbe_get_slot_info(struct ixgbe_hw *hw)
4873{
4874	device_t		dev = ((struct ixgbe_osdep *)hw->back)->dev;
4875	struct ixgbe_mac_info	*mac = &hw->mac;
4876	u16			link;
4877	u32			offset;
4878
4879	/* For most devices simply call the shared code routine */
4880	if (hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) {
4881		ixgbe_get_bus_info(hw);
4882		goto display;
4883	}
4884
4885	/*
4886	** For the Quad port adapter we need to parse back
4887	** up the PCI tree to find the speed of the expansion
4888	** slot into which this adapter is plugged. A bit more work.
4889	*/
4890	dev = device_get_parent(device_get_parent(dev));
4891#ifdef IXGBE_DEBUG
4892	device_printf(dev, "parent pcib = %x,%x,%x\n",
4893	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4894#endif
4895	dev = device_get_parent(device_get_parent(dev));
4896#ifdef IXGBE_DEBUG
4897	device_printf(dev, "slot pcib = %x,%x,%x\n",
4898	    pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
4899#endif
4900	/* Now get the PCI Express Capabilities offset */
4901	pci_find_cap(dev, PCIY_EXPRESS, &offset);
4902	/* ...and read the Link Status Register */
4903	link = pci_read_config(dev, offset + PCIER_LINK_STA, 2);
4904	switch (link & IXGBE_PCI_LINK_WIDTH) {
4905	case IXGBE_PCI_LINK_WIDTH_1:
4906		hw->bus.width = ixgbe_bus_width_pcie_x1;
4907		break;
4908	case IXGBE_PCI_LINK_WIDTH_2:
4909		hw->bus.width = ixgbe_bus_width_pcie_x2;
4910		break;
4911	case IXGBE_PCI_LINK_WIDTH_4:
4912		hw->bus.width = ixgbe_bus_width_pcie_x4;
4913		break;
4914	case IXGBE_PCI_LINK_WIDTH_8:
4915		hw->bus.width = ixgbe_bus_width_pcie_x8;
4916		break;
4917	default:
4918		hw->bus.width = ixgbe_bus_width_unknown;
4919		break;
4920	}
4921
4922	switch (link & IXGBE_PCI_LINK_SPEED) {
4923	case IXGBE_PCI_LINK_SPEED_2500:
4924		hw->bus.speed = ixgbe_bus_speed_2500;
4925		break;
4926	case IXGBE_PCI_LINK_SPEED_5000:
4927		hw->bus.speed = ixgbe_bus_speed_5000;
4928		break;
4929	case IXGBE_PCI_LINK_SPEED_8000:
4930		hw->bus.speed = ixgbe_bus_speed_8000;
4931		break;
4932	default:
4933		hw->bus.speed = ixgbe_bus_speed_unknown;
4934		break;
4935	}
4936
4937	mac->ops.set_lan_id(hw);
4938
4939display:
4940	device_printf(dev,"PCI Express Bus: Speed %s %s\n",
4941	    ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s":
4942	    (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s":
4943	    (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s":"Unknown"),
4944	    (hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" :
4945	    (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" :
4946	    (hw->bus.width == ixgbe_bus_width_pcie_x1) ? "Width x1" :
4947	    ("Unknown"));
4948
4949	if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4950	    ((hw->bus.width <= ixgbe_bus_width_pcie_x4) &&
4951	    (hw->bus.speed == ixgbe_bus_speed_2500))) {
4952		device_printf(dev, "PCI-Express bandwidth available"
4953		    " for this card\n     is not sufficient for"
4954		    " optimal performance.\n");
4955		device_printf(dev, "For optimal performance a x8 "
4956		    "PCIE, or x4 PCIE Gen2 slot is required.\n");
4957        }
4958	if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) &&
4959	    ((hw->bus.width <= ixgbe_bus_width_pcie_x8) &&
4960	    (hw->bus.speed < ixgbe_bus_speed_8000))) {
4961		device_printf(dev, "PCI-Express bandwidth available"
4962		    " for this card\n     is not sufficient for"
4963		    " optimal performance.\n");
4964		device_printf(dev, "For optimal performance a x8 "
4965		    "PCIE Gen3 slot is required.\n");
4966        }
4967
4968	return;
4969}
4970
4971
4972/*
4973** Setup the correct IVAR register for a particular MSIX interrupt
4974**   (yes this is all very magic and confusing :)
4975**  - entry is the register array entry
4976**  - vector is the MSIX vector for this queue
4977**  - type is RX/TX/MISC
4978*/
4979static void
4980ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type)
4981{
4982	struct ixgbe_hw *hw = &adapter->hw;
4983	u32 ivar, index;
4984
4985	vector |= IXGBE_IVAR_ALLOC_VAL;
4986
4987	switch (hw->mac.type) {
4988
4989	case ixgbe_mac_82598EB:
4990		if (type == -1)
4991			entry = IXGBE_IVAR_OTHER_CAUSES_INDEX;
4992		else
4993			entry += (type * 64);
4994		index = (entry >> 2) & 0x1F;
4995		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
4996		ivar &= ~(0xFF << (8 * (entry & 0x3)));
4997		ivar |= (vector << (8 * (entry & 0x3)));
4998		IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar);
4999		break;
5000
5001	case ixgbe_mac_82599EB:
5002	case ixgbe_mac_X540:
5003		if (type == -1) { /* MISC IVAR */
5004			index = (entry & 1) * 8;
5005			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
5006			ivar &= ~(0xFF << index);
5007			ivar |= (vector << index);
5008			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
5009		} else {	/* RX/TX IVARS */
5010			index = (16 * (entry & 1)) + (8 * type);
5011			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1));
5012			ivar &= ~(0xFF << index);
5013			ivar |= (vector << index);
5014			IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar);
5015		}
5016
5017	default:
5018		break;
5019	}
5020}
5021
5022static void
5023ixgbe_configure_ivars(struct adapter *adapter)
5024{
5025	struct  ix_queue *que = adapter->queues;
5026	u32 newitr;
5027
5028	if (ixgbe_max_interrupt_rate > 0)
5029		newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8;
5030	else
5031		newitr = 0;
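	/*
	 * Example: a limit of 31250 interrupts/sec gives
	 * newitr = 4000000 / 31250 = 128 (0x80), which the 0x0FF8 mask
	 * keeps entirely within the EITR interval field.
	 */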
5032
	for (int i = 0; i < adapter->num_queues; i++, que++) {
		/* First the RX queue entry */
		ixgbe_set_ivar(adapter, i, que->msix, 0);
		/* ... and the TX */
		ixgbe_set_ivar(adapter, i, que->msix, 1);
		/* Set an Initial EITR value */
		IXGBE_WRITE_REG(&adapter->hw,
		    IXGBE_EITR(que->msix), newitr);
5041	}
5042
5043	/* For the Link interrupt */
	ixgbe_set_ivar(adapter, 1, adapter->linkvec, -1);
5045}
5046
5047/*
5048** ixgbe_sfp_probe - called in the local timer to
5049** determine if a port had optics inserted.
5050*/
5051static bool ixgbe_sfp_probe(struct adapter *adapter)
5052{
5053	struct ixgbe_hw	*hw = &adapter->hw;
5054	device_t	dev = adapter->dev;
5055	bool		result = FALSE;
5056
5057	if ((hw->phy.type == ixgbe_phy_nl) &&
5058	    (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) {
5059		s32 ret = hw->phy.ops.identify_sfp(hw);
5060		if (ret)
			goto out;
5062		ret = hw->phy.ops.reset(hw);
5063		if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5064			device_printf(dev,"Unsupported SFP+ module detected!");
5065			printf(" Reload driver with supported module.\n");
5066			adapter->sfp_probe = FALSE;
			goto out;
5068		} else
5069			device_printf(dev,"SFP+ module detected!\n");
5070		/* We now have supported optics */
5071		adapter->sfp_probe = FALSE;
5072		/* Set the optics type so system reports correctly */
5073		ixgbe_setup_optics(adapter);
5074		result = TRUE;
5075	}
5076out:
5077	return (result);
5078}
5079
5080/*
5081** Tasklet handler for MSIX Link interrupts
5082**  - do outside interrupt since it might sleep
5083*/
5084static void
5085ixgbe_handle_link(void *context, int pending)
5086{
5087	struct adapter  *adapter = context;
5088
5089	ixgbe_check_link(&adapter->hw,
5090	    &adapter->link_speed, &adapter->link_up, 0);
5091       	ixgbe_update_link_status(adapter);
5092}
5093
5094/*
5095** Tasklet for handling SFP module interrupts
5096*/
5097static void
5098ixgbe_handle_mod(void *context, int pending)
5099{
5100	struct adapter  *adapter = context;
5101	struct ixgbe_hw *hw = &adapter->hw;
5102	device_t	dev = adapter->dev;
5103	u32 err;
5104
5105	err = hw->phy.ops.identify_sfp(hw);
5106	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5107		device_printf(dev,
5108		    "Unsupported SFP+ module type was detected.\n");
5109		return;
5110	}
5111	err = hw->mac.ops.setup_sfp(hw);
5112	if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) {
5113		device_printf(dev,
5114		    "Setup failure - unsupported SFP+ module type.\n");
5115		return;
5116	}
5117	taskqueue_enqueue(adapter->tq, &adapter->msf_task);
5118	return;
5119}
5120
5121
5122/*
5123** Tasklet for handling MSF (multispeed fiber) interrupts
5124*/
5125static void
5126ixgbe_handle_msf(void *context, int pending)
5127{
5128	struct adapter  *adapter = context;
5129	struct ixgbe_hw *hw = &adapter->hw;
5130	u32 autoneg;
5131	bool negotiate;
5132
5133	autoneg = hw->phy.autoneg_advertised;
5134	if ((!autoneg) && (hw->mac.ops.get_link_capabilities))
5135		hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate);
5136	if (hw->mac.ops.setup_link)
5137		hw->mac.ops.setup_link(hw, autoneg, TRUE);
5138	return;
5139}
5140
5141#ifdef IXGBE_FDIR
5142/*
5143** Tasklet for reinitializing the Flow Director filter table
5144*/
5145static void
5146ixgbe_reinit_fdir(void *context, int pending)
5147{
5148	struct adapter  *adapter = context;
5149	struct ifnet   *ifp = adapter->ifp;
5150
5151	if (adapter->fdir_reinit != 1) /* Shouldn't happen */
5152		return;
5153	ixgbe_reinit_fdir_tables_82599(&adapter->hw);
5154	adapter->fdir_reinit = 0;
5155	/* re-enable flow director interrupts */
5156	IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
5157	/* Restart the interface */
5158	ifp->if_drv_flags |= IFF_DRV_RUNNING;
5159	return;
5160}
5161#endif
5162
5163/**********************************************************************
5164 *
5165 *  Update the board statistics counters.
5166 *
5167 **********************************************************************/
5168static void
5169ixgbe_update_stats_counters(struct adapter *adapter)
5170{
5171	struct ifnet   *ifp = adapter->ifp;
5172	struct ixgbe_hw *hw = &adapter->hw;
5173	u32  missed_rx = 0, bprc, lxon, lxoff, total;
5174	u64  total_missed_rx = 0;
5175
5176	adapter->stats.crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS);
5177	adapter->stats.illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC);
5178	adapter->stats.errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC);
5179	adapter->stats.mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC);
5180
5181	/*
5182	** Note: these are for the 8 possible traffic classes,
5183	**	 which in current implementation is unused,
5184	**	 therefore only 0 should read real data.
5185	*/
5186	for (int i = 0; i < 8; i++) {
5187		u32 mp;
5188		mp = IXGBE_READ_REG(hw, IXGBE_MPC(i));
5189		/* missed_rx tallies misses for the gprc workaround */
5190		missed_rx += mp;
5191		/* global total per queue */
5192        	adapter->stats.mpc[i] += mp;
5193		/* Running comprehensive total for stats display */
5194		total_missed_rx += adapter->stats.mpc[i];
5195		if (hw->mac.type == ixgbe_mac_82598EB) {
5196			adapter->stats.rnbc[i] +=
5197			    IXGBE_READ_REG(hw, IXGBE_RNBC(i));
5198			adapter->stats.qbtc[i] +=
5199			    IXGBE_READ_REG(hw, IXGBE_QBTC(i));
5200			adapter->stats.qbrc[i] +=
5201			    IXGBE_READ_REG(hw, IXGBE_QBRC(i));
5202			adapter->stats.pxonrxc[i] +=
5203		    	    IXGBE_READ_REG(hw, IXGBE_PXONRXC(i));
5204		} else
5205			adapter->stats.pxonrxc[i] +=
5206		    	    IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i));
5207		adapter->stats.pxontxc[i] +=
5208		    IXGBE_READ_REG(hw, IXGBE_PXONTXC(i));
5209		adapter->stats.pxofftxc[i] +=
5210		    IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i));
5211		adapter->stats.pxoffrxc[i] +=
5212		    IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
5213		adapter->stats.pxon2offc[i] +=
5214		    IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i));
5215	}
5216	for (int i = 0; i < 16; i++) {
5217		adapter->stats.qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i));
5218		adapter->stats.qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i));
5219		adapter->stats.qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i));
5220	}
5221	adapter->stats.mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC);
5222	adapter->stats.mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC);
5223	adapter->stats.rlec += IXGBE_READ_REG(hw, IXGBE_RLEC);
5224
5225	/* Hardware workaround, gprc counts missed packets */
5226	adapter->stats.gprc += IXGBE_READ_REG(hw, IXGBE_GPRC);
5227	adapter->stats.gprc -= missed_rx;
5228
5229	if (hw->mac.type != ixgbe_mac_82598EB) {
5230		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) +
5231		    ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32);
5232		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) +
5233		    ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32);
5234		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORL) +
5235		    ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32);
5236		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT);
5237		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
5238	} else {
5239		adapter->stats.lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC);
5240		adapter->stats.lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
5241		/* 82598 only has a counter in the high register */
5242		adapter->stats.gorc += IXGBE_READ_REG(hw, IXGBE_GORCH);
5243		adapter->stats.gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH);
5244		adapter->stats.tor += IXGBE_READ_REG(hw, IXGBE_TORH);
5245	}
5246
5247	/*
	 * Workaround: the mprc hardware counter incorrectly includes
	 * broadcasts, so for now we subtract those.
5250	 */
5251	bprc = IXGBE_READ_REG(hw, IXGBE_BPRC);
5252	adapter->stats.bprc += bprc;
5253	adapter->stats.mprc += IXGBE_READ_REG(hw, IXGBE_MPRC);
5254	if (hw->mac.type == ixgbe_mac_82598EB)
5255		adapter->stats.mprc -= bprc;
5256
5257	adapter->stats.prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64);
5258	adapter->stats.prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127);
5259	adapter->stats.prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255);
5260	adapter->stats.prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511);
5261	adapter->stats.prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023);
5262	adapter->stats.prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522);
5263
5264	lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC);
5265	adapter->stats.lxontxc += lxon;
5266	lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC);
5267	adapter->stats.lxofftxc += lxoff;
5268	total = lxon + lxoff;
5269
5270	adapter->stats.gptc += IXGBE_READ_REG(hw, IXGBE_GPTC);
5271	adapter->stats.mptc += IXGBE_READ_REG(hw, IXGBE_MPTC);
5272	adapter->stats.ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64);
5273	adapter->stats.gptc -= total;
5274	adapter->stats.mptc -= total;
5275	adapter->stats.ptc64 -= total;
5276	adapter->stats.gotc -= total * ETHER_MIN_LEN;
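	/*
	 * The subtractions above remove the XON/XOFF pause frames, which
	 * are minimum-length frames (hence total * ETHER_MIN_LEN bytes),
	 * from counters where the hardware also tallied them.
	 */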
5277
5278	adapter->stats.ruc += IXGBE_READ_REG(hw, IXGBE_RUC);
5279	adapter->stats.rfc += IXGBE_READ_REG(hw, IXGBE_RFC);
5280	adapter->stats.roc += IXGBE_READ_REG(hw, IXGBE_ROC);
5281	adapter->stats.rjc += IXGBE_READ_REG(hw, IXGBE_RJC);
5282	adapter->stats.mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC);
5283	adapter->stats.mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC);
5284	adapter->stats.mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC);
5285	adapter->stats.tpr += IXGBE_READ_REG(hw, IXGBE_TPR);
5286	adapter->stats.tpt += IXGBE_READ_REG(hw, IXGBE_TPT);
5287	adapter->stats.ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127);
5288	adapter->stats.ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255);
5289	adapter->stats.ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511);
5290	adapter->stats.ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023);
5291	adapter->stats.ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522);
5292	adapter->stats.bptc += IXGBE_READ_REG(hw, IXGBE_BPTC);
5293	adapter->stats.xec += IXGBE_READ_REG(hw, IXGBE_XEC);
5294	adapter->stats.fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC);
5295	adapter->stats.fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST);
5296	/* Only read FCOE on 82599 */
5297	if (hw->mac.type != ixgbe_mac_82598EB) {
5298		adapter->stats.fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC);
5299		adapter->stats.fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC);
5300		adapter->stats.fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC);
5301		adapter->stats.fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC);
5302		adapter->stats.fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC);
5303	}
5304
5305	/* Fill out the OS statistics structure */
5306	ifp->if_ipackets = adapter->stats.gprc;
5307	ifp->if_opackets = adapter->stats.gptc;
5308	ifp->if_ibytes = adapter->stats.gorc;
5309	ifp->if_obytes = adapter->stats.gotc;
5310	ifp->if_imcasts = adapter->stats.mprc;
5311	ifp->if_omcasts = adapter->stats.mptc;
5312	ifp->if_collisions = 0;
5313
5314	/* Rx Errors */
5315	ifp->if_iqdrops = total_missed_rx;
5316	ifp->if_ierrors = adapter->stats.crcerrs + adapter->stats.rlec;
5317}
5318
5319/** ixgbe_sysctl_tdh_handler - Handler function
5320 *  Retrieves the TDH value from the hardware
5321 */
5322static int
5323ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS)
5324{
5325	int error;
5326
5327	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5328	if (!txr) return 0;
5329
5330	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me));
5331	error = sysctl_handle_int(oidp, &val, 0, req);
5332	if (error || !req->newptr)
5333		return error;
5334	return 0;
5335}
5336
5337/** ixgbe_sysctl_tdt_handler - Handler function
5338 *  Retrieves the TDT value from the hardware
5339 */
5340static int
5341ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS)
5342{
5343	int error;
5344
5345	struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1);
5346	if (!txr) return 0;
5347
5348	unsigned val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me));
5349	error = sysctl_handle_int(oidp, &val, 0, req);
5350	if (error || !req->newptr)
5351		return error;
5352	return 0;
5353}
5354
5355/** ixgbe_sysctl_rdh_handler - Handler function
5356 *  Retrieves the RDH value from the hardware
5357 */
5358static int
5359ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS)
5360{
5361	int error;
5362
5363	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5364	if (!rxr) return 0;
5365
5366	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me));
5367	error = sysctl_handle_int(oidp, &val, 0, req);
5368	if (error || !req->newptr)
5369		return error;
5370	return 0;
5371}
5372
5373/** ixgbe_sysctl_rdt_handler - Handler function
5374 *  Retrieves the RDT value from the hardware
5375 */
5376static int
5377ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS)
5378{
5379	int error;
5380
5381	struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1);
5382	if (!rxr) return 0;
5383
5384	unsigned val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me));
5385	error = sysctl_handle_int(oidp, &val, 0, req);
5386	if (error || !req->newptr)
5387		return error;
5388	return 0;
5389}
5390
5391static int
5392ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5393{
5394	int error;
5395	struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1);
5396	unsigned int reg, usec, rate;
5397
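	/*
	 * The arithmetic below treats the EITR interval field (bits 3..11)
	 * as a count of 2-usec units: reading converts it to an approximate
	 * interrupts/second rate, and writing a new rate converts back,
	 * clamping any nonzero request to at least 1000 interrupts/second.
	 */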
5398	reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix));
5399	usec = ((reg & 0x0FF8) >> 3);
5400	if (usec > 0)
5401		rate = 500000 / usec;
5402	else
5403		rate = 0;
5404	error = sysctl_handle_int(oidp, &rate, 0, req);
5405	if (error || !req->newptr)
5406		return error;
5407	reg &= ~0xfff; /* default, no limitation */
5408	ixgbe_max_interrupt_rate = 0;
5409	if (rate > 0 && rate < 500000) {
5410		if (rate < 1000)
5411			rate = 1000;
5412		ixgbe_max_interrupt_rate = rate;
5413		reg |= ((4000000/rate) & 0xff8 );
5414	}
5415	IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg);
5416	return 0;
5417}
5418
5419/*
5420 * Add sysctl variables, one per statistic, to the system.
5421 */
5422static void
5423ixgbe_add_hw_stats(struct adapter *adapter)
5424{
5425
5426	device_t dev = adapter->dev;
5427
5428	struct tx_ring *txr = adapter->tx_rings;
5429	struct rx_ring *rxr = adapter->rx_rings;
5430
5431	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5432	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5433	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5434	struct ixgbe_hw_stats *stats = &adapter->stats;
5435
5436	struct sysctl_oid *stat_node, *queue_node;
5437	struct sysctl_oid_list *stat_list, *queue_list;
5438
5439#define QUEUE_NAME_LEN 32
5440	char namebuf[QUEUE_NAME_LEN];
5441
5442	/* Driver Statistics */
5443	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5444			CTLFLAG_RD, &adapter->dropped_pkts,
5445			"Driver dropped packets");
5446	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed",
5447			CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5448			"m_defrag() failed");
5449	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events",
5450			CTLFLAG_RD, &adapter->watchdog_events,
5451			"Watchdog timeouts");
5452	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5453			CTLFLAG_RD, &adapter->link_irq,
5454			"Link MSIX IRQ Handled");
5455
5456	for (int i = 0; i < adapter->num_queues; i++, txr++) {
5457		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5458		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5459					    CTLFLAG_RD, NULL, "Queue Name");
5460		queue_list = SYSCTL_CHILDREN(queue_node);
5461
5462		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5463				CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i],
5464				sizeof(&adapter->queues[i]),
5465				ixgbe_sysctl_interrupt_rate_handler, "IU",
5466				"Interrupt Rate");
5467		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs",
5468				CTLFLAG_RD, &(adapter->queues[i].irqs),
5469				"irqs on this queue");
5470		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5471				CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5472				ixgbe_sysctl_tdh_handler, "IU",
5473				"Transmit Descriptor Head");
5474		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5475				CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr),
5476				ixgbe_sysctl_tdt_handler, "IU",
5477				"Transmit Descriptor Tail");
5478		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "tso_tx",
5479				CTLFLAG_RD, &txr->tso_tx,
5480				"TSO");
5481		SYSCTL_ADD_ULONG(ctx, queue_list, OID_AUTO, "no_tx_dma_setup",
5482				CTLFLAG_RD, &txr->no_tx_dma_setup,
5483				"Driver tx dma failure in xmit");
5484		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5485				CTLFLAG_RD, &txr->no_desc_avail,
5486				"Queue No Descriptor Available");
5487		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5488				CTLFLAG_RD, &txr->total_packets,
5489				"Queue Packets Transmitted");
5490	}
5491
5492	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
5493		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5494		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5495					    CTLFLAG_RD, NULL, "Queue Name");
5496		queue_list = SYSCTL_CHILDREN(queue_node);
5497
		struct lro_ctrl *lro = &rxr->lro;
5504
5505		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5506				CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5507				ixgbe_sysctl_rdh_handler, "IU",
5508				"Receive Descriptor Head");
5509		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5510				CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr),
5511				ixgbe_sysctl_rdt_handler, "IU",
5512				"Receive Descriptor Tail");
5513		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5514				CTLFLAG_RD, &rxr->rx_packets,
5515				"Queue Packets Received");
5516		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5517				CTLFLAG_RD, &rxr->rx_bytes,
5518				"Queue Bytes Received");
5519		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies",
5520				CTLFLAG_RD, &rxr->rx_copies,
5521				"Copied RX Frames");
5522		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_queued",
5523				CTLFLAG_RD, &lro->lro_queued, 0,
5524				"LRO Queued");
5525		SYSCTL_ADD_INT(ctx, queue_list, OID_AUTO, "lro_flushed",
5526				CTLFLAG_RD, &lro->lro_flushed, 0,
5527				"LRO Flushed");
5528	}
5529
	/* MAC stats get their own sub node */
5531
5532	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5533				    CTLFLAG_RD, NULL, "MAC Statistics");
5534	stat_list = SYSCTL_CHILDREN(stat_node);
5535
5536	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
5537			CTLFLAG_RD, &stats->crcerrs,
5538			"CRC Errors");
5539	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs",
5540			CTLFLAG_RD, &stats->illerrc,
5541			"Illegal Byte Errors");
5542	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs",
5543			CTLFLAG_RD, &stats->errbc,
5544			"Byte Errors");
5545	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards",
5546			CTLFLAG_RD, &stats->mspdc,
5547			"MAC Short Packets Discarded");
5548	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults",
5549			CTLFLAG_RD, &stats->mlfc,
5550			"MAC Local Faults");
5551	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults",
5552			CTLFLAG_RD, &stats->mrfc,
5553			"MAC Remote Faults");
5554	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs",
5555			CTLFLAG_RD, &stats->rlec,
5556			"Receive Length Errors");
5557
5558	/* Flow Control stats */
5559	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
5560			CTLFLAG_RD, &stats->lxontxc,
5561			"Link XON Transmitted");
5562	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
5563			CTLFLAG_RD, &stats->lxonrxc,
5564			"Link XON Received");
5565	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
5566			CTLFLAG_RD, &stats->lxofftxc,
5567			"Link XOFF Transmitted");
5568	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
5569			CTLFLAG_RD, &stats->lxoffrxc,
5570			"Link XOFF Received");
5571
5572	/* Packet Reception Stats */
5573	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd",
5574			CTLFLAG_RD, &stats->tor,
5575			"Total Octets Received");
5576	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd",
5577			CTLFLAG_RD, &stats->gorc,
5578			"Good Octets Received");
5579	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd",
5580			CTLFLAG_RD, &stats->tpr,
5581			"Total Packets Received");
5582	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd",
5583			CTLFLAG_RD, &stats->gprc,
5584			"Good Packets Received");
5585	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd",
5586			CTLFLAG_RD, &stats->mprc,
5587			"Multicast Packets Received");
5588	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd",
5589			CTLFLAG_RD, &stats->bprc,
5590			"Broadcast Packets Received");
5591	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
5592			CTLFLAG_RD, &stats->prc64,
			"64 byte frames received");
5594	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
5595			CTLFLAG_RD, &stats->prc127,
5596			"65-127 byte frames received");
5597	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
5598			CTLFLAG_RD, &stats->prc255,
5599			"128-255 byte frames received");
5600	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
5601			CTLFLAG_RD, &stats->prc511,
5602			"256-511 byte frames received");
5603	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
5604			CTLFLAG_RD, &stats->prc1023,
5605			"512-1023 byte frames received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
			CTLFLAG_RD, &stats->prc1522,
			"1024-1522 byte frames received");
5609	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized",
5610			CTLFLAG_RD, &stats->ruc,
5611			"Receive Undersized");
5612	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5613			CTLFLAG_RD, &stats->rfc,
			"Fragmented Packets Received");
5615	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized",
5616			CTLFLAG_RD, &stats->roc,
5617			"Oversized Packets Received");
5618	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd",
5619			CTLFLAG_RD, &stats->rjc,
5620			"Received Jabber");
5621	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd",
5622			CTLFLAG_RD, &stats->mngprc,
5623			"Management Packets Received");
	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd",
			CTLFLAG_RD, &stats->mngpdc,
			"Management Packets Dropped");
5627	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs",
5628			CTLFLAG_RD, &stats->xec,
5629			"Checksum Errors");
5630
5631	/* Packet Transmission Stats */
5632	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5633			CTLFLAG_RD, &stats->gotc,
5634			"Good Octets Transmitted");
5635	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
5636			CTLFLAG_RD, &stats->tpt,
5637			"Total Packets Transmitted");
5638	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5639			CTLFLAG_RD, &stats->gptc,
5640			"Good Packets Transmitted");
5641	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
5642			CTLFLAG_RD, &stats->bptc,
5643			"Broadcast Packets Transmitted");
5644	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
5645			CTLFLAG_RD, &stats->mptc,
5646			"Multicast Packets Transmitted");
5647	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd",
5648			CTLFLAG_RD, &stats->mngptc,
5649			"Management Packets Transmitted");
5650	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
5651			CTLFLAG_RD, &stats->ptc64,
			"64 byte frames transmitted");
5653	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
5654			CTLFLAG_RD, &stats->ptc127,
5655			"65-127 byte frames transmitted");
5656	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
5657			CTLFLAG_RD, &stats->ptc255,
5658			"128-255 byte frames transmitted");
5659	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
5660			CTLFLAG_RD, &stats->ptc511,
5661			"256-511 byte frames transmitted");
5662	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
5663			CTLFLAG_RD, &stats->ptc1023,
5664			"512-1023 byte frames transmitted");
5665	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
5666			CTLFLAG_RD, &stats->ptc1522,
5667			"1024-1522 byte frames transmitted");
5668}
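
/*
** A minimal usage sketch (not part of the driver): the nodes registered
** above hang off the device's sysctl tree, so on a hypothetical first
** ix(4) instance they could be read with something like:
**	sysctl dev.ix.0.mac_stats.good_pkts_rcvd
**	sysctl dev.ix.0.queue0.tx_packets
*/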
5669
5670/*
5671** Set flow control using sysctl:
5672** Flow control values:
5673** 	0 - off
5674**	1 - rx pause
5675**	2 - tx pause
5676**	3 - full
5677*/
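/*
** Example (hedged sketch; assumes this handler is wired to a read/write
** "fc" sysctl under the device node elsewhere in the driver, and a
** hypothetical unit 0):
**	sysctl dev.ix.0.fc=3	(request full flow control)
*/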
5678static int
5679ixgbe_set_flowcntl(SYSCTL_HANDLER_ARGS)
5680{
5681	int error, last;
5682	struct adapter *adapter = (struct adapter *) arg1;
5683
5684	last = adapter->fc;
5685	error = sysctl_handle_int(oidp, &adapter->fc, 0, req);
5686	if ((error) || (req->newptr == NULL))
5687		return (error);
5688
5689	/* Don't bother if it's not changed */
5690	if (adapter->fc == last)
5691		return (0);
5692
5693	switch (adapter->fc) {
5694		case ixgbe_fc_rx_pause:
5695		case ixgbe_fc_tx_pause:
5696		case ixgbe_fc_full:
5697			adapter->hw.fc.requested_mode = adapter->fc;
5698			if (adapter->num_queues > 1)
5699				ixgbe_disable_rx_drop(adapter);
5700			break;
5701		case ixgbe_fc_none:
5702			adapter->hw.fc.requested_mode = ixgbe_fc_none;
5703			if (adapter->num_queues > 1)
5704				ixgbe_enable_rx_drop(adapter);
5705			break;
5706		default:
5707			adapter->fc = last;
5708			return (EINVAL);
5709	}
5710	/* Don't autoneg if forcing a value */
5711	adapter->hw.fc.disable_fc_autoneg = TRUE;
5712	ixgbe_fc_enable(&adapter->hw);
5713	return error;
5714}
5715
5716/*
5717** Control link advertise speed:
5718**	1 - advertise only 1G
5719**	2 - advertise 100Mb
5720**	3 - advertise normal
5721*/
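/*
** Example (hedged sketch; assumes this handler is exposed as an
** "advertise_speed" sysctl under the device node, hypothetical unit 0):
**	sysctl dev.ix.0.advertise_speed=1	(advertise only 1G)
*/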
5722static int
5723ixgbe_set_advertise(SYSCTL_HANDLER_ARGS)
5724{
5725	int			error = 0;
5726	struct adapter		*adapter;
5727	device_t		dev;
5728	struct ixgbe_hw		*hw;
5729	ixgbe_link_speed	speed, last;
5730
5731	adapter = (struct adapter *) arg1;
5732	dev = adapter->dev;
5733	hw = &adapter->hw;
5734	last = adapter->advertise;
5735
5736	error = sysctl_handle_int(oidp, &adapter->advertise, 0, req);
5737	if ((error) || (req->newptr == NULL))
5738		return (error);
5739
5740	if (adapter->advertise == last) /* no change */
5741		return (0);
5742
5743	if (!((hw->phy.media_type == ixgbe_media_type_copper) ||
5744            (hw->phy.multispeed_fiber)))
5745		return (EINVAL);
5746
5747	if ((adapter->advertise == 2) && (hw->mac.type != ixgbe_mac_X540)) {
5748		device_printf(dev, "Set Advertise: 100Mb on X540 only\n");
5749		return (EINVAL);
5750	}
5751
5752	if (adapter->advertise == 1)
5753                speed = IXGBE_LINK_SPEED_1GB_FULL;
5754	else if (adapter->advertise == 2)
5755                speed = IXGBE_LINK_SPEED_100_FULL;
5756	else if (adapter->advertise == 3)
5757                speed = IXGBE_LINK_SPEED_1GB_FULL |
5758			IXGBE_LINK_SPEED_10GB_FULL;
5759	else {	/* bogus value */
5760		adapter->advertise = last;
5761		return (EINVAL);
5762	}
5763
5764	hw->mac.autotry_restart = TRUE;
5765	hw->mac.ops.setup_link(hw, speed, TRUE);
5766
5767	return (error);
5768}
5769
5770/*
5771** Thermal Shutdown Trigger
5772**   - cause a Thermal Overtemp IRQ
5773**   - this now requires firmware enabling
5774*/
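/*
** Example (hedged sketch; the sysctl name is illustrative and depends on
** how the handler is registered elsewhere in the driver):
**	sysctl dev.ix.0.ts=1	(fire a thermal overtemp interrupt, X540 only)
*/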
5775static int
5776ixgbe_set_thermal_test(SYSCTL_HANDLER_ARGS)
5777{
5778	int		error, fire = 0;
5779	struct adapter	*adapter = (struct adapter *) arg1;
	struct ixgbe_hw *hw = &adapter->hw;

5783	if (hw->mac.type != ixgbe_mac_X540)
5784		return (0);
5785
5786	error = sysctl_handle_int(oidp, &fire, 0, req);
5787	if ((error) || (req->newptr == NULL))
5788		return (error);
5789
5790	if (fire) {
5791		u32 reg = IXGBE_READ_REG(hw, IXGBE_EICS);
5792		reg |= IXGBE_EICR_TS;
5793		IXGBE_WRITE_REG(hw, IXGBE_EICS, reg);
5794	}
5795
5796	return (0);
5797}
5798
/*
** Enable the hardware to drop packets when the buffer is
** full. This is useful with multiqueue, so that a single
** full queue does not stall the entire RX engine. We only
** enable this when multiqueue is in use AND flow control
** is disabled.
*/
static void
ixgbe_enable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl |= IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}
5817
static void
ixgbe_disable_rx_drop(struct adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	for (int i = 0; i < adapter->num_queues; i++) {
		u32 srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
		srrctl &= ~IXGBE_SRRCTL_DROP_EN;
		IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(i), srrctl);
	}
}
5829