1/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 295323 2016-02-05 17:14:37Z erj $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include "if_igb.h"
46
47/*********************************************************************
48 * Driver version:
49 *********************************************************************/
50char igb_driver_version[] = "2.5.3-k";
51
52
53/*********************************************************************
54 * PCI Device ID Table
55 *
56 * Used by probe to select devices to load on
 57 Last field stores an index into igb_strings
58 * Last entry must be all 0s
59 *
60 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61 *********************************************************************/
62
63static igb_vendor_info_t igb_vendor_info_array[] =
64{
65 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
72 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
79 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
81 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
88 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
89 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
90 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
91 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
93 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
98 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
99 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
100 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
101 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
104 /* required last entry */
105 {0, 0, 0, 0, 0}
106};
107
108/*********************************************************************
109 * Table of branding strings for all supported NICs.
110 *********************************************************************/
111
112static char *igb_strings[] = {
113 "Intel(R) PRO/1000 Network Connection"
114};
115
116/*********************************************************************
117 * Function prototypes
118 *********************************************************************/
119static int igb_probe(device_t);
120static int igb_attach(device_t);
121static int igb_detach(device_t);
122static int igb_shutdown(device_t);
123static int igb_suspend(device_t);
124static int igb_resume(device_t);
125#ifndef IGB_LEGACY_TX
126static int igb_mq_start(struct ifnet *, struct mbuf *);
127static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128static void igb_qflush(struct ifnet *);
129static void igb_deferred_mq_start(void *, int);
130#else
131static void igb_start(struct ifnet *);
132static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133#endif
134static int igb_ioctl(struct ifnet *, u_long, caddr_t);
135static uint64_t igb_get_counter(if_t, ift_counter);
136static void igb_init(void *);
137static void igb_init_locked(struct adapter *);
138static void igb_stop(void *);
139static void igb_media_status(struct ifnet *, struct ifmediareq *);
140static int igb_media_change(struct ifnet *);
141static void igb_identify_hardware(struct adapter *);
142static int igb_allocate_pci_resources(struct adapter *);
143static int igb_allocate_msix(struct adapter *);
144static int igb_allocate_legacy(struct adapter *);
145static int igb_setup_msix(struct adapter *);
146static void igb_free_pci_resources(struct adapter *);
147static void igb_local_timer(void *);
148static void igb_reset(struct adapter *);
149static int igb_setup_interface(device_t, struct adapter *);
150static int igb_allocate_queues(struct adapter *);
151static void igb_configure_queues(struct adapter *);
152
153static int igb_allocate_transmit_buffers(struct tx_ring *);
154static void igb_setup_transmit_structures(struct adapter *);
155static void igb_setup_transmit_ring(struct tx_ring *);
156static void igb_initialize_transmit_units(struct adapter *);
157static void igb_free_transmit_structures(struct adapter *);
158static void igb_free_transmit_buffers(struct tx_ring *);
159
160static int igb_allocate_receive_buffers(struct rx_ring *);
161static int igb_setup_receive_structures(struct adapter *);
162static int igb_setup_receive_ring(struct rx_ring *);
163static void igb_initialize_receive_units(struct adapter *);
164static void igb_free_receive_structures(struct adapter *);
165static void igb_free_receive_buffers(struct rx_ring *);
166static void igb_free_receive_ring(struct rx_ring *);
167
168static void igb_enable_intr(struct adapter *);
169static void igb_disable_intr(struct adapter *);
170static void igb_update_stats_counters(struct adapter *);
171static bool igb_txeof(struct tx_ring *);
172
173static __inline void igb_rx_discard(struct rx_ring *, int);
174static __inline void igb_rx_input(struct rx_ring *,
175 struct ifnet *, struct mbuf *, u32);
176
177static bool igb_rxeof(struct igb_queue *, int, int *);
178static void igb_rx_checksum(u32, struct mbuf *, u32);
179static int igb_tx_ctx_setup(struct tx_ring *,
180 struct mbuf *, u32 *, u32 *);
181static int igb_tso_setup(struct tx_ring *,
182 struct mbuf *, u32 *, u32 *);
183static void igb_set_promisc(struct adapter *);
184static void igb_disable_promisc(struct adapter *);
185static void igb_set_multi(struct adapter *);
186static void igb_update_link_status(struct adapter *);
187static void igb_refresh_mbufs(struct rx_ring *, int);
188
189static void igb_register_vlan(void *, struct ifnet *, u16);
190static void igb_unregister_vlan(void *, struct ifnet *, u16);
191static void igb_setup_vlan_hw_support(struct adapter *);
192
193static int igb_xmit(struct tx_ring *, struct mbuf **);
194static int igb_dma_malloc(struct adapter *, bus_size_t,
195 struct igb_dma_alloc *, int);
196static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198static void igb_print_nvm_info(struct adapter *);
199static int igb_is_valid_ether_addr(u8 *);
200static void igb_add_hw_stats(struct adapter *);
201
202static void igb_vf_init_stats(struct adapter *);
203static void igb_update_vf_stats_counters(struct adapter *);
204
205/* Management and WOL Support */
206static void igb_init_manageability(struct adapter *);
207static void igb_release_manageability(struct adapter *);
208static void igb_get_hw_control(struct adapter *);
209static void igb_release_hw_control(struct adapter *);
210static void igb_enable_wakeup(device_t);
211static void igb_led_func(void *, int);
212
213static int igb_irq_fast(void *);
214static void igb_msix_que(void *);
215static void igb_msix_link(void *);
216static void igb_handle_que(void *context, int pending);
217static void igb_handle_link(void *context, int pending);
218static void igb_handle_link_locked(struct adapter *);
219
220static void igb_set_sysctl_value(struct adapter *, const char *,
221 const char *, int *, int);
222static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225
226#ifdef DEVICE_POLLING
227static poll_handler_t igb_poll;
228#endif /* POLLING */
229
230/*********************************************************************
231 * FreeBSD Device Interface Entry Points
232 *********************************************************************/
233
234static device_method_t igb_methods[] = {
235 /* Device interface */
236 DEVMETHOD(device_probe, igb_probe),
237 DEVMETHOD(device_attach, igb_attach),
238 DEVMETHOD(device_detach, igb_detach),
239 DEVMETHOD(device_shutdown, igb_shutdown),
240 DEVMETHOD(device_suspend, igb_suspend),
241 DEVMETHOD(device_resume, igb_resume),
242 DEVMETHOD_END
243};
244
245static driver_t igb_driver = {
246 "igb", igb_methods, sizeof(struct adapter),
247};
248
249static devclass_t igb_devclass;
250DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251MODULE_DEPEND(igb, pci, 1, 1, 1);
252MODULE_DEPEND(igb, ether, 1, 1, 1);
253#ifdef DEV_NETMAP
254MODULE_DEPEND(igb, netmap, 1, 1, 1);
255#endif /* DEV_NETMAP */
256
257/*********************************************************************
258 * Tunable default values.
259 *********************************************************************/
260
261static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262
263/* Descriptor defaults */
264static int igb_rxd = IGB_DEFAULT_RXD;
265static int igb_txd = IGB_DEFAULT_TXD;
266SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267 "Number of receive descriptors per queue");
268SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269 "Number of transmit descriptors per queue");
270
271/*
272** AIM: Adaptive Interrupt Moderation
273** which means that the interrupt rate
274** is varied over time based on the
275** traffic for that interrupt vector
276*/
277static int igb_enable_aim = TRUE;
278SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279 "Enable adaptive interrupt moderation");
280
281/*
282 * MSIX should be the default for best performance,
283 * but this allows it to be forced off for testing.
284 */
285static int igb_enable_msix = 1;
286SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287 "Enable MSI-X interrupts");
288
289/*
290** Tuneable Interrupt rate
291*/
292static int igb_max_interrupt_rate = 8000;
293SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294 &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295
296#ifndef IGB_LEGACY_TX
297/*
298** Tuneable number of buffers in the buf-ring (drbr_xxx)
299*/
300static int igb_buf_ring_size = IGB_BR_SIZE;
301SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302 &igb_buf_ring_size, 0, "Size of the bufring");
303#endif
304
305/*
306** Header split causes the packet header to
 307 ** be DMA'd to a separate mbuf from the payload.
 308 ** This can have memory alignment benefits, and
 309 ** another plus is that small packets often fit
 310 ** into the header and thus use no cluster. It is
 311 ** a very workload-dependent feature.
312*/
313static int igb_header_split = FALSE;
314SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315 "Enable receive mbuf header split");
316
317/*
318** This will autoconfigure based on the
319** number of CPUs and max supported
320** MSIX messages if left at 0.
321*/
322static int igb_num_queues = 0;
323SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324 "Number of queues to configure, 0 indicates autoconfigure");
325
326/*
327** Global variable to store last used CPU when binding queues
328** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
329** queue is bound to a cpu.
330*/
331static int igb_last_bind_cpu = -1;
332
333/* How many packets rxeof tries to clean at a time */
334static int igb_rx_process_limit = 100;
335SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336 &igb_rx_process_limit, 0,
337 "Maximum number of received packets to process at a time, -1 means unlimited");
338
339/* How many packets txeof tries to clean at a time */
340static int igb_tx_process_limit = -1;
341SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342 &igb_tx_process_limit, 0,
343 "Maximum number of sent packets to process at a time, -1 means unlimited");
344
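/*
** Illustrative usage of the tunables above (values are examples only,
** not recommendations). The CTLFLAG_RDTUN entries are read at boot from
** /boot/loader.conf; enable_aim is also adjustable at runtime with
** sysctl(8):
**
**   hw.igb.rxd=2048               # receive descriptors per queue
**   hw.igb.txd=2048               # transmit descriptors per queue
**   hw.igb.enable_aim=1           # adaptive interrupt moderation
**   hw.igb.max_interrupt_rate=8000
**   hw.igb.header_split=0         # DMA headers into a separate mbuf
**   hw.igb.num_queues=4           # 0 = autoconfigure from CPUs/MSI-X
**   hw.igb.rx_process_limit=100   # -1 = unlimited
*/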
345#ifdef DEV_NETMAP /* see ixgbe.c for details */
346#include <dev/netmap/if_igb_netmap.h>
347#endif /* DEV_NETMAP */
348/*********************************************************************
349 * Device identification routine
350 *
 351 * igb_probe determines if the driver should be loaded for the
 352 * adapter, based on the PCI vendor/device ID of the adapter.
353 *
354 * return BUS_PROBE_DEFAULT on success, positive on failure
355 *********************************************************************/
356
357static int
358igb_probe(device_t dev)
359{
360 char adapter_name[256];
361 uint16_t pci_vendor_id = 0;
362 uint16_t pci_device_id = 0;
363 uint16_t pci_subvendor_id = 0;
364 uint16_t pci_subdevice_id = 0;
365 igb_vendor_info_t *ent;
366
367 INIT_DEBUGOUT("igb_probe: begin");
368
369 pci_vendor_id = pci_get_vendor(dev);
370 if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371 return (ENXIO);
372
373 pci_device_id = pci_get_device(dev);
374 pci_subvendor_id = pci_get_subvendor(dev);
375 pci_subdevice_id = pci_get_subdevice(dev);
376
377 ent = igb_vendor_info_array;
378 while (ent->vendor_id != 0) {
379 if ((pci_vendor_id == ent->vendor_id) &&
380 (pci_device_id == ent->device_id) &&
381
382 ((pci_subvendor_id == ent->subvendor_id) ||
383 (ent->subvendor_id == 0)) &&
384
385 ((pci_subdevice_id == ent->subdevice_id) ||
386 (ent->subdevice_id == 0))) {
387 sprintf(adapter_name, "%s, Version - %s",
388 igb_strings[ent->index],
389 igb_driver_version);
390 device_set_desc_copy(dev, adapter_name);
391 return (BUS_PROBE_DEFAULT);
392 }
393 ent++;
394 }
395 return (ENXIO);
396}
397
398/*********************************************************************
399 * Device initialization routine
400 *
401 * The attach entry point is called when the driver is being loaded.
402 * This routine identifies the type of hardware, allocates all resources
403 * and initializes the hardware.
404 *
405 * return 0 on success, positive on failure
406 *********************************************************************/
407
408static int
409igb_attach(device_t dev)
410{
411 struct adapter *adapter;
412 int error = 0;
413 u16 eeprom_data;
414
415 INIT_DEBUGOUT("igb_attach: begin");
416
417 if (resource_disabled("igb", device_get_unit(dev))) {
418 device_printf(dev, "Disabled by device hint\n");
419 return (ENXIO);
420 }
421
422 adapter = device_get_softc(dev);
423 adapter->dev = adapter->osdep.dev = dev;
424 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425
426 /* SYSCTLs */
427 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430 igb_sysctl_nvm_info, "I", "NVM Information");
431
432 igb_set_sysctl_value(adapter, "enable_aim",
433 "Interrupt Moderation", &adapter->enable_aim,
434 igb_enable_aim);
435
436 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439 adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440
441 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442
443 /* Determine hardware and mac info */
444 igb_identify_hardware(adapter);
445
446 /* Setup PCI resources */
447 if (igb_allocate_pci_resources(adapter)) {
448 device_printf(dev, "Allocation of PCI resources failed\n");
449 error = ENXIO;
450 goto err_pci;
451 }
452
453 /* Do Shared Code initialization */
454 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455 device_printf(dev, "Setup of Shared code failed\n");
456 error = ENXIO;
457 goto err_pci;
458 }
459
460 e1000_get_bus_info(&adapter->hw);
461
462 /* Sysctls for limiting the amount of work done in the taskqueues */
463 igb_set_sysctl_value(adapter, "rx_processing_limit",
464 "max number of rx packets to process",
465 &adapter->rx_process_limit, igb_rx_process_limit);
466
467 igb_set_sysctl_value(adapter, "tx_processing_limit",
468 "max number of tx packets to process",
469 &adapter->tx_process_limit, igb_tx_process_limit);
470
471 /*
472 * Validate number of transmit and receive descriptors. It
473 * must not exceed hardware maximum, and must be multiple
474 * of E1000_DBA_ALIGN.
475 */
476 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479 IGB_DEFAULT_TXD, igb_txd);
480 adapter->num_tx_desc = IGB_DEFAULT_TXD;
481 } else
482 adapter->num_tx_desc = igb_txd;
483 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486 IGB_DEFAULT_RXD, igb_rxd);
487 adapter->num_rx_desc = IGB_DEFAULT_RXD;
488 } else
489 adapter->num_rx_desc = igb_rxd;
490
491 adapter->hw.mac.autoneg = DO_AUTO_NEG;
492 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494
495 /* Copper options */
496 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497 adapter->hw.phy.mdix = AUTO_ALL_MODES;
498 adapter->hw.phy.disable_polarity_correction = FALSE;
499 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500 }
501
502 /*
503 * Set the frame limits assuming
504 * standard ethernet sized frames.
505 */
506 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
507
508 /*
509 ** Allocate and Setup Queues
510 */
511 if (igb_allocate_queues(adapter)) {
512 error = ENOMEM;
513 goto err_pci;
514 }
515
516 /* Allocate the appropriate stats memory */
517 if (adapter->vf_ifp) {
518 adapter->stats =
519 (struct e1000_vf_stats *)malloc(sizeof \
520 (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
521 igb_vf_init_stats(adapter);
522 } else
523 adapter->stats =
524 (struct e1000_hw_stats *)malloc(sizeof \
525 (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526 if (adapter->stats == NULL) {
527 device_printf(dev, "Can not allocate stats memory\n");
528 error = ENOMEM;
529 goto err_late;
530 }
531
532 /* Allocate multicast array memory. */
533 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535 if (adapter->mta == NULL) {
536 device_printf(dev, "Can not allocate multicast setup array\n");
537 error = ENOMEM;
538 goto err_late;
539 }
540
541 /* Some adapter-specific advanced features */
542 if (adapter->hw.mac.type >= e1000_i350) {
543 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545 OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546 adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549 OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550 adapter, 0, igb_sysctl_eee, "I",
551 "Disable Energy Efficient Ethernet");
552 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553 if (adapter->hw.mac.type == e1000_i354)
554 e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
555 else
556 e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
557 }
558 }
559
560 /*
 561 ** Start from a known state; this is
 562 ** important when reading the NVM and
 563 ** MAC address from it.
564 */
565 e1000_reset_hw(&adapter->hw);
566
567 /* Make sure we have a good EEPROM before we read from it */
568 if (((adapter->hw.mac.type != e1000_i210) &&
569 (adapter->hw.mac.type != e1000_i211)) &&
570 (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571 /*
572 ** Some PCI-E parts fail the first check due to
 573 ** the link being in a sleep state; call it again,
 574 ** and if it fails a second time it's a real issue.
575 */
576 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577 device_printf(dev,
578 "The EEPROM Checksum Is Not Valid\n");
579 error = EIO;
580 goto err_late;
581 }
582 }
583
584 /*
585 ** Copy the permanent MAC address out of the EEPROM
586 */
587 if (e1000_read_mac_addr(&adapter->hw) < 0) {
588 device_printf(dev, "EEPROM read error while reading MAC"
589 " address\n");
590 error = EIO;
591 goto err_late;
592 }
593 /* Check its sanity */
594 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595 device_printf(dev, "Invalid MAC address\n");
596 error = EIO;
597 goto err_late;
598 }
599
600 /* Setup OS specific network interface */
601 if (igb_setup_interface(dev, adapter) != 0)
602 goto err_late;
603
604 /* Now get a good starting state */
605 igb_reset(adapter);
606
607 /* Initialize statistics */
608 igb_update_stats_counters(adapter);
609
610 adapter->hw.mac.get_link_status = 1;
611 igb_update_link_status(adapter);
612
613 /* Indicate SOL/IDER usage */
614 if (e1000_check_reset_block(&adapter->hw))
615 device_printf(dev,
616 "PHY reset is blocked due to SOL/IDER session.\n");
617
618 /* Determine if we have to control management hardware */
619 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620
621 /*
622 * Setup Wake-on-Lan
623 */
624 /* APME bit in EEPROM is mapped to WUC.APME */
625 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626 if (eeprom_data)
627 adapter->wol = E1000_WUFC_MAG;
628
629 /* Register for VLAN events */
630 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634
635 igb_add_hw_stats(adapter);
636
637 /* Tell the stack that the interface is not active */
638 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
640
641 adapter->led_dev = led_create(igb_led_func, adapter,
642 device_get_nameunit(dev));
643
644 /*
645 ** Configure Interrupts
646 */
647 if ((adapter->msix > 1) && (igb_enable_msix))
648 error = igb_allocate_msix(adapter);
649 else /* MSI or Legacy */
650 error = igb_allocate_legacy(adapter);
651 if (error)
652 goto err_late;
653
654#ifdef DEV_NETMAP
655 igb_netmap_attach(adapter);
656#endif /* DEV_NETMAP */
657 INIT_DEBUGOUT("igb_attach: end");
658
659 return (0);
660
661err_late:
662 igb_detach(dev);
663 igb_free_transmit_structures(adapter);
664 igb_free_receive_structures(adapter);
665 igb_release_hw_control(adapter);
666err_pci:
667 igb_free_pci_resources(adapter);
668 if (adapter->ifp != NULL)
669 if_free(adapter->ifp);
670 free(adapter->mta, M_DEVBUF);
671 IGB_CORE_LOCK_DESTROY(adapter);
672
673 return (error);
674}
675
676/*********************************************************************
677 * Device removal routine
678 *
679 * The detach entry point is called when the driver is being removed.
680 * This routine stops the adapter and deallocates all the resources
681 * that were allocated for driver operation.
682 *
683 * return 0 on success, positive on failure
684 *********************************************************************/
685
686static int
687igb_detach(device_t dev)
688{
689 struct adapter *adapter = device_get_softc(dev);
690 struct ifnet *ifp = adapter->ifp;
691
692 INIT_DEBUGOUT("igb_detach: begin");
693
694 /* Make sure VLANS are not using driver */
695 if (adapter->ifp->if_vlantrunk != NULL) {
696 device_printf(dev,"Vlan in use, detach first\n");
697 return (EBUSY);
698 }
699
700 ether_ifdetach(adapter->ifp);
701
702 if (adapter->led_dev != NULL)
703 led_destroy(adapter->led_dev);
704
705#ifdef DEVICE_POLLING
706 if (ifp->if_capenable & IFCAP_POLLING)
707 ether_poll_deregister(ifp);
708#endif
709
710 IGB_CORE_LOCK(adapter);
711 adapter->in_detach = 1;
712 igb_stop(adapter);
713 IGB_CORE_UNLOCK(adapter);
714
715 e1000_phy_hw_reset(&adapter->hw);
716
717 /* Give control back to firmware */
718 igb_release_manageability(adapter);
719 igb_release_hw_control(adapter);
720
721 if (adapter->wol) {
722 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
723 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
724 igb_enable_wakeup(dev);
725 }
726
727 /* Unregister VLAN events */
728 if (adapter->vlan_attach != NULL)
729 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
730 if (adapter->vlan_detach != NULL)
731 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
732
733 callout_drain(&adapter->timer);
734
735#ifdef DEV_NETMAP
736 netmap_detach(adapter->ifp);
737#endif /* DEV_NETMAP */
738 igb_free_pci_resources(adapter);
739 bus_generic_detach(dev);
740 if_free(ifp);
741
742 igb_free_transmit_structures(adapter);
743 igb_free_receive_structures(adapter);
744 if (adapter->mta != NULL)
745 free(adapter->mta, M_DEVBUF);
746
747 IGB_CORE_LOCK_DESTROY(adapter);
748
749 return (0);
750}
751
752/*********************************************************************
753 *
754 * Shutdown entry point
755 *
756 **********************************************************************/
757
758static int
759igb_shutdown(device_t dev)
760{
761 return igb_suspend(dev);
762}
763
764/*
765 * Suspend/resume device methods.
766 */
767static int
768igb_suspend(device_t dev)
769{
770 struct adapter *adapter = device_get_softc(dev);
771
772 IGB_CORE_LOCK(adapter);
773
774 igb_stop(adapter);
775
776 igb_release_manageability(adapter);
777 igb_release_hw_control(adapter);
778
779 if (adapter->wol) {
780 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
781 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
782 igb_enable_wakeup(dev);
783 }
784
785 IGB_CORE_UNLOCK(adapter);
786
787 return bus_generic_suspend(dev);
788}
789
790static int
791igb_resume(device_t dev)
792{
793 struct adapter *adapter = device_get_softc(dev);
794 struct tx_ring *txr = adapter->tx_rings;
795 struct ifnet *ifp = adapter->ifp;
796
797 IGB_CORE_LOCK(adapter);
798 igb_init_locked(adapter);
799 igb_init_manageability(adapter);
800
801 if ((ifp->if_flags & IFF_UP) &&
802 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
803 for (int i = 0; i < adapter->num_queues; i++, txr++) {
804 IGB_TX_LOCK(txr);
805#ifndef IGB_LEGACY_TX
806 /* Process the stack queue only if not depleted */
807 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
808 !drbr_empty(ifp, txr->br))
809 igb_mq_start_locked(ifp, txr);
810#else
811 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
812 igb_start_locked(txr, ifp);
813#endif
814 IGB_TX_UNLOCK(txr);
815 }
816 }
817 IGB_CORE_UNLOCK(adapter);
818
819 return bus_generic_resume(dev);
820}
821
822
823#ifdef IGB_LEGACY_TX
824
825/*********************************************************************
826 * Transmit entry point
827 *
828 * igb_start is called by the stack to initiate a transmit.
829 * The driver will remain in this routine as long as there are
830 * packets to transmit and transmit resources are available.
831 * In case resources are not available stack is notified and
832 * the packet is requeued.
833 **********************************************************************/
834
835static void
836igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
837{
838 struct adapter *adapter = ifp->if_softc;
839 struct mbuf *m_head;
840
841 IGB_TX_LOCK_ASSERT(txr);
842
843 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
844 IFF_DRV_RUNNING)
845 return;
846 if (!adapter->link_active)
847 return;
848
849 /* Call cleanup if number of TX descriptors low */
850 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
851 igb_txeof(txr);
852
853 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
854 if (txr->tx_avail <= IGB_MAX_SCATTER) {
855 txr->queue_status |= IGB_QUEUE_DEPLETED;
856 break;
857 }
858 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
859 if (m_head == NULL)
860 break;
861 /*
 862 * Encapsulation can modify our pointer, and/or make it
863 * NULL on failure. In that event, we can't requeue.
864 */
865 if (igb_xmit(txr, &m_head)) {
866 if (m_head != NULL)
867 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
868 if (txr->tx_avail <= IGB_MAX_SCATTER)
869 txr->queue_status |= IGB_QUEUE_DEPLETED;
870 break;
871 }
872
873 /* Send a copy of the frame to the BPF listener */
874 ETHER_BPF_MTAP(ifp, m_head);
875
876 /* Set watchdog on */
877 txr->watchdog_time = ticks;
878 txr->queue_status |= IGB_QUEUE_WORKING;
879 }
880}
881
882/*
883 * Legacy TX driver routine, called from the
884 * stack, always uses tx[0], and spins for it.
885 * Should not be used with multiqueue tx
886 */
887static void
888igb_start(struct ifnet *ifp)
889{
890 struct adapter *adapter = ifp->if_softc;
891 struct tx_ring *txr = adapter->tx_rings;
892
893 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
894 IGB_TX_LOCK(txr);
895 igb_start_locked(txr, ifp);
896 IGB_TX_UNLOCK(txr);
897 }
898 return;
899}
900
901#else /* ~IGB_LEGACY_TX */
902
903/*
904** Multiqueue Transmit Entry:
905** quick turnaround to the stack
906**
907*/
908static int
909igb_mq_start(struct ifnet *ifp, struct mbuf *m)
910{
911 struct adapter *adapter = ifp->if_softc;
912 struct igb_queue *que;
913 struct tx_ring *txr;
914 int i, err = 0;
915#ifdef RSS
916 uint32_t bucket_id;
917#endif
918
919 /* Which queue to use */
920 /*
921 * When doing RSS, map it to the same outbound queue
922 * as the incoming flow would be mapped to.
923 *
924 * If everything is setup correctly, it should be the
925 * same bucket that the current CPU we're on is.
926 */
927 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
928#ifdef RSS
929 if (rss_hash2bucket(m->m_pkthdr.flowid,
930 M_HASHTYPE_GET(m), &bucket_id) == 0) {
931 /* XXX TODO: spit out something if bucket_id > num_queues? */
932 i = bucket_id % adapter->num_queues;
933 } else {
934#endif
935 i = m->m_pkthdr.flowid % adapter->num_queues;
936#ifdef RSS
937 }
938#endif
939 } else {
940 i = curcpu % adapter->num_queues;
941 }
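	/*
	 * Worked example (values illustrative): with num_queues == 4, a
	 * flow whose RSS bucket or flowid is 11 is sent on 11 % 4 == queue
	 * 3, keeping its transmits on the same queue its receives hash to.
	 */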
942 txr = &adapter->tx_rings[i];
943 que = &adapter->queues[i];
944
945 err = drbr_enqueue(ifp, txr->br, m);
946 if (err)
947 return (err);
948 if (IGB_TX_TRYLOCK(txr)) {
949 igb_mq_start_locked(ifp, txr);
950 IGB_TX_UNLOCK(txr);
951 } else
952 taskqueue_enqueue(que->tq, &txr->txq_task);
953
954 return (0);
955}
956
957static int
958igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
959{
960 struct adapter *adapter = txr->adapter;
961 struct mbuf *next;
962 int err = 0, enq = 0;
963
964 IGB_TX_LOCK_ASSERT(txr);
965
966 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
967 adapter->link_active == 0)
968 return (ENETDOWN);
969
970 /* Process the queue */
971 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
972 if ((err = igb_xmit(txr, &next)) != 0) {
973 if (next == NULL) {
974 /* It was freed, move forward */
975 drbr_advance(ifp, txr->br);
976 } else {
977 /*
978 * Still have one left, it may not be
979 * the same since the transmit function
980 * may have changed it.
981 */
982 drbr_putback(ifp, txr->br, next);
983 }
984 break;
985 }
986 drbr_advance(ifp, txr->br);
987 enq++;
988 if (next->m_flags & M_MCAST && adapter->vf_ifp)
989 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
990 ETHER_BPF_MTAP(ifp, next);
991 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
992 break;
993 }
994 if (enq > 0) {
995 /* Set the watchdog */
996 txr->queue_status |= IGB_QUEUE_WORKING;
997 txr->watchdog_time = ticks;
998 }
999 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1000 igb_txeof(txr);
1001 if (txr->tx_avail <= IGB_MAX_SCATTER)
1002 txr->queue_status |= IGB_QUEUE_DEPLETED;
1003 return (err);
1004}
1005
1006/*
1007 * Called from a taskqueue to drain queued transmit packets.
1008 */
1009static void
1010igb_deferred_mq_start(void *arg, int pending)
1011{
1012 struct tx_ring *txr = arg;
1013 struct adapter *adapter = txr->adapter;
1014 struct ifnet *ifp = adapter->ifp;
1015
1016 IGB_TX_LOCK(txr);
1017 if (!drbr_empty(ifp, txr->br))
1018 igb_mq_start_locked(ifp, txr);
1019 IGB_TX_UNLOCK(txr);
1020}
1021
1022/*
1023** Flush all ring buffers
1024*/
1025static void
1026igb_qflush(struct ifnet *ifp)
1027{
1028 struct adapter *adapter = ifp->if_softc;
1029 struct tx_ring *txr = adapter->tx_rings;
1030 struct mbuf *m;
1031
1032 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1033 IGB_TX_LOCK(txr);
1034 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1035 m_freem(m);
1036 IGB_TX_UNLOCK(txr);
1037 }
1038 if_qflush(ifp);
1039}
1040#endif /* ~IGB_LEGACY_TX */
1041
1042/*********************************************************************
1043 * Ioctl entry point
1044 *
1045 * igb_ioctl is called when the user wants to configure the
1046 * interface.
1047 *
1048 * return 0 on success, positive on failure
1049 **********************************************************************/
1050
1051static int
1052igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1053{
1054 struct adapter *adapter = ifp->if_softc;
1055 struct ifreq *ifr = (struct ifreq *)data;
1056#if defined(INET) || defined(INET6)
1057 struct ifaddr *ifa = (struct ifaddr *)data;
1058#endif
1059 bool avoid_reset = FALSE;
1060 int error = 0;
1061
1062 if (adapter->in_detach)
1063 return (error);
1064
1065 switch (command) {
1066 case SIOCSIFADDR:
1067#ifdef INET
1068 if (ifa->ifa_addr->sa_family == AF_INET)
1069 avoid_reset = TRUE;
1070#endif
1071#ifdef INET6
1072 if (ifa->ifa_addr->sa_family == AF_INET6)
1073 avoid_reset = TRUE;
1074#endif
1075 /*
1076 ** Calling init results in link renegotiation,
1077 ** so we avoid doing it when possible.
1078 */
1079 if (avoid_reset) {
1080 ifp->if_flags |= IFF_UP;
1081 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1082 igb_init(adapter);
1083#ifdef INET
1084 if (!(ifp->if_flags & IFF_NOARP))
1085 arp_ifinit(ifp, ifa);
1086#endif
1087 } else
1088 error = ether_ioctl(ifp, command, data);
1089 break;
1090 case SIOCSIFMTU:
1091 {
1092 int max_frame_size;
1093
1094 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1095
1096 IGB_CORE_LOCK(adapter);
1097 max_frame_size = 9234;
1098 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1099 ETHER_CRC_LEN) {
1100 IGB_CORE_UNLOCK(adapter);
1101 error = EINVAL;
1102 break;
1103 }
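		/*
		 * Illustrative arithmetic: 9234 - ETHER_HDR_LEN (14) -
		 * ETHER_CRC_LEN (4) caps the configurable MTU at 9216
		 * bytes; anything larger is rejected above with EINVAL.
		 */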
1104
1105 ifp->if_mtu = ifr->ifr_mtu;
1106 adapter->max_frame_size =
1107 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1108 igb_init_locked(adapter);
1109 IGB_CORE_UNLOCK(adapter);
1110 break;
1111 }
1112 case SIOCSIFFLAGS:
1113 IOCTL_DEBUGOUT("ioctl rcv'd:\
1114 SIOCSIFFLAGS (Set Interface Flags)");
1115 IGB_CORE_LOCK(adapter);
1116 if (ifp->if_flags & IFF_UP) {
1117 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1118 if ((ifp->if_flags ^ adapter->if_flags) &
1119 (IFF_PROMISC | IFF_ALLMULTI)) {
1120 igb_disable_promisc(adapter);
1121 igb_set_promisc(adapter);
1122 }
1123 } else
1124 igb_init_locked(adapter);
1125 } else
1126 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1127 igb_stop(adapter);
1128 adapter->if_flags = ifp->if_flags;
1129 IGB_CORE_UNLOCK(adapter);
1130 break;
1131 case SIOCADDMULTI:
1132 case SIOCDELMULTI:
1133 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1134 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1135 IGB_CORE_LOCK(adapter);
1136 igb_disable_intr(adapter);
1137 igb_set_multi(adapter);
1138#ifdef DEVICE_POLLING
1139 if (!(ifp->if_capenable & IFCAP_POLLING))
1140#endif
1141 igb_enable_intr(adapter);
1142 IGB_CORE_UNLOCK(adapter);
1143 }
1144 break;
1145 case SIOCSIFMEDIA:
1146 /* Check SOL/IDER usage */
1147 IGB_CORE_LOCK(adapter);
1148 if (e1000_check_reset_block(&adapter->hw)) {
1149 IGB_CORE_UNLOCK(adapter);
1150 device_printf(adapter->dev, "Media change is"
1151 " blocked due to SOL/IDER session.\n");
1152 break;
1153 }
1154 IGB_CORE_UNLOCK(adapter);
1155 case SIOCGIFMEDIA:
1156 IOCTL_DEBUGOUT("ioctl rcv'd: \
1157 SIOCxIFMEDIA (Get/Set Interface Media)");
1158 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1159 break;
1160 case SIOCSIFCAP:
1161 {
1162 int mask, reinit;
1163
1164 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1165 reinit = 0;
1166 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1167#ifdef DEVICE_POLLING
1168 if (mask & IFCAP_POLLING) {
1169 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1170 error = ether_poll_register(igb_poll, ifp);
1171 if (error)
1172 return (error);
1173 IGB_CORE_LOCK(adapter);
1174 igb_disable_intr(adapter);
1175 ifp->if_capenable |= IFCAP_POLLING;
1176 IGB_CORE_UNLOCK(adapter);
1177 } else {
1178 error = ether_poll_deregister(ifp);
1179 /* Enable interrupt even in error case */
1180 IGB_CORE_LOCK(adapter);
1181 igb_enable_intr(adapter);
1182 ifp->if_capenable &= ~IFCAP_POLLING;
1183 IGB_CORE_UNLOCK(adapter);
1184 }
1185 }
1186#endif
1187 if (mask & IFCAP_HWCSUM) {
1188 ifp->if_capenable ^= IFCAP_HWCSUM;
1189 reinit = 1;
1190 }
1191 if (mask & IFCAP_TSO4) {
1192 ifp->if_capenable ^= IFCAP_TSO4;
1193 reinit = 1;
1194 }
1195 if (mask & IFCAP_TSO6) {
1196 ifp->if_capenable ^= IFCAP_TSO6;
1197 reinit = 1;
1198 }
1199 if (mask & IFCAP_VLAN_HWTAGGING) {
1200 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1201 reinit = 1;
1202 }
1203 if (mask & IFCAP_VLAN_HWFILTER) {
1204 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1205 reinit = 1;
1206 }
1207 if (mask & IFCAP_VLAN_HWTSO) {
1208 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1209 reinit = 1;
1210 }
1211 if (mask & IFCAP_LRO) {
1212 ifp->if_capenable ^= IFCAP_LRO;
1213 reinit = 1;
1214 }
1215 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1216 igb_init(adapter);
1217 VLAN_CAPABILITIES(ifp);
1218 break;
1219 }
1220
1221 default:
1222 error = ether_ioctl(ifp, command, data);
1223 break;
1224 }
1225
1226 return (error);
1227}
1228
1229
1230/*********************************************************************
1231 * Init entry point
1232 *
1233 * This routine is used in two ways. It is used by the stack as
1234 * init entry point in network interface structure. It is also used
1235 * by the driver as a hw/sw initialization routine to get to a
1236 * consistent state.
1237 *
1238 * return 0 on success, positive on failure
1239 **********************************************************************/
1240
1241static void
1242igb_init_locked(struct adapter *adapter)
1243{
1244 struct ifnet *ifp = adapter->ifp;
1245 device_t dev = adapter->dev;
1246
1247 INIT_DEBUGOUT("igb_init: begin");
1248
1249 IGB_CORE_LOCK_ASSERT(adapter);
1250
1251 igb_disable_intr(adapter);
1252 callout_stop(&adapter->timer);
1253
1254 /* Get the latest mac address, User can use a LAA */
1255 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1256 ETHER_ADDR_LEN);
1257
1258 /* Put the address into the Receive Address Array */
1259 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1260
1261 igb_reset(adapter);
1262 igb_update_link_status(adapter);
1263
1264 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1265
1266 /* Set hardware offload abilities */
1267 ifp->if_hwassist = 0;
1268 if (ifp->if_capenable & IFCAP_TXCSUM) {
1269 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1270#if __FreeBSD_version >= 800000
1271 if ((adapter->hw.mac.type == e1000_82576) ||
1272 (adapter->hw.mac.type == e1000_82580))
1273 ifp->if_hwassist |= CSUM_SCTP;
1274#endif
1275 }
1276
1277 if (ifp->if_capenable & IFCAP_TSO)
1278 ifp->if_hwassist |= CSUM_TSO;
1279
1280 /* Clear bad data from Rx FIFOs */
1281 e1000_rx_fifo_flush_82575(&adapter->hw);
1282
1283 /* Configure for OS presence */
1284 igb_init_manageability(adapter);
1285
1286 /* Prepare transmit descriptors and buffers */
1287 igb_setup_transmit_structures(adapter);
1288 igb_initialize_transmit_units(adapter);
1289
1290 /* Setup Multicast table */
1291 igb_set_multi(adapter);
1292
1293 /*
1294 ** Figure out the desired mbuf pool
1295 ** for doing jumbo/packetsplit
1296 */
1297 if (adapter->max_frame_size <= 2048)
1298 adapter->rx_mbuf_sz = MCLBYTES;
1299 else if (adapter->max_frame_size <= 4096)
1300 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1301 else
1302 adapter->rx_mbuf_sz = MJUM9BYTES;
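	/*
	 * Worked example (illustrative): a jumbo MTU of 9000 gives a
	 * max_frame_size of 9000 + 14 + 4 = 9018 bytes, which falls
	 * through to 9k clusters (MJUM9BYTES); the standard 1500-byte
	 * MTU yields 1518 and plain 2k clusters (MCLBYTES).
	 */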
1303
1304 /* Prepare receive descriptors and buffers */
1305 if (igb_setup_receive_structures(adapter)) {
1306 device_printf(dev, "Could not setup receive structures\n");
1307 return;
1308 }
1309 igb_initialize_receive_units(adapter);
1310
1311 /* Enable VLAN support */
1312 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1313 igb_setup_vlan_hw_support(adapter);
1314
1315 /* Don't lose promiscuous settings */
1316 igb_set_promisc(adapter);
1317
1318 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1319 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1320
1321 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1322 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1323
1324 if (adapter->msix > 1) /* Set up queue routing */
1325 igb_configure_queues(adapter);
1326
1327 /* this clears any pending interrupts */
1328 E1000_READ_REG(&adapter->hw, E1000_ICR);
1329#ifdef DEVICE_POLLING
1330 /*
1331 * Only enable interrupts if we are not polling, make sure
1332 * they are off otherwise.
1333 */
1334 if (ifp->if_capenable & IFCAP_POLLING)
1335 igb_disable_intr(adapter);
1336 else
1337#endif /* DEVICE_POLLING */
1338 {
1339 igb_enable_intr(adapter);
1340 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1341 }
1342
1343 /* Set Energy Efficient Ethernet */
1344 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1345 if (adapter->hw.mac.type == e1000_i354)
1346 e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
1347 else
1348 e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
1349 }
1350}
1351
1352static void
1353igb_init(void *arg)
1354{
1355 struct adapter *adapter = arg;
1356
1357 IGB_CORE_LOCK(adapter);
1358 igb_init_locked(adapter);
1359 IGB_CORE_UNLOCK(adapter);
1360}
1361
1362
1363static void
1364igb_handle_que(void *context, int pending)
1365{
1366 struct igb_queue *que = context;
1367 struct adapter *adapter = que->adapter;
1368 struct tx_ring *txr = que->txr;
1369 struct ifnet *ifp = adapter->ifp;
1370
1371 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1372 bool more;
1373
1374 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1375
1376 IGB_TX_LOCK(txr);
1377 igb_txeof(txr);
1378#ifndef IGB_LEGACY_TX
1379 /* Process the stack queue only if not depleted */
1380 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1381 !drbr_empty(ifp, txr->br))
1382 igb_mq_start_locked(ifp, txr);
1383#else
1384 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1385 igb_start_locked(txr, ifp);
1386#endif
1387 IGB_TX_UNLOCK(txr);
1388 /* Do we need another? */
1389 if (more) {
1390 taskqueue_enqueue(que->tq, &que->que_task);
1391 return;
1392 }
1393 }
1394
1395#ifdef DEVICE_POLLING
1396 if (ifp->if_capenable & IFCAP_POLLING)
1397 return;
1398#endif
1399 /* Reenable this interrupt */
1400 if (que->eims)
1401 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1402 else
1403 igb_enable_intr(adapter);
1404}
1405
1406/* Deal with link in a sleepable context */
1407static void
1408igb_handle_link(void *context, int pending)
1409{
1410 struct adapter *adapter = context;
1411
1412 IGB_CORE_LOCK(adapter);
1413 igb_handle_link_locked(adapter);
1414 IGB_CORE_UNLOCK(adapter);
1415}
1416
1417static void
1418igb_handle_link_locked(struct adapter *adapter)
1419{
1420 struct tx_ring *txr = adapter->tx_rings;
1421 struct ifnet *ifp = adapter->ifp;
1422
1423 IGB_CORE_LOCK_ASSERT(adapter);
1424 adapter->hw.mac.get_link_status = 1;
1425 igb_update_link_status(adapter);
1426 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1427 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1428 IGB_TX_LOCK(txr);
1429#ifndef IGB_LEGACY_TX
1430 /* Process the stack queue only if not depleted */
1431 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1432 !drbr_empty(ifp, txr->br))
1433 igb_mq_start_locked(ifp, txr);
1434#else
1435 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1436 igb_start_locked(txr, ifp);
1437#endif
1438 IGB_TX_UNLOCK(txr);
1439 }
1440 }
1441}
1442
1443/*********************************************************************
1444 *
1445 * MSI/Legacy Deferred
1446 * Interrupt Service routine
1447 *
1448 *********************************************************************/
1449static int
1450igb_irq_fast(void *arg)
1451{
1452 struct adapter *adapter = arg;
1453 struct igb_queue *que = adapter->queues;
1454 u32 reg_icr;
1455
1456
1457 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1458
1459 /* Hot eject? */
1460 if (reg_icr == 0xffffffff)
1461 return FILTER_STRAY;
1462
1463 /* Definitely not our interrupt. */
1464 if (reg_icr == 0x0)
1465 return FILTER_STRAY;
1466
1467 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1468 return FILTER_STRAY;
1469
1470 /*
1471 * Mask interrupts until the taskqueue is finished running. This is
1472 * cheap, just assume that it is needed. This also works around the
1473 * MSI message reordering errata on certain systems.
1474 */
1475 igb_disable_intr(adapter);
1476 taskqueue_enqueue(que->tq, &que->que_task);
1477
1478 /* Link status change */
1479 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1480 taskqueue_enqueue(que->tq, &adapter->link_task);
1481
1482 if (reg_icr & E1000_ICR_RXO)
1483 adapter->rx_overruns++;
1484 return FILTER_HANDLED;
1485}
1486
1487#ifdef DEVICE_POLLING
1488#if __FreeBSD_version >= 800000
1489#define POLL_RETURN_COUNT(a) (a)
1490static int
1491#else
1492#define POLL_RETURN_COUNT(a)
1493static void
1494#endif
1495igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1496{
1497 struct adapter *adapter = ifp->if_softc;
1498 struct igb_queue *que;
1499 struct tx_ring *txr;
1500 u32 reg_icr, rx_done = 0;
1501 u32 loop = IGB_MAX_LOOP;
1502 bool more;
1503
1504 IGB_CORE_LOCK(adapter);
1505 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1506 IGB_CORE_UNLOCK(adapter);
1507 return POLL_RETURN_COUNT(rx_done);
1508 }
1509
1510 if (cmd == POLL_AND_CHECK_STATUS) {
1511 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1512 /* Link status change */
1513 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1514 igb_handle_link_locked(adapter);
1515
1516 if (reg_icr & E1000_ICR_RXO)
1517 adapter->rx_overruns++;
1518 }
1519 IGB_CORE_UNLOCK(adapter);
1520
1521 for (int i = 0; i < adapter->num_queues; i++) {
1522 que = &adapter->queues[i];
1523 txr = que->txr;
1524
1525 igb_rxeof(que, count, &rx_done);
1526
1527 IGB_TX_LOCK(txr);
1528 do {
1529 more = igb_txeof(txr);
1530 } while (loop-- && more);
1531#ifndef IGB_LEGACY_TX
1532 if (!drbr_empty(ifp, txr->br))
1533 igb_mq_start_locked(ifp, txr);
1534#else
1535 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1536 igb_start_locked(txr, ifp);
1537#endif
1538 IGB_TX_UNLOCK(txr);
1539 }
1540
1541 return POLL_RETURN_COUNT(rx_done);
1542}
1543#endif /* DEVICE_POLLING */
1544
1545/*********************************************************************
1546 *
1547 * MSIX Que Interrupt Service routine
1548 *
1549 **********************************************************************/
1550static void
1551igb_msix_que(void *arg)
1552{
1553 struct igb_queue *que = arg;
1554 struct adapter *adapter = que->adapter;
1555 struct ifnet *ifp = adapter->ifp;
1556 struct tx_ring *txr = que->txr;
1557 struct rx_ring *rxr = que->rxr;
1558 u32 newitr = 0;
1559 bool more_rx;
1560
1561 /* Ignore spurious interrupts */
1562 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1563 return;
1564
1565 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1566 ++que->irqs;
1567
1568 IGB_TX_LOCK(txr);
1569 igb_txeof(txr);
1570#ifndef IGB_LEGACY_TX
1571 /* Process the stack queue only if not depleted */
1572 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1573 !drbr_empty(ifp, txr->br))
1574 igb_mq_start_locked(ifp, txr);
1575#else
1576 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1577 igb_start_locked(txr, ifp);
1578#endif
1579 IGB_TX_UNLOCK(txr);
1580
1581 more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1582
1583 if (adapter->enable_aim == FALSE)
1584 goto no_calc;
1585 /*
1586 ** Do Adaptive Interrupt Moderation:
1587 ** - Write out last calculated setting
1588 ** - Calculate based on average size over
1589 ** the last interval.
1590 */
1591 if (que->eitr_setting)
1592 E1000_WRITE_REG(&adapter->hw,
1593 E1000_EITR(que->msix), que->eitr_setting);
1594
1595 que->eitr_setting = 0;
1596
1597 /* Idle, do nothing */
1598 if ((txr->bytes == 0) && (rxr->bytes == 0))
1599 goto no_calc;
1600
 1601 /* Use half the default if sub-gig */
1602 if (adapter->link_speed != 1000)
1603 newitr = IGB_DEFAULT_ITR / 2;
1604 else {
1605 if ((txr->bytes) && (txr->packets))
1606 newitr = txr->bytes/txr->packets;
1607 if ((rxr->bytes) && (rxr->packets))
1608 newitr = max(newitr,
1609 (rxr->bytes / rxr->packets));
1610 newitr += 24; /* account for hardware frame, crc */
1611 /* set an upper boundary */
1612 newitr = min(newitr, 3000);
1613 /* Be nice to the mid range */
1614 if ((newitr > 300) && (newitr < 1200))
1615 newitr = (newitr / 3);
1616 else
1617 newitr = (newitr / 2);
1618 }
1619 newitr &= 0x7FFC; /* Mask invalid bits */
1620 if (adapter->hw.mac.type == e1000_82575)
1621 newitr |= newitr << 16;
1622 else
1623 newitr |= E1000_EITR_CNT_IGNR;
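	/*
	 * Illustrative example of the calculation above: at gigabit speed
	 * with an average of 1500 bytes per packet, newitr = 1500 + 24 =
	 * 1524; that is outside the 300..1200 mid range, so it is halved
	 * to 762 and then masked to a granularity of 4 before being
	 * written to EITR on the next interrupt.
	 */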
1624
1625 /* save for next interrupt */
1626 que->eitr_setting = newitr;
1627
1628 /* Reset state */
1629 txr->bytes = 0;
1630 txr->packets = 0;
1631 rxr->bytes = 0;
1632 rxr->packets = 0;
1633
1634no_calc:
1635 /* Schedule a clean task if needed*/
1636 if (more_rx)
1637 taskqueue_enqueue(que->tq, &que->que_task);
1638 else
1639 /* Reenable this interrupt */
1640 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1641 return;
1642}
1643
1644
1645/*********************************************************************
1646 *
1647 * MSIX Link Interrupt Service routine
1648 *
1649 **********************************************************************/
1650
1651static void
1652igb_msix_link(void *arg)
1653{
1654 struct adapter *adapter = arg;
1655 u32 icr;
1656
1657 ++adapter->link_irq;
1658 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1659 if (!(icr & E1000_ICR_LSC))
1660 goto spurious;
1661 igb_handle_link(adapter, 0);
1662
1663spurious:
1664 /* Rearm */
1665 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1666 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1667 return;
1668}
1669
1670
1671/*********************************************************************
1672 *
1673 * Media Ioctl callback
1674 *
1675 * This routine is called whenever the user queries the status of
1676 * the interface using ifconfig.
1677 *
1678 **********************************************************************/
1679static void
1680igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1681{
1682 struct adapter *adapter = ifp->if_softc;
1683
1684 INIT_DEBUGOUT("igb_media_status: begin");
1685
1686 IGB_CORE_LOCK(adapter);
1687 igb_update_link_status(adapter);
1688
1689 ifmr->ifm_status = IFM_AVALID;
1690 ifmr->ifm_active = IFM_ETHER;
1691
1692 if (!adapter->link_active) {
1693 IGB_CORE_UNLOCK(adapter);
1694 return;
1695 }
1696
1697 ifmr->ifm_status |= IFM_ACTIVE;
1698
1699 switch (adapter->link_speed) {
1700 case 10:
1701 ifmr->ifm_active |= IFM_10_T;
1702 break;
1703 case 100:
1704 /*
1705 ** Support for 100Mb SFP - these are Fiber
1706 ** but the media type appears as serdes
1707 */
1708 if (adapter->hw.phy.media_type ==
1709 e1000_media_type_internal_serdes)
1710 ifmr->ifm_active |= IFM_100_FX;
1711 else
1712 ifmr->ifm_active |= IFM_100_TX;
1713 break;
1714 case 1000:
1715 ifmr->ifm_active |= IFM_1000_T;
1716 break;
1717 case 2500:
1718 ifmr->ifm_active |= IFM_2500_SX;
1719 break;
1720 }
1721
1722 if (adapter->link_duplex == FULL_DUPLEX)
1723 ifmr->ifm_active |= IFM_FDX;
1724 else
1725 ifmr->ifm_active |= IFM_HDX;
1726
1727 IGB_CORE_UNLOCK(adapter);
1728}
1729
1730/*********************************************************************
1731 *
1732 * Media Ioctl callback
1733 *
1734 * This routine is called when the user changes speed/duplex using
 1735 * media/mediaopt option with ifconfig.
1736 *
1737 **********************************************************************/
1738static int
1739igb_media_change(struct ifnet *ifp)
1740{
1741 struct adapter *adapter = ifp->if_softc;
1742 struct ifmedia *ifm = &adapter->media;
1743
1744 INIT_DEBUGOUT("igb_media_change: begin");
1745
1746 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1747 return (EINVAL);
1748
1749 IGB_CORE_LOCK(adapter);
1750 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1751 case IFM_AUTO:
1752 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1753 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1754 break;
1755 case IFM_1000_LX:
1756 case IFM_1000_SX:
1757 case IFM_1000_T:
1758 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1759 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1760 break;
1761 case IFM_100_TX:
1762 adapter->hw.mac.autoneg = FALSE;
1763 adapter->hw.phy.autoneg_advertised = 0;
1764 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1765 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1766 else
1767 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1768 break;
1769 case IFM_10_T:
1770 adapter->hw.mac.autoneg = FALSE;
1771 adapter->hw.phy.autoneg_advertised = 0;
1772 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1773 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1774 else
1775 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1776 break;
1777 default:
1778 device_printf(adapter->dev, "Unsupported media type\n");
1779 }
1780
1781 igb_init_locked(adapter);
1782 IGB_CORE_UNLOCK(adapter);
1783
1784 return (0);
1785}
1786
1787
1788/*********************************************************************
1789 *
1790 * This routine maps the mbufs to Advanced TX descriptors.
1791 *
1792 **********************************************************************/
1793static int
1794igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1795{
1796 struct adapter *adapter = txr->adapter;
1797 u32 olinfo_status = 0, cmd_type_len;
1798 int i, j, error, nsegs;
1799 int first;
1800 bool remap = TRUE;
1801 struct mbuf *m_head;
1802 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1803 bus_dmamap_t map;
1804 struct igb_tx_buf *txbuf;
1805 union e1000_adv_tx_desc *txd = NULL;
1806
1807 m_head = *m_headp;
1808
1809 /* Basic descriptor defines */
1810 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1811 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1812
1813 if (m_head->m_flags & M_VLANTAG)
1814 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1815
1816 /*
1817 * Important to capture the first descriptor
1818 * used because it will contain the index of
1819 * the one we tell the hardware to report back
1820 */
1821 first = txr->next_avail_desc;
1822 txbuf = &txr->tx_buffers[first];
1823 map = txbuf->map;
1824
1825 /*
1826 * Map the packet for DMA.
1827 */
1828retry:
1829 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1830 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1831
1832 if (__predict_false(error)) {
1833 struct mbuf *m;
1834
1835 switch (error) {
1836 case EFBIG:
1837 /* Try it again? - one try */
1838 if (remap == TRUE) {
1839 remap = FALSE;
1840 m = m_collapse(*m_headp, M_NOWAIT,
1841 IGB_MAX_SCATTER);
1842 if (m == NULL) {
1843 adapter->mbuf_defrag_failed++;
1844 m_freem(*m_headp);
1845 *m_headp = NULL;
1846 return (ENOBUFS);
1847 }
1848 *m_headp = m;
1849 goto retry;
1850 } else
1851 return (error);
1852 default:
1853 txr->no_tx_dma_setup++;
1854 m_freem(*m_headp);
1855 *m_headp = NULL;
1856 return (error);
1857 }
1858 }
1859
1860 /* Make certain there are enough descriptors */
1861 if (nsegs > txr->tx_avail - 2) {
1862 txr->no_desc_avail++;
1863 bus_dmamap_unload(txr->txtag, map);
1864 return (ENOBUFS);
1865 }
1866 m_head = *m_headp;
1867
1868 /*
1869	** Set up the appropriate offload context;
1870	** this will consume the first descriptor.
1871 */
1872 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1873 if (__predict_false(error)) {
1874 m_freem(*m_headp);
1875 *m_headp = NULL;
1876 return (error);
1877 }
1878
1879 /* 82575 needs the queue index added */
1880 if (adapter->hw.mac.type == e1000_82575)
1881 olinfo_status |= txr->me << 4;
1882
1883 i = txr->next_avail_desc;
1884 for (j = 0; j < nsegs; j++) {
1885 bus_size_t seglen;
1886 bus_addr_t segaddr;
1887
1888 txbuf = &txr->tx_buffers[i];
1889 txd = &txr->tx_base[i];
1890 seglen = segs[j].ds_len;
1891 segaddr = htole64(segs[j].ds_addr);
1892
1893 txd->read.buffer_addr = segaddr;
1894 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1895 cmd_type_len | seglen);
1896 txd->read.olinfo_status = htole32(olinfo_status);
1897
1898 if (++i == txr->num_desc)
1899 i = 0;
1900 }
1901
1902 txd->read.cmd_type_len |=
1903 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1904 txr->tx_avail -= nsegs;
1905 txr->next_avail_desc = i;
1906
1907 txbuf->m_head = m_head;
1908 /*
1909	** Here we swap the maps so that the last descriptor,
1910	** which gets the completion interrupt, has the
1911	** real map, and the first descriptor gets the
1912	** unused map from this last buffer.
1913 */
1914 txr->tx_buffers[first].map = txbuf->map;
1915 txbuf->map = map;
1916 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1917
1918 /* Set the EOP descriptor that will be marked done */
1919 txbuf = &txr->tx_buffers[first];
1920 txbuf->eop = txd;
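	/*
	 * Added note: the RS bit set on the packet's last descriptor asks
	 * the hardware to write back completion (DD) status there; the
	 * transmit cleanup path uses the eop pointer saved on this first
	 * buffer to detect when the whole multi-descriptor packet is done.
	 */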
1921
1922 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1923 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1924 /*
1925 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1926 * hardware that this frame is available to transmit.
1927 */
1928 ++txr->total_packets;
1929 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1930
1931 return (0);
1932}
1933static void
1934igb_set_promisc(struct adapter *adapter)
1935{
1936 struct ifnet *ifp = adapter->ifp;
1937 struct e1000_hw *hw = &adapter->hw;
1938 u32 reg;
1939
1940 if (adapter->vf_ifp) {
1941 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1942 return;
1943 }
1944
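	/*
	 * Added commentary: RCTL.UPE enables unicast promiscuous receive and
	 * RCTL.MPE enables multicast promiscuous receive, so IFF_PROMISC sets
	 * both bits below while IFF_ALLMULTI sets only MPE.
	 */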
1945 reg = E1000_READ_REG(hw, E1000_RCTL);
1946 if (ifp->if_flags & IFF_PROMISC) {
1947 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1948 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1949 } else if (ifp->if_flags & IFF_ALLMULTI) {
1950 reg |= E1000_RCTL_MPE;
1951 reg &= ~E1000_RCTL_UPE;
1952 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1953 }
1954}
1955
1956static void
1957igb_disable_promisc(struct adapter *adapter)
1958{
1959 struct e1000_hw *hw = &adapter->hw;
1960 struct ifnet *ifp = adapter->ifp;
1961 u32 reg;
1962 int mcnt = 0;
1963
1964 if (adapter->vf_ifp) {
1965 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1966 return;
1967 }
1968 reg = E1000_READ_REG(hw, E1000_RCTL);
1969 reg &= (~E1000_RCTL_UPE);
1970 if (ifp->if_flags & IFF_ALLMULTI)
1971 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1972 else {
1973 struct ifmultiaddr *ifma;
1974#if __FreeBSD_version < 800000
1975 IF_ADDR_LOCK(ifp);
1976#else
1977 if_maddr_rlock(ifp);
1978#endif
1979 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1980 if (ifma->ifma_addr->sa_family != AF_LINK)
1981 continue;
1982 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1983 break;
1984 mcnt++;
1985 }
1986#if __FreeBSD_version < 800000
1987 IF_ADDR_UNLOCK(ifp);
1988#else
1989 if_maddr_runlock(ifp);
1990#endif
1991 }
1992 /* Don't disable if in MAX groups */
1993 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1994 reg &= (~E1000_RCTL_MPE);
1995 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1996}
1997
1998
1999/*********************************************************************
2000 * Multicast Update
2001 *
2002 * This routine is called whenever multicast address list is updated.
2003 *
2004 **********************************************************************/
2005
2006static void
2007igb_set_multi(struct adapter *adapter)
2008{
2009 struct ifnet *ifp = adapter->ifp;
2010 struct ifmultiaddr *ifma;
2011 u32 reg_rctl = 0;
2012 u8 *mta;
2013
2014 int mcnt = 0;
2015
2016 IOCTL_DEBUGOUT("igb_set_multi: begin");
2017
2018 mta = adapter->mta;
2019 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2020 MAX_NUM_MULTICAST_ADDRESSES);
2021
2022#if __FreeBSD_version < 800000
2023 IF_ADDR_LOCK(ifp);
2024#else
2025 if_maddr_rlock(ifp);
2026#endif
2027 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2028 if (ifma->ifma_addr->sa_family != AF_LINK)
2029 continue;
2030
2031 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2032 break;
2033
2034 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2035 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2036 mcnt++;
2037 }
2038#if __FreeBSD_version < 800000
2039 IF_ADDR_UNLOCK(ifp);
2040#else
2041 if_maddr_runlock(ifp);
2042#endif
2043
2044 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2045 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2046 reg_rctl |= E1000_RCTL_MPE;
2047 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2048 } else
2049 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2050}
2051
2052
2053/*********************************************************************
2054 * Timer routine:
2055 * This routine checks for link status,
2056 * updates statistics, and does the watchdog.
2057 *
2058 **********************************************************************/
2059
2060static void
2061igb_local_timer(void *arg)
2062{
2063 struct adapter *adapter = arg;
2064 device_t dev = adapter->dev;
2065 struct ifnet *ifp = adapter->ifp;
2066 struct tx_ring *txr = adapter->tx_rings;
2067 struct igb_queue *que = adapter->queues;
2068 int hung = 0, busy = 0;
2069
2070
2071 IGB_CORE_LOCK_ASSERT(adapter);
2072
2073 igb_update_link_status(adapter);
2074 igb_update_stats_counters(adapter);
2075
2076 /*
2077 ** Check the TX queues status
2078 ** - central locked handling of OACTIVE
2079 ** - watchdog only if all queues show hung
2080 */
2081 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2082 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2083 (adapter->pause_frames == 0))
2084 ++hung;
2085 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2086 ++busy;
2087 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2088 taskqueue_enqueue(que->tq, &que->que_task);
2089 }
2090 if (hung == adapter->num_queues)
2091 goto timeout;
2092 if (busy == adapter->num_queues)
2093 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2094 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2095 (busy < adapter->num_queues))
2096 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2097
2098 adapter->pause_frames = 0;
2099 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2100#ifndef DEVICE_POLLING
2101 /* Schedule all queue interrupts - deadlock protection */
2102 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2103#endif
2104 return;
2105
2106timeout:
2107 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2108 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2109 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2110 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2111	device_printf(dev, "TX(%d) desc avail = %d, "
2112	    "Next TX to Clean = %d\n",
2113 txr->me, txr->tx_avail, txr->next_to_clean);
2114 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2115 adapter->watchdog_events++;
2116 igb_init_locked(adapter);
2117}
2118
2119static void
2120igb_update_link_status(struct adapter *adapter)
2121{
2122 struct e1000_hw *hw = &adapter->hw;
2123 struct e1000_fc_info *fc = &hw->fc;
2124 struct ifnet *ifp = adapter->ifp;
2125 device_t dev = adapter->dev;
2126 struct tx_ring *txr = adapter->tx_rings;
2127 u32 link_check, thstat, ctrl;
2128 char *flowctl = NULL;
2129
2130 link_check = thstat = ctrl = 0;
2131
2132 /* Get the cached link value or read for real */
2133 switch (hw->phy.media_type) {
2134 case e1000_media_type_copper:
2135 if (hw->mac.get_link_status) {
2136 /* Do the work to read phy */
2137 e1000_check_for_link(hw);
2138 link_check = !hw->mac.get_link_status;
2139 } else
2140 link_check = TRUE;
2141 break;
2142 case e1000_media_type_fiber:
2143 e1000_check_for_link(hw);
2144 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2145 E1000_STATUS_LU);
2146 break;
2147 case e1000_media_type_internal_serdes:
2148 e1000_check_for_link(hw);
2149 link_check = adapter->hw.mac.serdes_has_link;
2150 break;
2151 /* VF device is type_unknown */
2152 case e1000_media_type_unknown:
2153 e1000_check_for_link(hw);
2154 link_check = !hw->mac.get_link_status;
2155 /* Fall thru */
2156 default:
2157 break;
2158 }
2159
2160 /* Check for thermal downshift or shutdown */
2161 if (hw->mac.type == e1000_i350) {
2162 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2163 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2164 }
2165
2166 /* Get the flow control for display */
2167 switch (fc->current_mode) {
2168 case e1000_fc_rx_pause:
2169 flowctl = "RX";
2170 break;
2171 case e1000_fc_tx_pause:
2172 flowctl = "TX";
2173 break;
2174 case e1000_fc_full:
2175 flowctl = "Full";
2176 break;
2177 case e1000_fc_none:
2178 default:
2179 flowctl = "None";
2180 break;
2181 }
2182
2183 /* Now we check if a transition has happened */
2184 if (link_check && (adapter->link_active == 0)) {
2185 e1000_get_speed_and_duplex(&adapter->hw,
2186 &adapter->link_speed, &adapter->link_duplex);
2187 if (bootverbose)
2188 device_printf(dev, "Link is up %d Mbps %s,"
2189 " Flow Control: %s\n",
2190 adapter->link_speed,
2191 ((adapter->link_duplex == FULL_DUPLEX) ?
2192 "Full Duplex" : "Half Duplex"), flowctl);
2193 adapter->link_active = 1;
2194 ifp->if_baudrate = adapter->link_speed * 1000000;
2195 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2196 (thstat & E1000_THSTAT_LINK_THROTTLE))
2197 device_printf(dev, "Link: thermal downshift\n");
2198 /* Delay Link Up for Phy update */
2199 if (((hw->mac.type == e1000_i210) ||
2200 (hw->mac.type == e1000_i211)) &&
2201 (hw->phy.id == I210_I_PHY_ID))
2202 msec_delay(I210_LINK_DELAY);
2203 /* Reset if the media type changed. */
2204 if (hw->dev_spec._82575.media_changed) {
2205 hw->dev_spec._82575.media_changed = false;
2206 adapter->flags |= IGB_MEDIA_RESET;
2207 igb_reset(adapter);
2208 }
2209 /* This can sleep */
2210 if_link_state_change(ifp, LINK_STATE_UP);
2211 } else if (!link_check && (adapter->link_active == 1)) {
2212 ifp->if_baudrate = adapter->link_speed = 0;
2213 adapter->link_duplex = 0;
2214 if (bootverbose)
2215 device_printf(dev, "Link is Down\n");
2216 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2217 (thstat & E1000_THSTAT_PWR_DOWN))
2218 device_printf(dev, "Link: thermal shutdown\n");
2219 adapter->link_active = 0;
2220 /* This can sleep */
2221 if_link_state_change(ifp, LINK_STATE_DOWN);
2222 /* Reset queue state */
2223 for (int i = 0; i < adapter->num_queues; i++, txr++)
2224 txr->queue_status = IGB_QUEUE_IDLE;
2225 }
2226}
2227
2228/*********************************************************************
2229 *
2230 * This routine disables all traffic on the adapter by issuing a
2231 * global reset on the MAC and deallocates TX/RX buffers.
2232 *
2233 **********************************************************************/
2234
2235static void
2236igb_stop(void *arg)
2237{
2238 struct adapter *adapter = arg;
2239 struct ifnet *ifp = adapter->ifp;
2240 struct tx_ring *txr = adapter->tx_rings;
2241
2242 IGB_CORE_LOCK_ASSERT(adapter);
2243
2244 INIT_DEBUGOUT("igb_stop: begin");
2245
2246 igb_disable_intr(adapter);
2247
2248 callout_stop(&adapter->timer);
2249
2250 /* Tell the stack that the interface is no longer active */
2251 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2252 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2253
2254 /* Disarm watchdog timer. */
2255 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2256 IGB_TX_LOCK(txr);
2257 txr->queue_status = IGB_QUEUE_IDLE;
2258 IGB_TX_UNLOCK(txr);
2259 }
2260
2261 e1000_reset_hw(&adapter->hw);
2262 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2263
2264 e1000_led_off(&adapter->hw);
2265 e1000_cleanup_led(&adapter->hw);
2266}
2267
2268
2269/*********************************************************************
2270 *
2271 * Determine hardware revision.
2272 *
2273 **********************************************************************/
2274static void
2275igb_identify_hardware(struct adapter *adapter)
2276{
2277 device_t dev = adapter->dev;
2278
2279	/* Make sure bus mastering is enabled and save off the PCI command word */
2280 pci_enable_busmaster(dev);
2281 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2282
2283 /* Save off the information about this board */
2284 adapter->hw.vendor_id = pci_get_vendor(dev);
2285 adapter->hw.device_id = pci_get_device(dev);
2286 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2287 adapter->hw.subsystem_vendor_id =
2288 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2289 adapter->hw.subsystem_device_id =
2290 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2291
2292 /* Set MAC type early for PCI setup */
2293 e1000_set_mac_type(&adapter->hw);
2294
2295 /* Are we a VF device? */
2296 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2297 (adapter->hw.mac.type == e1000_vfadapt_i350))
2298 adapter->vf_ifp = 1;
2299 else
2300 adapter->vf_ifp = 0;
2301}
2302
2303static int
2304igb_allocate_pci_resources(struct adapter *adapter)
2305{
2306 device_t dev = adapter->dev;
2307 int rid;
2308
2309 rid = PCIR_BAR(0);
2310 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2311 &rid, RF_ACTIVE);
2312 if (adapter->pci_mem == NULL) {
2313 device_printf(dev, "Unable to allocate bus resource: memory\n");
2314 return (ENXIO);
2315 }
2316 adapter->osdep.mem_bus_space_tag =
2317 rman_get_bustag(adapter->pci_mem);
2318 adapter->osdep.mem_bus_space_handle =
2319 rman_get_bushandle(adapter->pci_mem);
2320 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2321
2322 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2323
2324 /* This will setup either MSI/X or MSI */
2325 adapter->msix = igb_setup_msix(adapter);
2326 adapter->hw.back = &adapter->osdep;
2327
2328 return (0);
2329}
2330
2331/*********************************************************************
2332 *
2333 * Setup the Legacy or MSI Interrupt handler
2334 *
2335 **********************************************************************/
2336static int
2337igb_allocate_legacy(struct adapter *adapter)
2338{
2339 device_t dev = adapter->dev;
2340 struct igb_queue *que = adapter->queues;
2341#ifndef IGB_LEGACY_TX
2342 struct tx_ring *txr = adapter->tx_rings;
2343#endif
2344 int error, rid = 0;
2345
2346 /* Turn off all interrupts */
2347 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2348
2349 /* MSI RID is 1 */
2350 if (adapter->msix == 1)
2351 rid = 1;
2352
2353 /* We allocate a single interrupt resource */
2354 adapter->res = bus_alloc_resource_any(dev,
2355 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2356 if (adapter->res == NULL) {
2357 device_printf(dev, "Unable to allocate bus resource: "
2358 "interrupt\n");
2359 return (ENXIO);
2360 }
2361
2362#ifndef IGB_LEGACY_TX
2363 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2364#endif
2365
2366 /*
2367 * Try allocating a fast interrupt and the associated deferred
2368 * processing contexts.
2369 */
2370 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2371 /* Make tasklet for deferred link handling */
2372 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2373 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2374 taskqueue_thread_enqueue, &que->tq);
2375 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2376 device_get_nameunit(adapter->dev));
2377 if ((error = bus_setup_intr(dev, adapter->res,
2378 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2379 adapter, &adapter->tag)) != 0) {
2380 device_printf(dev, "Failed to register fast interrupt "
2381 "handler: %d\n", error);
2382 taskqueue_free(que->tq);
2383 que->tq = NULL;
2384 return (error);
2385 }
2386
2387 return (0);
2388}
2389
2390
2391/*********************************************************************
2392 *
2393 * Setup the MSIX Queue Interrupt handlers:
2394 *
2395 **********************************************************************/
2396static int
2397igb_allocate_msix(struct adapter *adapter)
2398{
2399 device_t dev = adapter->dev;
2400 struct igb_queue *que = adapter->queues;
2401 int error, rid, vector = 0;
2402 int cpu_id = 0;
2403#ifdef RSS
2404 cpuset_t cpu_mask;
2405#endif
2406
2407 /* Be sure to start with all interrupts disabled */
2408 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2409 E1000_WRITE_FLUSH(&adapter->hw);
2410
2411#ifdef RSS
2412 /*
2413 * If we're doing RSS, the number of queues needs to
2414 * match the number of RSS buckets that are configured.
2415 *
2416 * + If there's more queues than RSS buckets, we'll end
2417 * up with queues that get no traffic.
2418 *
2419 * + If there's more RSS buckets than queues, we'll end
2420 * up having multiple RSS buckets map to the same queue,
2421 * so there'll be some contention.
2422 */
2423 if (adapter->num_queues != rss_getnumbuckets()) {
2424 device_printf(dev,
2425 "%s: number of queues (%d) != number of RSS buckets (%d)"
2426 "; performance will be impacted.\n",
2427 __func__,
2428 adapter->num_queues,
2429 rss_getnumbuckets());
2430 }
2431#endif
2432
2433 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2434		rid = vector + 1;
2435 que->res = bus_alloc_resource_any(dev,
2436 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2437 if (que->res == NULL) {
2438 device_printf(dev,
2439 "Unable to allocate bus resource: "
2440 "MSIX Queue Interrupt\n");
2441 return (ENXIO);
2442 }
2443 error = bus_setup_intr(dev, que->res,
2444 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2445 igb_msix_que, que, &que->tag);
2446 if (error) {
2447 que->res = NULL;
2448 device_printf(dev, "Failed to register Queue handler");
2449 return (error);
2450 }
2451#if __FreeBSD_version >= 800504
2452 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2453#endif
2454 que->msix = vector;
2455 if (adapter->hw.mac.type == e1000_82575)
2456 que->eims = E1000_EICR_TX_QUEUE0 << i;
2457 else
2458 que->eims = 1 << vector;
2459
2460#ifdef RSS
2461 /*
2462 * The queue ID is used as the RSS layer bucket ID.
2463 * We look up the queue ID -> RSS CPU ID and select
2464 * that.
2465 */
2466 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2467#else
2468 /*
2469 * Bind the msix vector, and thus the
2470 * rings to the corresponding cpu.
2471 *
2472 * This just happens to match the default RSS round-robin
2473 * bucket -> queue -> CPU allocation.
2474 */
2475 if (adapter->num_queues > 1) {
2476 if (igb_last_bind_cpu < 0)
2477 igb_last_bind_cpu = CPU_FIRST();
2478 cpu_id = igb_last_bind_cpu;
2479 }
2480#endif
2481
2482 if (adapter->num_queues > 1) {
2483 bus_bind_intr(dev, que->res, cpu_id);
2484#ifdef RSS
2485 device_printf(dev,
2486 "Bound queue %d to RSS bucket %d\n",
2487 i, cpu_id);
2488#else
2489 device_printf(dev,
2490 "Bound queue %d to cpu %d\n",
2491 i, cpu_id);
2492#endif
2493 }
2494
2495#ifndef IGB_LEGACY_TX
2496 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2497 que->txr);
2498#endif
2499 /* Make tasklet for deferred handling */
2500 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2501 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2502 taskqueue_thread_enqueue, &que->tq);
2503 if (adapter->num_queues > 1) {
2504 /*
2505 * Only pin the taskqueue thread to a CPU if
2506 * RSS is in use.
2507 *
2508 * This again just happens to match the default RSS
2509 * round-robin bucket -> queue -> CPU allocation.
2510 */
2511#ifdef RSS
2512 CPU_SETOF(cpu_id, &cpu_mask);
2513 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2514 &cpu_mask,
2515 "%s que (bucket %d)",
2516 device_get_nameunit(adapter->dev),
2517 cpu_id);
2518#else
2519 taskqueue_start_threads(&que->tq, 1, PI_NET,
2520 "%s que (qid %d)",
2521 device_get_nameunit(adapter->dev),
2522 cpu_id);
2523#endif
2524 } else {
2525 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2526 device_get_nameunit(adapter->dev));
2527 }
2528
2529 /* Finally update the last bound CPU id */
2530 if (adapter->num_queues > 1)
2531 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2532 }
2533
2534 /* And Link */
2535 rid = vector + 1;
2536 adapter->res = bus_alloc_resource_any(dev,
2537 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2538 if (adapter->res == NULL) {
2539 device_printf(dev,
2540 "Unable to allocate bus resource: "
2541 "MSIX Link Interrupt\n");
2542 return (ENXIO);
2543 }
2544 if ((error = bus_setup_intr(dev, adapter->res,
2545 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2546 igb_msix_link, adapter, &adapter->tag)) != 0) {
2547 device_printf(dev, "Failed to register Link handler");
2548 return (error);
2549 }
2550#if __FreeBSD_version >= 800504
2551 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2552#endif
2553 adapter->linkvec = vector;
2554
2555 return (0);
2556}
2557
2558
2559static void
2560igb_configure_queues(struct adapter *adapter)
2561{
2562 struct e1000_hw *hw = &adapter->hw;
2563 struct igb_queue *que;
2564 u32 tmp, ivar = 0, newitr = 0;
2565
2566 /* First turn on RSS capability */
2567 if (adapter->hw.mac.type != e1000_82575)
2568 E1000_WRITE_REG(hw, E1000_GPIE,
2569 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2570 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2571
2572 /* Turn on MSIX */
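	/*
	 * Added commentary on the IVAR programming below (derived from the
	 * masks and shifts used, not from the original comments): each 32-bit
	 * IVAR register holds four 8-bit entries, each carrying an MSI-X
	 * vector number plus the IVAR_VALID flag.  On 82580/i350/i354/i21x
	 * (and the VF types), IVAR[n] maps queues 2n and 2n+1: byte 0 = RX(2n),
	 * byte 1 = TX(2n), byte 2 = RX(2n+1), byte 3 = TX(2n+1).  On 82576,
	 * IVAR[n] (n = 0..7) instead holds RX(n), TX(n), RX(n+8), TX(n+8).
	 */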
2573 switch (adapter->hw.mac.type) {
2574 case e1000_82580:
2575 case e1000_i350:
2576 case e1000_i354:
2577 case e1000_i210:
2578 case e1000_i211:
2579 case e1000_vfadapt:
2580 case e1000_vfadapt_i350:
2581 /* RX entries */
2582 for (int i = 0; i < adapter->num_queues; i++) {
2583 u32 index = i >> 1;
2584 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2585 que = &adapter->queues[i];
2586 if (i & 1) {
2587 ivar &= 0xFF00FFFF;
2588 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2589 } else {
2590 ivar &= 0xFFFFFF00;
2591 ivar |= que->msix | E1000_IVAR_VALID;
2592 }
2593 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2594 }
2595 /* TX entries */
2596 for (int i = 0; i < adapter->num_queues; i++) {
2597 u32 index = i >> 1;
2598 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2599 que = &adapter->queues[i];
2600 if (i & 1) {
2601 ivar &= 0x00FFFFFF;
2602 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2603 } else {
2604 ivar &= 0xFFFF00FF;
2605 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2606 }
2607 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2608 adapter->que_mask |= que->eims;
2609 }
2610
2611 /* And for the link interrupt */
2612 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2613 adapter->link_mask = 1 << adapter->linkvec;
2614 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2615 break;
2616 case e1000_82576:
2617 /* RX entries */
2618 for (int i = 0; i < adapter->num_queues; i++) {
2619 u32 index = i & 0x7; /* Each IVAR has two entries */
2620 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2621 que = &adapter->queues[i];
2622 if (i < 8) {
2623 ivar &= 0xFFFFFF00;
2624 ivar |= que->msix | E1000_IVAR_VALID;
2625 } else {
2626 ivar &= 0xFF00FFFF;
2627 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2628 }
2629 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2630 adapter->que_mask |= que->eims;
2631 }
2632 /* TX entries */
2633 for (int i = 0; i < adapter->num_queues; i++) {
2634 u32 index = i & 0x7; /* Each IVAR has two entries */
2635 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2636 que = &adapter->queues[i];
2637 if (i < 8) {
2638 ivar &= 0xFFFF00FF;
2639 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2640 } else {
2641 ivar &= 0x00FFFFFF;
2642 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2643 }
2644 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2645 adapter->que_mask |= que->eims;
2646 }
2647
2648 /* And for the link interrupt */
2649 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2650 adapter->link_mask = 1 << adapter->linkvec;
2651 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2652 break;
2653
2654 case e1000_82575:
2655		/* Enable MSI-X support */
2656 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2657 tmp |= E1000_CTRL_EXT_PBA_CLR;
2658 /* Auto-Mask interrupts upon ICR read. */
2659 tmp |= E1000_CTRL_EXT_EIAME;
2660 tmp |= E1000_CTRL_EXT_IRCA;
2661 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2662
2663 /* Queues */
2664 for (int i = 0; i < adapter->num_queues; i++) {
2665 que = &adapter->queues[i];
2666 tmp = E1000_EICR_RX_QUEUE0 << i;
2667 tmp |= E1000_EICR_TX_QUEUE0 << i;
2668 que->eims = tmp;
2669 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2670 i, que->eims);
2671 adapter->que_mask |= que->eims;
2672 }
2673
2674 /* Link */
2675 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2676 E1000_EIMS_OTHER);
2677 adapter->link_mask |= E1000_EIMS_OTHER;
2678 default:
2679 break;
2680 }
2681
2682 /* Set the starting interrupt rate */
2683 if (igb_max_interrupt_rate > 0)
2684 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2685
2686 if (hw->mac.type == e1000_82575)
2687 newitr |= newitr << 16;
2688 else
2689 newitr |= E1000_EITR_CNT_IGNR;
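	/*
	 * Added worked example (assumes the default igb_max_interrupt_rate
	 * of 8000): newitr = 4000000 / 8000 = 500.  The divisor implies the
	 * EITR interval is counted in 250 ns units, so 500 counts is a 125 us
	 * minimum spacing, i.e. at most ~8000 interrupts/sec per vector.
	 */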
2690
2691 for (int i = 0; i < adapter->num_queues; i++) {
2692 que = &adapter->queues[i];
2693 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2694 }
2695
2696 return;
2697}
2698
2699
2700static void
2701igb_free_pci_resources(struct adapter *adapter)
2702{
2703 struct igb_queue *que = adapter->queues;
2704 device_t dev = adapter->dev;
2705 int rid;
2706
2707 /*
2708	** There is a slight possibility of a failure mode
2709	** in attach that will result in entering this function
2710	** before interrupt resources have been initialized; in
2711	** that case we do not want to execute the loops below.
2712	** We can detect this reliably by the state of the adapter's
2713	** res pointer.
2714 */
2715 if (adapter->res == NULL)
2716 goto mem;
2717
2718 /*
2719 * First release all the interrupt resources:
2720 */
2721 for (int i = 0; i < adapter->num_queues; i++, que++) {
2722 rid = que->msix + 1;
2723 if (que->tag != NULL) {
2724 bus_teardown_intr(dev, que->res, que->tag);
2725 que->tag = NULL;
2726 }
2727 if (que->res != NULL)
2728 bus_release_resource(dev,
2729 SYS_RES_IRQ, rid, que->res);
2730 }
2731
2732 /* Clean the Legacy or Link interrupt last */
2733 if (adapter->linkvec) /* we are doing MSIX */
2734 rid = adapter->linkvec + 1;
2735 else
2736		rid = (adapter->msix != 0) ? 1 : 0;
2737
2738 que = adapter->queues;
2739 if (adapter->tag != NULL) {
2740 taskqueue_drain(que->tq, &adapter->link_task);
2741 bus_teardown_intr(dev, adapter->res, adapter->tag);
2742 adapter->tag = NULL;
2743 }
2744 if (adapter->res != NULL)
2745 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2746
2747 for (int i = 0; i < adapter->num_queues; i++, que++) {
2748 if (que->tq != NULL) {
2749#ifndef IGB_LEGACY_TX
2750 taskqueue_drain(que->tq, &que->txr->txq_task);
2751#endif
2752 taskqueue_drain(que->tq, &que->que_task);
2753 taskqueue_free(que->tq);
2754 }
2755 }
2756mem:
2757 if (adapter->msix)
2758 pci_release_msi(dev);
2759
2760 if (adapter->msix_mem != NULL)
2761 bus_release_resource(dev, SYS_RES_MEMORY,
2762 adapter->memrid, adapter->msix_mem);
2763
2764 if (adapter->pci_mem != NULL)
2765 bus_release_resource(dev, SYS_RES_MEMORY,
2766 PCIR_BAR(0), adapter->pci_mem);
2767
2768}
2769
2770/*
2771 * Setup Either MSI/X or MSI
2772 */
2773static int
2774igb_setup_msix(struct adapter *adapter)
2775{
2776 device_t dev = adapter->dev;
2777 int bar, want, queues, msgs, maxqueues;
2778
2779 /* tuneable override */
2780 if (igb_enable_msix == 0)
2781 goto msi;
2782
2783 /* First try MSI/X */
2784 msgs = pci_msix_count(dev);
2785 if (msgs == 0)
2786 goto msi;
2787 /*
2788	** Some newer devices, as with ixgbe, may now
2789	** use a different BAR, so we need to keep
2790	** track of which one is used.
2791 */
2792 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2793 bar = pci_read_config(dev, adapter->memrid, 4);
2794 if (bar == 0) /* use next bar */
2795 adapter->memrid += 4;
2796 adapter->msix_mem = bus_alloc_resource_any(dev,
2797 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2798 if (adapter->msix_mem == NULL) {
2799 /* May not be enabled */
2800 device_printf(adapter->dev,
2801 "Unable to map MSIX table \n");
2802 goto msi;
2803 }
2804
2805 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
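	/*
	 * e.g. (illustrative): 8 CPUs with 10 MSI-X messages -> 8 queues;
	 * 4 CPUs with only 3 messages -> 2 queues (one message is kept
	 * for the link interrupt).
	 */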
2806
2807 /* Override via tuneable */
2808 if (igb_num_queues != 0)
2809 queues = igb_num_queues;
2810
2811#ifdef RSS
2812 /* If we're doing RSS, clamp at the number of RSS buckets */
2813 if (queues > rss_getnumbuckets())
2814 queues = rss_getnumbuckets();
2815#endif
2816
2817
2818 /* Sanity check based on HW */
2819 switch (adapter->hw.mac.type) {
2820 case e1000_82575:
2821 maxqueues = 4;
2822 break;
2823 case e1000_82576:
2824 case e1000_82580:
2825 case e1000_i350:
2826 case e1000_i354:
2827 maxqueues = 8;
2828 break;
2829 case e1000_i210:
2830 maxqueues = 4;
2831 break;
2832 case e1000_i211:
2833 maxqueues = 2;
2834 break;
2835 default: /* VF interfaces */
2836 maxqueues = 1;
2837 break;
2838 }
2839
2840 /* Final clamp on the actual hardware capability */
2841 if (queues > maxqueues)
2842 queues = maxqueues;
2843
2844 /*
2845 ** One vector (RX/TX pair) per queue
2846	** plus one additional vector for the link interrupt.
2847 */
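	/* e.g. (illustrative) 4 queues -> want = 5 vectors: 4 queue vectors + 1 link */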
2848 want = queues + 1;
2849 if (msgs >= want)
2850 msgs = want;
2851 else {
2852 device_printf(adapter->dev,
2853 "MSIX Configuration Problem, "
2854 "%d vectors configured, but %d queues wanted!\n",
2855 msgs, want);
2856 goto msi;
2857 }
2858 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2859 device_printf(adapter->dev,
2860 "Using MSIX interrupts with %d vectors\n", msgs);
2861 adapter->num_queues = queues;
2862 return (msgs);
2863 }
2864 /*
2865 ** If MSIX alloc failed or provided us with
2866 ** less than needed, free and fall through to MSI
2867 */
2868 pci_release_msi(dev);
2869
2870msi:
2871 if (adapter->msix_mem != NULL) {
2872 bus_release_resource(dev, SYS_RES_MEMORY,
2873 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2874 adapter->msix_mem = NULL;
2875 }
2876 msgs = 1;
2877 if (pci_alloc_msi(dev, &msgs) == 0) {
2878		device_printf(adapter->dev, "Using an MSI interrupt\n");
2879 return (msgs);
2880 }
2881	device_printf(adapter->dev, "Using a Legacy interrupt\n");
2882 return (0);
2883}
2884
2885/*********************************************************************
2886 *
2887 * Initialize the DMA Coalescing feature
2888 *
2889 **********************************************************************/
2890static void
2891igb_init_dmac(struct adapter *adapter, u32 pba)
2892{
2893 device_t dev = adapter->dev;
2894 struct e1000_hw *hw = &adapter->hw;
2895 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2896 u16 hwm;
2897
2898 if (hw->mac.type == e1000_i211)
2899 return;
2900
2901 if (hw->mac.type > e1000_82580) {
2902
2903 if (adapter->dmac == 0) { /* Disabling it */
2904 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2905 return;
2906 } else
2907 device_printf(dev, "DMA Coalescing enabled\n");
2908
2909 /* Set starting threshold */
2910 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2911
2912 hwm = 64 * pba - adapter->max_frame_size / 16;
2913 if (hwm < 64 * (pba - 6))
2914 hwm = 64 * (pba - 6);
2915 reg = E1000_READ_REG(hw, E1000_FCRTC);
2916 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2917 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2918 & E1000_FCRTC_RTH_COAL_MASK);
2919 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2920
2921
2922 dmac = pba - adapter->max_frame_size / 512;
2923 if (dmac < pba - 10)
2924 dmac = pba - 10;
2925 reg = E1000_READ_REG(hw, E1000_DMACR);
2926 reg &= ~E1000_DMACR_DMACTHR_MASK;
2927		reg |= ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2928 & E1000_DMACR_DMACTHR_MASK);
2929
2930 /* transition to L0x or L1 if available..*/
2931 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2932
2933		/* Check whether the link is a 2.5Gb backplane connection
2934		* before configuring the watchdog timer: the DMA coalescing
2935		* watchdog field counts in 12.8 usec units on a 2.5Gb link
2936		* and in 32 usec units otherwise, so the requested value
2937		* is scaled accordingly below.
2938		*/
2939 if (hw->mac.type == e1000_i354) {
2940 int status = E1000_READ_REG(hw, E1000_STATUS);
2941 if ((status & E1000_STATUS_2P5_SKU) &&
2942 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2943 reg |= ((adapter->dmac * 5) >> 6);
2944 else
2945 reg |= (adapter->dmac >> 5);
2946 } else {
2947 reg |= (adapter->dmac >> 5);
2948 }
2949
2950 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2951
2952 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2953
2954 /* Set the interval before transition */
2955 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2956 if (hw->mac.type == e1000_i350)
2957 reg |= IGB_DMCTLX_DCFLUSH_DIS;
2958		/*
2959		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so a 4 usec
2960		** delay needs a value of 0xA (10 units) rather than 0x4.
2961		*/
2962 if (hw->mac.type == e1000_i354) {
2963 int status = E1000_READ_REG(hw, E1000_STATUS);
2964 if ((status & E1000_STATUS_2P5_SKU) &&
2965 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2966 reg |= 0xA;
2967 else
2968 reg |= 0x4;
2969 } else {
2970 reg |= 0x4;
2971 }
2972
2973 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2974
2975 /* free space in tx packet buffer to wake from DMA coal */
2976 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2977 (2 * adapter->max_frame_size)) >> 6);
2978
2979 /* make low power state decision controlled by DMA coal */
2980 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2981 reg &= ~E1000_PCIEMISC_LX_DECISION;
2982 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2983
2984 } else if (hw->mac.type == e1000_82580) {
2985 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2986 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2987 reg & ~E1000_PCIEMISC_LX_DECISION);
2988 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2989 }
2990}
2991
2992
2993/*********************************************************************
2994 *
2995 * Set up a fresh starting state
2996 *
2997 **********************************************************************/
2998static void
2999igb_reset(struct adapter *adapter)
3000{
3001 device_t dev = adapter->dev;
3002 struct e1000_hw *hw = &adapter->hw;
3003 struct e1000_fc_info *fc = &hw->fc;
3004 struct ifnet *ifp = adapter->ifp;
3005 u32 pba = 0;
3006 u16 hwm;
3007
3008 INIT_DEBUGOUT("igb_reset: begin");
3009
3010 /* Let the firmware know the OS is in control */
3011 igb_get_hw_control(adapter);
3012
3013 /*
3014 * Packet Buffer Allocation (PBA)
3015	 * Writing PBA sets the receive portion of the buffer;
3016	 * the remainder is used for the transmit buffer.
3017 */
3018 switch (hw->mac.type) {
3019 case e1000_82575:
3020 pba = E1000_PBA_32K;
3021 break;
3022 case e1000_82576:
3023 case e1000_vfadapt:
3024 pba = E1000_READ_REG(hw, E1000_RXPBS);
3025 pba &= E1000_RXPBS_SIZE_MASK_82576;
3026 break;
3027 case e1000_82580:
3028 case e1000_i350:
3029 case e1000_i354:
3030 case e1000_vfadapt_i350:
3031 pba = E1000_READ_REG(hw, E1000_RXPBS);
3032 pba = e1000_rxpbs_adjust_82580(pba);
3033 break;
3034 case e1000_i210:
3035 case e1000_i211:
3036 pba = E1000_PBA_34K;
3037 default:
3038 break;
3039 }
3040
3041 /* Special needs in case of Jumbo frames */
3042 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3043 u32 tx_space, min_tx, min_rx;
3044 pba = E1000_READ_REG(hw, E1000_PBA);
3045 tx_space = pba >> 16;
3046 pba &= 0xffff;
3047 min_tx = (adapter->max_frame_size +
3048 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3049 min_tx = roundup2(min_tx, 1024);
3050 min_tx >>= 10;
3051 min_rx = adapter->max_frame_size;
3052 min_rx = roundup2(min_rx, 1024);
3053 min_rx >>= 10;
3054 if (tx_space < min_tx &&
3055 ((min_tx - tx_space) < pba)) {
3056 pba = pba - (min_tx - tx_space);
3057 /*
3058 * if short on rx space, rx wins
3059 * and must trump tx adjustment
3060 */
3061 if (pba < min_rx)
3062 pba = min_rx;
3063 }
3064 E1000_WRITE_REG(hw, E1000_PBA, pba);
3065 }
3066
3067 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3068
3069 /*
3070 * These parameters control the automatic generation (Tx) and
3071 * response (Rx) to Ethernet PAUSE frames.
3072 * - High water mark should allow for at least two frames to be
3073 * received after sending an XOFF.
3074 * - Low water mark works best when it is very near the high water mark.
3075 * This allows the receiver to restart by sending XON when it has
3076 * drained a bit.
3077 */
3078 hwm = min(((pba << 10) * 9 / 10),
3079 ((pba << 10) - 2 * adapter->max_frame_size));
3080
3081 if (hw->mac.type < e1000_82576) {
3082 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3083 fc->low_water = fc->high_water - 8;
3084 } else {
3085 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3086 fc->low_water = fc->high_water - 16;
3087 }
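	/*
	 * Added worked example (assumes an i210-class part where pba is 34
	 * and a ~1518-byte max frame): pba << 10 = 34816 bytes, 9/10 of that
	 * is 31334, and 34816 - 2*1518 = 31780, so hwm = min(31334, 31780) =
	 * 31334; high_water is then that value rounded down to the register
	 * granularity above and low_water sits just below it.
	 */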
3088
3089 fc->pause_time = IGB_FC_PAUSE_TIME;
3090 fc->send_xon = TRUE;
3091 if (adapter->fc)
3092 fc->requested_mode = adapter->fc;
3093 else
3094 fc->requested_mode = e1000_fc_default;
3095
3096 /* Issue a global reset */
3097 e1000_reset_hw(hw);
3098 E1000_WRITE_REG(hw, E1000_WUC, 0);
3099
3100 /* Reset for AutoMediaDetect */
3101 if (adapter->flags & IGB_MEDIA_RESET) {
3102 e1000_setup_init_funcs(hw, TRUE);
3103 e1000_get_bus_info(hw);
3104 adapter->flags &= ~IGB_MEDIA_RESET;
3105 }
3106
3107 if (e1000_init_hw(hw) < 0)
3108 device_printf(dev, "Hardware Initialization Failed\n");
3109
3110 /* Setup DMA Coalescing */
3111 igb_init_dmac(adapter, pba);
3112
3113 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3114 e1000_get_phy_info(hw);
3115 e1000_check_for_link(hw);
3116 return;
3117}
3118
3119/*********************************************************************
3120 *
3121 * Setup networking device structure and register an interface.
3122 *
3123 **********************************************************************/
3124static int
3125igb_setup_interface(device_t dev, struct adapter *adapter)
3126{
3127 struct ifnet *ifp;
3128
3129 INIT_DEBUGOUT("igb_setup_interface: begin");
3130
3131 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3132 if (ifp == NULL) {
3133 device_printf(dev, "can not allocate ifnet structure\n");
3134 return (-1);
3135 }
3136 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3137 ifp->if_init = igb_init;
3138 ifp->if_softc = adapter;
3139 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3140 ifp->if_ioctl = igb_ioctl;
3141 ifp->if_get_counter = igb_get_counter;
3142#ifndef IGB_LEGACY_TX
3143 ifp->if_transmit = igb_mq_start;
3144 ifp->if_qflush = igb_qflush;
3145#else
3146 ifp->if_start = igb_start;
3147 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3148 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3149 IFQ_SET_READY(&ifp->if_snd);
3150#endif
3151
3152 ether_ifattach(ifp, adapter->hw.mac.addr);
3153
3154 ifp->if_capabilities = ifp->if_capenable = 0;
3155
3156 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3157 ifp->if_capabilities |= IFCAP_TSO;
3158 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3159 ifp->if_capenable = ifp->if_capabilities;
3160
3161	/* Advertise LRO capability, but leave it disabled by default */
3162 ifp->if_capabilities |= IFCAP_LRO;
3163
3164#ifdef DEVICE_POLLING
3165 ifp->if_capabilities |= IFCAP_POLLING;
3166#endif
3167
3168 /*
3169 * Tell the upper layer(s) we
3170 * support full VLAN capability.
3171 */
3172 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3173 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3174 | IFCAP_VLAN_HWTSO
3175 | IFCAP_VLAN_MTU;
3176 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3177 | IFCAP_VLAN_HWTSO
3178 | IFCAP_VLAN_MTU;
3179
3180 /*
3181	** Don't turn this on by default: if vlans are
3182	** created on another pseudo device (e.g. lagg),
3183	** vlan events are not passed through and filtering
3184	** breaks, but with HW FILTER off it works. If you
3185	** use vlans directly on the igb interface you can
3186	** enable this and get full hardware tag filtering.
3187 */
3188 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
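	/*
	 * Added note: the filter capability can typically be toggled at
	 * runtime from userland, e.g. "ifconfig igb0 vlanhwfilter" or
	 * "ifconfig igb0 -vlanhwfilter".
	 */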
3189
3190 /*
3191 * Specify the media types supported by this adapter and register
3192 * callbacks to update media and link information
3193 */
3194 ifmedia_init(&adapter->media, IFM_IMASK,
3195 igb_media_change, igb_media_status);
3196 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3197 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3198 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3199 0, NULL);
3200 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3201 } else {
3202 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3203 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3204 0, NULL);
3205 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3206 0, NULL);
3207 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3208 0, NULL);
3209 if (adapter->hw.phy.type != e1000_phy_ife) {
3210 ifmedia_add(&adapter->media,
3211 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3212 ifmedia_add(&adapter->media,
3213 IFM_ETHER | IFM_1000_T, 0, NULL);
3214 }
3215 }
3216 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3217 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3218 return (0);
3219}
3220
3221
3222/*
3223 * Manage DMA'able memory.
3224 */
3225static void
3226igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3227{
3228 if (error)
3229 return;
3230 *(bus_addr_t *) arg = segs[0].ds_addr;
3231}
3232
3233static int
3234igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3235 struct igb_dma_alloc *dma, int mapflags)
3236{
3237 int error;
3238
3239 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3240 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3241 BUS_SPACE_MAXADDR, /* lowaddr */
3242 BUS_SPACE_MAXADDR, /* highaddr */
3243 NULL, NULL, /* filter, filterarg */
3244 size, /* maxsize */
3245 1, /* nsegments */
3246 size, /* maxsegsize */
3247 0, /* flags */
3248 NULL, /* lockfunc */
3249 NULL, /* lockarg */
3250 &dma->dma_tag);
3251 if (error) {
3252 device_printf(adapter->dev,
3253 "%s: bus_dma_tag_create failed: %d\n",
3254 __func__, error);
3255 goto fail_0;
3256 }
3257
3258 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3259 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3260 if (error) {
3261 device_printf(adapter->dev,
3262 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3263 __func__, (uintmax_t)size, error);
3264 goto fail_2;
3265 }
3266
3267 dma->dma_paddr = 0;
3268 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3269 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3270 if (error || dma->dma_paddr == 0) {
3271 device_printf(adapter->dev,
3272 "%s: bus_dmamap_load failed: %d\n",
3273 __func__, error);
3274 goto fail_3;
3275 }
3276
3277 return (0);
3278
3279fail_3:
3280 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3281fail_2:
3282 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3283 bus_dma_tag_destroy(dma->dma_tag);
3284fail_0:
3285 dma->dma_tag = NULL;
3286
3287 return (error);
3288}
3289
3290static void
3291igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3292{
3293 if (dma->dma_tag == NULL)
3294 return;
3295 if (dma->dma_paddr != 0) {
3296 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3297 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3298 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3299 dma->dma_paddr = 0;
3300 }
3301 if (dma->dma_vaddr != NULL) {
3302 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3303 dma->dma_vaddr = NULL;
3304 }
3305 bus_dma_tag_destroy(dma->dma_tag);
3306 dma->dma_tag = NULL;
3307}
3308
3309
3310/*********************************************************************
3311 *
3312 * Allocate memory for the transmit and receive rings, and then
3313 * the descriptors associated with each, called only once at attach.
3314 *
3315 **********************************************************************/
3316static int
3317igb_allocate_queues(struct adapter *adapter)
3318{
3319 device_t dev = adapter->dev;
3320 struct igb_queue *que = NULL;
3321 struct tx_ring *txr = NULL;
3322 struct rx_ring *rxr = NULL;
3323 int rsize, tsize, error = E1000_SUCCESS;
3324 int txconf = 0, rxconf = 0;
3325
3326 /* First allocate the top level queue structs */
3327 if (!(adapter->queues =
3328 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3329 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3330 device_printf(dev, "Unable to allocate queue memory\n");
3331 error = ENOMEM;
3332 goto fail;
3333 }
3334
3335 /* Next allocate the TX ring struct memory */
3336 if (!(adapter->tx_rings =
3337 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3338 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3339 device_printf(dev, "Unable to allocate TX ring memory\n");
3340 error = ENOMEM;
3341 goto tx_fail;
3342 }
3343
3344 /* Now allocate the RX */
3345 if (!(adapter->rx_rings =
3346 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3347 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3348 device_printf(dev, "Unable to allocate RX ring memory\n");
3349 error = ENOMEM;
3350 goto rx_fail;
3351 }
3352
3353 tsize = roundup2(adapter->num_tx_desc *
3354 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3355 /*
3356 * Now set up the TX queues, txconf is needed to handle the
3357 * possibility that things fail midcourse and we need to
3358 * undo memory gracefully
3359 */
3360 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3361 /* Set up some basics */
3362 txr = &adapter->tx_rings[i];
3363 txr->adapter = adapter;
3364 txr->me = i;
3365 txr->num_desc = adapter->num_tx_desc;
3366
3367 /* Initialize the TX lock */
3368 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3369 device_get_nameunit(dev), txr->me);
3370 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3371
3372 if (igb_dma_malloc(adapter, tsize,
3373 &txr->txdma, BUS_DMA_NOWAIT)) {
3374 device_printf(dev,
3375 "Unable to allocate TX Descriptor memory\n");
3376 error = ENOMEM;
3377 goto err_tx_desc;
3378 }
3379 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3380 bzero((void *)txr->tx_base, tsize);
3381
3382 /* Now allocate transmit buffers for the ring */
3383 if (igb_allocate_transmit_buffers(txr)) {
3384 device_printf(dev,
3385 "Critical Failure setting up transmit buffers\n");
3386 error = ENOMEM;
3387 goto err_tx_desc;
3388 }
3389#ifndef IGB_LEGACY_TX
3390 /* Allocate a buf ring */
3391 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3392 M_WAITOK, &txr->tx_mtx);
3393#endif
3394 }
3395
3396 /*
3397 * Next the RX queues...
3398 */
3399 rsize = roundup2(adapter->num_rx_desc *
3400 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3401 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3402 rxr = &adapter->rx_rings[i];
3403 rxr->adapter = adapter;
3404 rxr->me = i;
3405
3406 /* Initialize the RX lock */
3407 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3408		    device_get_nameunit(dev), rxr->me);
3409 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3410
3411 if (igb_dma_malloc(adapter, rsize,
3412 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3413 device_printf(dev,
3414 "Unable to allocate RxDescriptor memory\n");
3415 error = ENOMEM;
3416 goto err_rx_desc;
3417 }
3418 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3419 bzero((void *)rxr->rx_base, rsize);
3420
3421 /* Allocate receive buffers for the ring*/
3422 if (igb_allocate_receive_buffers(rxr)) {
3423 device_printf(dev,
3424 "Critical Failure setting up receive buffers\n");
3425 error = ENOMEM;
3426 goto err_rx_desc;
3427 }
3428 }
3429
3430 /*
3431 ** Finally set up the queue holding structs
3432 */
3433 for (int i = 0; i < adapter->num_queues; i++) {
3434 que = &adapter->queues[i];
3435 que->adapter = adapter;
3436 que->txr = &adapter->tx_rings[i];
3437 que->rxr = &adapter->rx_rings[i];
3438 }
3439
3440 return (0);
3441
3442err_rx_desc:
3443 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3444 igb_dma_free(adapter, &rxr->rxdma);
3445err_tx_desc:
3446 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3447 igb_dma_free(adapter, &txr->txdma);
3448 free(adapter->rx_rings, M_DEVBUF);
3449rx_fail:
3450#ifndef IGB_LEGACY_TX
3451 buf_ring_free(txr->br, M_DEVBUF);
3452#endif
3453 free(adapter->tx_rings, M_DEVBUF);
3454tx_fail:
3455 free(adapter->queues, M_DEVBUF);
3456fail:
3457 return (error);
3458}
3459
3460/*********************************************************************
3461 *
3462 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3463 * the information needed to transmit a packet on the wire. This is
3464 * called only once at attach, setup is done every reset.
3465 *
3466 **********************************************************************/
3467static int
3468igb_allocate_transmit_buffers(struct tx_ring *txr)
3469{
3470 struct adapter *adapter = txr->adapter;
3471 device_t dev = adapter->dev;
3472 struct igb_tx_buf *txbuf;
3473 int error, i;
3474
3475 /*
3476 * Setup DMA descriptor areas.
3477 */
3478 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3479 1, 0, /* alignment, bounds */
3480 BUS_SPACE_MAXADDR, /* lowaddr */
3481 BUS_SPACE_MAXADDR, /* highaddr */
3482 NULL, NULL, /* filter, filterarg */
3483 IGB_TSO_SIZE, /* maxsize */
3484 IGB_MAX_SCATTER, /* nsegments */
3485 PAGE_SIZE, /* maxsegsize */
3486 0, /* flags */
3487 NULL, /* lockfunc */
3488 NULL, /* lockfuncarg */
3489 &txr->txtag))) {
3490 device_printf(dev,"Unable to allocate TX DMA tag\n");
3491 goto fail;
3492 }
3493
3494 if (!(txr->tx_buffers =
3495 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3496 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3497 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3498 error = ENOMEM;
3499 goto fail;
3500 }
3501
3502 /* Create the descriptor buffer dma maps */
3503 txbuf = txr->tx_buffers;
3504 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3505 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3506 if (error != 0) {
3507 device_printf(dev, "Unable to create TX DMA map\n");
3508 goto fail;
3509 }
3510 }
3511
3512 return 0;
3513fail:
3514	/* We free everything; this handles the case where we failed partway through */
3515 igb_free_transmit_structures(adapter);
3516 return (error);
3517}
3518
3519/*********************************************************************
3520 *
3521 * Initialize a transmit ring.
3522 *
3523 **********************************************************************/
3524static void
3525igb_setup_transmit_ring(struct tx_ring *txr)
3526{
3527 struct adapter *adapter = txr->adapter;
3528 struct igb_tx_buf *txbuf;
3529 int i;
3530#ifdef DEV_NETMAP
3531 struct netmap_adapter *na = NA(adapter->ifp);
3532 struct netmap_slot *slot;
3533#endif /* DEV_NETMAP */
3534
3535 /* Clear the old descriptor contents */
3536 IGB_TX_LOCK(txr);
3537#ifdef DEV_NETMAP
3538 slot = netmap_reset(na, NR_TX, txr->me, 0);
3539#endif /* DEV_NETMAP */
3540 bzero((void *)txr->tx_base,
3541 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3542 /* Reset indices */
3543 txr->next_avail_desc = 0;
3544 txr->next_to_clean = 0;
3545
3546 /* Free any existing tx buffers. */
3547 txbuf = txr->tx_buffers;
3548 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3549 if (txbuf->m_head != NULL) {
3550 bus_dmamap_sync(txr->txtag, txbuf->map,
3551 BUS_DMASYNC_POSTWRITE);
3552 bus_dmamap_unload(txr->txtag, txbuf->map);
3553 m_freem(txbuf->m_head);
3554 txbuf->m_head = NULL;
3555 }
3556#ifdef DEV_NETMAP
3557 if (slot) {
3558 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3559 /* no need to set the address */
3560 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3561 }
3562#endif /* DEV_NETMAP */
3563 /* clear the watch index */
3564 txbuf->eop = NULL;
3565 }
3566
3567 /* Set number of descriptors available */
3568 txr->tx_avail = adapter->num_tx_desc;
3569
3570 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3571 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3572 IGB_TX_UNLOCK(txr);
3573}
3574
3575/*********************************************************************
3576 *
3577 * Initialize all transmit rings.
3578 *
3579 **********************************************************************/
3580static void
3581igb_setup_transmit_structures(struct adapter *adapter)
3582{
3583 struct tx_ring *txr = adapter->tx_rings;
3584
3585 for (int i = 0; i < adapter->num_queues; i++, txr++)
3586 igb_setup_transmit_ring(txr);
3587
3588 return;
3589}
3590
3591/*********************************************************************
3592 *
3593 * Enable transmit unit.
3594 *
3595 **********************************************************************/
3596static void
3597igb_initialize_transmit_units(struct adapter *adapter)
3598{
3599 struct tx_ring *txr = adapter->tx_rings;
3600 struct e1000_hw *hw = &adapter->hw;
3601 u32 tctl, txdctl;
3602
3603 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3604 tctl = txdctl = 0;
3605
3606 /* Setup the Tx Descriptor Rings */
3607 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3608 u64 bus_addr = txr->txdma.dma_paddr;
3609
3610 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3611 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3612 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3613 (uint32_t)(bus_addr >> 32));
3614 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3615 (uint32_t)bus_addr);
3616
3617 /* Setup the HW Tx Head and Tail descriptor pointers */
3618 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3619 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3620
3621 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3622 E1000_READ_REG(hw, E1000_TDBAL(i)),
3623 E1000_READ_REG(hw, E1000_TDLEN(i)));
3624
3625 txr->queue_status = IGB_QUEUE_IDLE;
3626
3627 txdctl |= IGB_TX_PTHRESH;
3628 txdctl |= IGB_TX_HTHRESH << 8;
3629 txdctl |= IGB_TX_WTHRESH << 16;
3630 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3631 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3632 }
3633
3634 if (adapter->vf_ifp)
3635 return;
3636
3637 e1000_config_collision_dist(hw);
3638
3639 /* Program the Transmit Control Register */
3640 tctl = E1000_READ_REG(hw, E1000_TCTL);
3641 tctl &= ~E1000_TCTL_CT;
3642 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3643 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3644
3645 /* This write will effectively turn on the transmit unit. */
3646 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3647}
3648
3649/*********************************************************************
3650 *
3651 * Free all transmit rings.
3652 *
3653 **********************************************************************/
3654static void
3655igb_free_transmit_structures(struct adapter *adapter)
3656{
3657 struct tx_ring *txr = adapter->tx_rings;
3658
3659 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3660 IGB_TX_LOCK(txr);
3661 igb_free_transmit_buffers(txr);
3662 igb_dma_free(adapter, &txr->txdma);
3663 IGB_TX_UNLOCK(txr);
3664 IGB_TX_LOCK_DESTROY(txr);
3665 }
3666 free(adapter->tx_rings, M_DEVBUF);
3667}
3668
3669/*********************************************************************
3670 *
3671 * Free transmit ring related data structures.
3672 *
3673 **********************************************************************/
3674static void
3675igb_free_transmit_buffers(struct tx_ring *txr)
3676{
3677 struct adapter *adapter = txr->adapter;
3678 struct igb_tx_buf *tx_buffer;
3679 int i;
3680
3681 INIT_DEBUGOUT("free_transmit_ring: begin");
3682
3683 if (txr->tx_buffers == NULL)
3684 return;
3685
3686 tx_buffer = txr->tx_buffers;
3687 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3688 if (tx_buffer->m_head != NULL) {
3689 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3690 BUS_DMASYNC_POSTWRITE);
3691 bus_dmamap_unload(txr->txtag,
3692 tx_buffer->map);
3693 m_freem(tx_buffer->m_head);
3694 tx_buffer->m_head = NULL;
3695 if (tx_buffer->map != NULL) {
3696 bus_dmamap_destroy(txr->txtag,
3697 tx_buffer->map);
3698 tx_buffer->map = NULL;
3699 }
3700 } else if (tx_buffer->map != NULL) {
3701 bus_dmamap_unload(txr->txtag,
3702 tx_buffer->map);
3703 bus_dmamap_destroy(txr->txtag,
3704 tx_buffer->map);
3705 tx_buffer->map = NULL;
3706 }
3707 }
3708#ifndef IGB_LEGACY_TX
3709 if (txr->br != NULL)
3710 buf_ring_free(txr->br, M_DEVBUF);
3711#endif
3712 if (txr->tx_buffers != NULL) {
3713 free(txr->tx_buffers, M_DEVBUF);
3714 txr->tx_buffers = NULL;
3715 }
3716 if (txr->txtag != NULL) {
3717 bus_dma_tag_destroy(txr->txtag);
3718 txr->txtag = NULL;
3719 }
3720 return;
3721}
3722
3723/**********************************************************************
3724 *
3725 * Setup work for hardware segmentation offload (TSO) on
3726 * adapters using advanced tx descriptors
3727 *
3728 **********************************************************************/
3729static int
3730igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3731 u32 *cmd_type_len, u32 *olinfo_status)
3732{
3733 struct adapter *adapter = txr->adapter;
3734 struct e1000_adv_tx_context_desc *TXD;
3735 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3736 u32 mss_l4len_idx = 0, paylen;
3737 u16 vtag = 0, eh_type;
3738 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3739 struct ether_vlan_header *eh;
3740#ifdef INET6
3741 struct ip6_hdr *ip6;
3742#endif
3743#ifdef INET
3744 struct ip *ip;
3745#endif
3746 struct tcphdr *th;
3747
3748
3749 /*
3750 * Determine where frame payload starts.
3751 * Jump over vlan headers if already present
3752 */
3753 eh = mtod(mp, struct ether_vlan_header *);
3754 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3755 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3756 eh_type = eh->evl_proto;
3757 } else {
3758 ehdrlen = ETHER_HDR_LEN;
3759 eh_type = eh->evl_encap_proto;
3760 }
3761
3762 switch (ntohs(eh_type)) {
3763#ifdef INET6
3764 case ETHERTYPE_IPV6:
3765 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3766 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3767 if (ip6->ip6_nxt != IPPROTO_TCP)
3768 return (ENXIO);
3769 ip_hlen = sizeof(struct ip6_hdr);
3770 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3771 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3772 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3773 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3774 break;
3775#endif
3776#ifdef INET
3777 case ETHERTYPE_IP:
3778 ip = (struct ip *)(mp->m_data + ehdrlen);
3779 if (ip->ip_p != IPPROTO_TCP)
3780 return (ENXIO);
3781 ip->ip_sum = 0;
3782 ip_hlen = ip->ip_hl << 2;
3783 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3784 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3785 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3786 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3787 /* Tell transmit desc to also do IPv4 checksum. */
3788 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3789 break;
3790#endif
3791 default:
3792 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3793 __func__, ntohs(eh_type));
3794 break;
3795 }
3796
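	/*
	 * The pseudo-header checksum seeded above deliberately excludes the
	 * length; the hardware inserts the per-segment length when it does
	 * the segmentation. The context descriptor built below consumes one
	 * ring slot and carries the header lengths and MSS the hardware
	 * needs for TSO.
	 */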
3797 ctxd = txr->next_avail_desc;
3798 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3799
3800 tcp_hlen = th->th_off << 2;
3801
3802 /* This is used in the transmit desc in encap */
3803 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3804
3805 /* VLAN MACLEN IPLEN */
3806 if (mp->m_flags & M_VLANTAG) {
3807 vtag = htole16(mp->m_pkthdr.ether_vtag);
3808 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3809 }
3810
3811 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3812 vlan_macip_lens |= ip_hlen;
3813 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3814
3815 /* ADV DTYPE TUCMD */
3816 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3817 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3818 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3819
3820 /* MSS L4LEN IDX */
3821 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3822 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3823 /* 82575 needs the queue index added */
3824 if (adapter->hw.mac.type == e1000_82575)
3825 mss_l4len_idx |= txr->me << 4;
3826 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3827
3828 TXD->seqnum_seed = htole32(0);
3829
3830 if (++ctxd == txr->num_desc)
3831 ctxd = 0;
3832
3833 txr->tx_avail--;
3834 txr->next_avail_desc = ctxd;
3835 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3836 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3837 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3838 ++txr->tso_tx;
3839 return (0);
3840}
3841
3842/*********************************************************************
3843 *
3844 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3845 *
3846 **********************************************************************/
3847
3848static int
3849igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3850 u32 *cmd_type_len, u32 *olinfo_status)
3851{
3852 struct e1000_adv_tx_context_desc *TXD;
3853 struct adapter *adapter = txr->adapter;
3854 struct ether_vlan_header *eh;
3855 struct ip *ip;
3856 struct ip6_hdr *ip6;
3857 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3858 int ehdrlen, ip_hlen = 0;
3859 u16 etype;
3860 u8 ipproto = 0;
3861 int offload = TRUE;
3862 int ctxd = txr->next_avail_desc;
3863 u16 vtag = 0;
3864
3865 /* First check if TSO is to be used */
3866 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3867 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3868
3869 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3870 offload = FALSE;
3871
3872 /* Indicate the whole packet as payload when not doing TSO */
3873 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3874
3875 /* Now ready a context descriptor */
3876 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3877
3878 /*
3879 ** In advanced descriptors the vlan tag must
3880 ** be placed into the context descriptor. Hence
3881 ** we need to make one even if not doing offloads.
3882 */
3883 if (mp->m_flags & M_VLANTAG) {
3884 vtag = htole16(mp->m_pkthdr.ether_vtag);
3885 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3886 } else if (offload == FALSE) /* ... no offload to do */
3887 return (0);
3888
3889 /*
3890 * Determine where frame payload starts.
3891 * Jump over vlan headers if already present,
3892 * helpful for QinQ too.
3893 */
3894 eh = mtod(mp, struct ether_vlan_header *);
3895 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3896 etype = ntohs(eh->evl_proto);
3897 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3898 } else {
3899 etype = ntohs(eh->evl_encap_proto);
3900 ehdrlen = ETHER_HDR_LEN;
3901 }
3902
3903 /* Set the ether header length */
3904 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3905
3906 switch (etype) {
3907 case ETHERTYPE_IP:
3908 ip = (struct ip *)(mp->m_data + ehdrlen);
3909 ip_hlen = ip->ip_hl << 2;
3910 ipproto = ip->ip_p;
3911 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3912 break;
3913 case ETHERTYPE_IPV6:
3914 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3915 ip_hlen = sizeof(struct ip6_hdr);
3916 /* XXX-BZ this will go badly in case of ext hdrs. */
3917 ipproto = ip6->ip6_nxt;
3918 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3919 break;
3920 default:
3921 offload = FALSE;
3922 break;
3923 }
3924
3925 vlan_macip_lens |= ip_hlen;
3926 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
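	/*
	 * DTYP_CTXT marks this slot as an advanced context descriptor and
	 * DEXT selects the extended (advanced) descriptor format.
	 */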
3927
3928 switch (ipproto) {
3929 case IPPROTO_TCP:
3930 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3931 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3932 break;
3933 case IPPROTO_UDP:
3934 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3935 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3936 break;
3937
3938#if __FreeBSD_version >= 800000
3939 case IPPROTO_SCTP:
3940 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3941 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3942 break;
3943#endif
3944 default:
3945 offload = FALSE;
3946 break;
3947 }
3948
3949 if (offload) /* For the TX descriptor setup */
3950 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3951
3952 /* 82575 needs the queue index added */
3953 if (adapter->hw.mac.type == e1000_82575)
3954 mss_l4len_idx = txr->me << 4;
3955
3956 /* Now copy bits into descriptor */
3957 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3958 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3959 TXD->seqnum_seed = htole32(0);
3960 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3961
3962 /* We've consumed the first desc, adjust counters */
3963 if (++ctxd == txr->num_desc)
3964 ctxd = 0;
3965 txr->next_avail_desc = ctxd;
3966 --txr->tx_avail;
3967
3968 return (0);
3969}
3970
3971/**********************************************************************
3972 *
3973 * Examine each tx_buffer in the used queue. If the hardware is done
3974 * processing the packet then free associated resources. The
3975 * tx_buffer is put back on the free queue.
3976 *
3977 * TRUE return means there's work in the ring to clean; FALSE means it's empty.
3978 **********************************************************************/
3979static bool
3980igb_txeof(struct tx_ring *txr)
3981{
3982 struct adapter *adapter = txr->adapter;
3983#ifdef DEV_NETMAP
3984 struct ifnet *ifp = adapter->ifp;
3985#endif /* DEV_NETMAP */
3986 u32 work, processed = 0;
3987 int limit = adapter->tx_process_limit;
3988 struct igb_tx_buf *buf;
3989 union e1000_adv_tx_desc *txd;
3990
3991 mtx_assert(&txr->tx_mtx, MA_OWNED);
3992
3993#ifdef DEV_NETMAP
3994 if (netmap_tx_irq(ifp, txr->me))
3995 return (FALSE);
3996#endif /* DEV_NETMAP */
3997
3998 if (txr->tx_avail == txr->num_desc) {
3999 txr->queue_status = IGB_QUEUE_IDLE;
4000 return FALSE;
4001 }
4002
4003 /* Get work starting point */
4004 work = txr->next_to_clean;
4005 buf = &txr->tx_buffers[work];
4006 txd = &txr->tx_base[work];
4007 work -= txr->num_desc; /* The distance to ring end */
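	/*
	 * 'work' is biased negative so it reaches zero exactly when the
	 * index wraps past the end of the ring; the '!work' tests below
	 * use that to detect the wrap cheaply.
	 */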
4008 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4009 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4010 do {
4011 union e1000_adv_tx_desc *eop = buf->eop;
4012 if (eop == NULL) /* No work */
4013 break;
4014
4015 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4016 break; /* I/O not complete */
4017
4018 if (buf->m_head) {
4019 txr->bytes +=
4020 buf->m_head->m_pkthdr.len;
4021 bus_dmamap_sync(txr->txtag,
4022 buf->map,
4023 BUS_DMASYNC_POSTWRITE);
4024 bus_dmamap_unload(txr->txtag,
4025 buf->map);
4026 m_freem(buf->m_head);
4027 buf->m_head = NULL;
4028 }
4029 buf->eop = NULL;
4030 ++txr->tx_avail;
4031
4032 /* We clean the range if multi segment */
4033 while (txd != eop) {
4034 ++txd;
4035 ++buf;
4036 ++work;
4037 /* wrap the ring? */
4038 if (__predict_false(!work)) {
4039 work -= txr->num_desc;
4040 buf = txr->tx_buffers;
4041 txd = txr->tx_base;
4042 }
4043 if (buf->m_head) {
4044 txr->bytes +=
4045 buf->m_head->m_pkthdr.len;
4046 bus_dmamap_sync(txr->txtag,
4047 buf->map,
4048 BUS_DMASYNC_POSTWRITE);
4049 bus_dmamap_unload(txr->txtag,
4050 buf->map);
4051 m_freem(buf->m_head);
4052 buf->m_head = NULL;
4053 }
4054 ++txr->tx_avail;
4055 buf->eop = NULL;
4056
4057 }
4058 ++txr->packets;
4059 ++processed;
4060 txr->watchdog_time = ticks;
4061
4062 /* Try the next packet */
4063 ++txd;
4064 ++buf;
4065 ++work;
4066 /* reset with a wrap */
4067 if (__predict_false(!work)) {
4068 work -= txr->num_desc;
4069 buf = txr->tx_buffers;
4070 txd = txr->tx_base;
4071 }
4072 prefetch(txd);
4073 } while (__predict_true(--limit));
4074
4075 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4076 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4077
4078 work += txr->num_desc;
4079 txr->next_to_clean = work;
4080
4081 /*
4082	** Watchdog calculation: we know there's
4083	** work outstanding or the first return
4084	** above would have been taken, so if nothing
4085	** has been processed for too long it indicates a hang.
4086 */
4087 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4088 txr->queue_status |= IGB_QUEUE_HUNG;
4089
4090 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4091 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4092
4093 if (txr->tx_avail == txr->num_desc) {
4094 txr->queue_status = IGB_QUEUE_IDLE;
4095 return (FALSE);
4096 }
4097
4098 return (TRUE);
4099}
4100
4101/*********************************************************************
4102 *
4103 * Refresh mbuf buffers for RX descriptor rings
4104 *   - now keeps its own state so discards due to resource
4105 *     exhaustion are unnecessary; if an mbuf cannot be obtained
4106 *     it just returns, keeping its placeholder, so it can simply
4107 *     be called again later to retry.
4108 *
4109 **********************************************************************/
4110static void
4111igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4112{
4113 struct adapter *adapter = rxr->adapter;
4114 bus_dma_segment_t hseg[1];
4115 bus_dma_segment_t pseg[1];
4116 struct igb_rx_buf *rxbuf;
4117 struct mbuf *mh, *mp;
4118 int i, j, nsegs, error;
4119 bool refreshed = FALSE;
4120
4121 i = j = rxr->next_to_refresh;
4122 /*
4123 ** Get one descriptor beyond
4124 ** our work mark to control
4125 ** the loop.
4126 */
4127 if (++j == adapter->num_rx_desc)
4128 j = 0;
4129
4130 while (j != limit) {
4131 rxbuf = &rxr->rx_buffers[i];
4132 /* No hdr mbuf used with header split off */
4133 if (rxr->hdr_split == FALSE)
4134 goto no_split;
4135 if (rxbuf->m_head == NULL) {
4136 mh = m_gethdr(M_NOWAIT, MT_DATA);
4137 if (mh == NULL)
4138 goto update;
4139 } else
4140 mh = rxbuf->m_head;
4141
4142 mh->m_pkthdr.len = mh->m_len = MHLEN;
4143 mh->m_len = MHLEN;
4144 mh->m_flags |= M_PKTHDR;
4145 /* Get the memory mapping */
4146 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4147 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4148 if (error != 0) {
4149 printf("Refresh mbufs: hdr dmamap load"
4150 " failure - %d\n", error);
4151 m_free(mh);
4152 rxbuf->m_head = NULL;
4153 goto update;
4154 }
4155 rxbuf->m_head = mh;
4156 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4157 BUS_DMASYNC_PREREAD);
4158 rxr->rx_base[i].read.hdr_addr =
4159 htole64(hseg[0].ds_addr);
4160no_split:
4161 if (rxbuf->m_pack == NULL) {
4162 mp = m_getjcl(M_NOWAIT, MT_DATA,
4163 M_PKTHDR, adapter->rx_mbuf_sz);
4164 if (mp == NULL)
4165 goto update;
4166 } else
4167 mp = rxbuf->m_pack;
4168
4169 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4170 /* Get the memory mapping */
4171 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4172 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4173 if (error != 0) {
4174 printf("Refresh mbufs: payload dmamap load"
4175 " failure - %d\n", error);
4176 m_free(mp);
4177 rxbuf->m_pack = NULL;
4178 goto update;
4179 }
4180 rxbuf->m_pack = mp;
4181 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4182 BUS_DMASYNC_PREREAD);
4183 rxr->rx_base[i].read.pkt_addr =
4184 htole64(pseg[0].ds_addr);
4185 refreshed = TRUE; /* I feel wefreshed :) */
4186
4187 i = j; /* our next is precalculated */
4188 rxr->next_to_refresh = i;
4189 if (++j == adapter->num_rx_desc)
4190 j = 0;
4191 }
4192update:
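	/*
	 * Writing RDT hands the refreshed descriptors back to the hardware;
	 * it advertises how far the driver has replenished buffers.
	 */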
4193 if (refreshed) /* update tail */
4194 E1000_WRITE_REG(&adapter->hw,
4195 E1000_RDT(rxr->me), rxr->next_to_refresh);
4196 return;
4197}
4198
4199
4200/*********************************************************************
4201 *
4202 * Allocate memory for rx_buffer structures. Since we use one
4203 * rx_buffer per received packet, the maximum number of rx_buffer's
4204 * that we'll need is equal to the number of receive descriptors
4205 * that we've allocated.
4206 *
4207 **********************************************************************/
4208static int
4209igb_allocate_receive_buffers(struct rx_ring *rxr)
4210{
4211 struct adapter *adapter = rxr->adapter;
4212 device_t dev = adapter->dev;
4213 struct igb_rx_buf *rxbuf;
4214 int i, bsize, error;
4215
4216 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4217 if (!(rxr->rx_buffers =
4218 (struct igb_rx_buf *) malloc(bsize,
4219 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4220 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4221 error = ENOMEM;
4222 goto fail;
4223 }
4224
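	/*
	 * Two DMA tags are created below: a small one (MSIZE, a single
	 * mbuf) for header-split headers, and a jumbo one (MJUM9BYTES)
	 * for the payload clusters.
	 */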
4225 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4226 1, 0, /* alignment, bounds */
4227 BUS_SPACE_MAXADDR, /* lowaddr */
4228 BUS_SPACE_MAXADDR, /* highaddr */
4229 NULL, NULL, /* filter, filterarg */
4230 MSIZE, /* maxsize */
4231 1, /* nsegments */
4232 MSIZE, /* maxsegsize */
4233 0, /* flags */
4234 NULL, /* lockfunc */
4235 NULL, /* lockfuncarg */
4236 &rxr->htag))) {
4237 device_printf(dev, "Unable to create RX DMA tag\n");
4238 goto fail;
4239 }
4240
4241 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4242 1, 0, /* alignment, bounds */
4243 BUS_SPACE_MAXADDR, /* lowaddr */
4244 BUS_SPACE_MAXADDR, /* highaddr */
4245 NULL, NULL, /* filter, filterarg */
4246 MJUM9BYTES, /* maxsize */
4247 1, /* nsegments */
4248 MJUM9BYTES, /* maxsegsize */
4249 0, /* flags */
4250 NULL, /* lockfunc */
4251 NULL, /* lockfuncarg */
4252 &rxr->ptag))) {
4253 device_printf(dev, "Unable to create RX payload DMA tag\n");
4254 goto fail;
4255 }
4256
4257 for (i = 0; i < adapter->num_rx_desc; i++) {
4258 rxbuf = &rxr->rx_buffers[i];
4259 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4260 if (error) {
4261 device_printf(dev,
4262 "Unable to create RX head DMA maps\n");
4263 goto fail;
4264 }
4265 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4266 if (error) {
4267 device_printf(dev,
4268 "Unable to create RX packet DMA maps\n");
4269 goto fail;
4270 }
4271 }
4272
4273 return (0);
4274
4275fail:
4276 /* Frees all, but can handle partial completion */
4277 igb_free_receive_structures(adapter);
4278 return (error);
4279}
4280
4281
4282static void
4283igb_free_receive_ring(struct rx_ring *rxr)
4284{
4285 struct adapter *adapter = rxr->adapter;
4286 struct igb_rx_buf *rxbuf;
4287
4288
4289 for (int i = 0; i < adapter->num_rx_desc; i++) {
4290 rxbuf = &rxr->rx_buffers[i];
4291 if (rxbuf->m_head != NULL) {
4292 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4293 BUS_DMASYNC_POSTREAD);
4294 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4295 rxbuf->m_head->m_flags |= M_PKTHDR;
4296 m_freem(rxbuf->m_head);
4297 }
4298 if (rxbuf->m_pack != NULL) {
4299 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4300 BUS_DMASYNC_POSTREAD);
4301 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4302 rxbuf->m_pack->m_flags |= M_PKTHDR;
4303 m_freem(rxbuf->m_pack);
4304 }
4305 rxbuf->m_head = NULL;
4306 rxbuf->m_pack = NULL;
4307 }
4308}
4309
4310
4311/*********************************************************************
4312 *
4313 * Initialize a receive ring and its buffers.
4314 *
4315 **********************************************************************/
4316static int
4317igb_setup_receive_ring(struct rx_ring *rxr)
4318{
4319 struct adapter *adapter;
4320 struct ifnet *ifp;
4321 device_t dev;
4322 struct igb_rx_buf *rxbuf;
4323 bus_dma_segment_t pseg[1], hseg[1];
4324 struct lro_ctrl *lro = &rxr->lro;
4325 int rsize, nsegs, error = 0;
4326#ifdef DEV_NETMAP
4327 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4328 struct netmap_slot *slot;
4329#endif /* DEV_NETMAP */
4330
4331 adapter = rxr->adapter;
4332 dev = adapter->dev;
4333 ifp = adapter->ifp;
4334
4335 /* Clear the ring contents */
4336 IGB_RX_LOCK(rxr);
4337#ifdef DEV_NETMAP
4338 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4339#endif /* DEV_NETMAP */
4340 rsize = roundup2(adapter->num_rx_desc *
4341 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4342 bzero((void *)rxr->rx_base, rsize);
4343
4344 /*
4345 ** Free current RX buffer structures and their mbufs
4346 */
4347 igb_free_receive_ring(rxr);
4348
4349 /* Configure for header split? */
4350 if (igb_header_split)
4351 rxr->hdr_split = TRUE;
4352
4353 /* Now replenish the ring mbufs */
4354 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4355 struct mbuf *mh, *mp;
4356
4357 rxbuf = &rxr->rx_buffers[j];
4358#ifdef DEV_NETMAP
4359 if (slot) {
4360 /* slot sj is mapped to the j-th NIC-ring entry */
4361 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4362 uint64_t paddr;
4363 void *addr;
4364
4365 addr = PNMB(na, slot + sj, &paddr);
4366 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4367 /* Update descriptor */
4368 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4369 continue;
4370 }
4371#endif /* DEV_NETMAP */
4372 if (rxr->hdr_split == FALSE)
4373 goto skip_head;
4374
4375 /* First the header */
4376 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4377 if (rxbuf->m_head == NULL) {
4378 error = ENOBUFS;
4379 goto fail;
4380 }
4381 m_adj(rxbuf->m_head, ETHER_ALIGN);
4382 mh = rxbuf->m_head;
4383 mh->m_len = mh->m_pkthdr.len = MHLEN;
4384 mh->m_flags |= M_PKTHDR;
4385 /* Get the memory mapping */
4386 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4387 rxbuf->hmap, rxbuf->m_head, hseg,
4388 &nsegs, BUS_DMA_NOWAIT);
4389 if (error != 0) /* Nothing elegant to do here */
4390 goto fail;
4391 bus_dmamap_sync(rxr->htag,
4392 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4393 /* Update descriptor */
4394 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4395
4396skip_head:
4397 /* Now the payload cluster */
4398 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4399 M_PKTHDR, adapter->rx_mbuf_sz);
4400 if (rxbuf->m_pack == NULL) {
4401 error = ENOBUFS;
4402 goto fail;
4403 }
4404 mp = rxbuf->m_pack;
4405 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4406 /* Get the memory mapping */
4407 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4408 rxbuf->pmap, mp, pseg,
4409 &nsegs, BUS_DMA_NOWAIT);
4410 if (error != 0)
4411 goto fail;
4412 bus_dmamap_sync(rxr->ptag,
4413 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4414 /* Update descriptor */
4415 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4416 }
4417
4418 /* Setup our descriptor indices */
4419 rxr->next_to_check = 0;
4420 rxr->next_to_refresh = adapter->num_rx_desc - 1;
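	/*
	 * Every descriptor now owns a buffer, so the refresh point sits
	 * on the last slot, one behind next_to_check (0).
	 */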
4421 rxr->lro_enabled = FALSE;
4422 rxr->rx_split_packets = 0;
4423 rxr->rx_bytes = 0;
4424
4425 rxr->fmp = NULL;
4426 rxr->lmp = NULL;
4427
4428 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4429 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4430
4431 /*
4432	** Now set up the LRO interface; we
4433	** also only do header split when LRO
4434	** is enabled, since so often they
4435	** are undesirable in similar setups.
4436 */
4437 if (ifp->if_capenable & IFCAP_LRO) {
4438 error = tcp_lro_init(lro);
4439 if (error) {
4440 device_printf(dev, "LRO Initialization failed!\n");
4441 goto fail;
4442 }
4443 INIT_DEBUGOUT("RX LRO Initialized\n");
4444 rxr->lro_enabled = TRUE;
4445 lro->ifp = adapter->ifp;
4446 }
4447
4448 IGB_RX_UNLOCK(rxr);
4449 return (0);
4450
4451fail:
4452 igb_free_receive_ring(rxr);
4453 IGB_RX_UNLOCK(rxr);
4454 return (error);
4455}
4456
4457
4458/*********************************************************************
4459 *
4460 * Initialize all receive rings.
4461 *
4462 **********************************************************************/
4463static int
4464igb_setup_receive_structures(struct adapter *adapter)
4465{
4466 struct rx_ring *rxr = adapter->rx_rings;
4467 int i;
4468
4469 for (i = 0; i < adapter->num_queues; i++, rxr++)
4470 if (igb_setup_receive_ring(rxr))
4471 goto fail;
4472
4473 return (0);
4474fail:
4475 /*
4476	 * Free RX buffers allocated so far; we will only handle
4477	 * the rings that completed, since the failing case will have
4478	 * cleaned up after itself. 'i' is the endpoint.
4479 */
4480 for (int j = 0; j < i; ++j) {
4481 rxr = &adapter->rx_rings[j];
4482 IGB_RX_LOCK(rxr);
4483 igb_free_receive_ring(rxr);
4484 IGB_RX_UNLOCK(rxr);
4485 }
4486
4487 return (ENOBUFS);
4488}
4489
4490/*
4491 * Initialise the RSS mapping for NICs that support multiple transmit/
4492 * receive rings.
4493 */
4494static void
4495igb_initialise_rss_mapping(struct adapter *adapter)
4496{
4497 struct e1000_hw *hw = &adapter->hw;
4498 int i;
4499 int queue_id;
4500 u32 reta;
4501 u32 rss_key[10], mrqc, shift = 0;
4502
4503 /* XXX? */
4504 if (adapter->hw.mac.type == e1000_82575)
4505 shift = 6;
4506
4507 /*
4508 * The redirection table controls which destination
4509 * queue each bucket redirects traffic to.
4510 * Each DWORD represents four queues, with the LSB
4511 * being the first queue in the DWORD.
4512 *
4513 * This just allocates buckets to queues using round-robin
4514 * allocation.
4515 *
4516 * NOTE: It Just Happens to line up with the default
4517 * RSS allocation method.
4518 */
4519
4520 /* Warning FM follows */
4521 reta = 0;
4522 for (i = 0; i < 128; i++) {
4523#ifdef RSS
4524 queue_id = rss_get_indirection_to_bucket(i);
4525 /*
4526 * If we have more queues than buckets, we'll
4527 * end up mapping buckets to a subset of the
4528 * queues.
4529 *
4530 * If we have more buckets than queues, we'll
4531 * end up instead assigning multiple buckets
4532 * to queues.
4533 *
4534 * Both are suboptimal, but we need to handle
4535 * the case so we don't go out of bounds
4536 * indexing arrays and such.
4537 */
4538 queue_id = queue_id % adapter->num_queues;
4539#else
4540 queue_id = (i % adapter->num_queues);
4541#endif
4542 /* Adjust if required */
4543 queue_id = queue_id << shift;
4544
4545 /*
4546 * The low 8 bits are for hash value (n+0);
4547 * The next 8 bits are for hash value (n+1), etc.
4548 */
4549 reta = reta >> 8;
4550 reta = reta | ( ((uint32_t) queue_id) << 24);
4551 if ((i & 3) == 3) {
4552 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4553 reta = 0;
4554 }
4555 }
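	/*
	 * Example: with four queues and shift == 0, the bytes accumulate
	 * to 0x03020100 by the fourth pass, i.e. bucket 0 -> queue 0,
	 * bucket 1 -> queue 1, and so on, repeated across the 32 RETA
	 * registers.
	 */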
4556
4557 /* Now fill in hash table */
4558
4559 /*
4560 * MRQC: Multiple Receive Queues Command
4561 * Set queuing to RSS control, number depends on the device.
4562 */
4563 mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4564
4565#ifdef RSS
4566 /* XXX ew typecasting */
4567 rss_getkey((uint8_t *) &rss_key);
4568#else
4569 arc4rand(&rss_key, sizeof(rss_key), 0);
4570#endif
4571 for (i = 0; i < 10; i++)
4572 E1000_WRITE_REG_ARRAY(hw,
4573 E1000_RSSRK(0), i, rss_key[i]);
4574
4575 /*
4576 * Configure the RSS fields to hash upon.
4577 */
4578 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4579 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4580 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4581 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4582	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4583	    E1000_MRQC_RSS_FIELD_IPV6_UDP);
4584	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4585	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4586
4587 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4588}
4589
4590/*********************************************************************
4591 *
4592 * Enable receive unit.
4593 *
4594 **********************************************************************/
4595static void
4596igb_initialize_receive_units(struct adapter *adapter)
4597{
4598 struct rx_ring *rxr = adapter->rx_rings;
4599 struct ifnet *ifp = adapter->ifp;
4600 struct e1000_hw *hw = &adapter->hw;
4601 u32 rctl, rxcsum, psize, srrctl = 0;
4602
4603 INIT_DEBUGOUT("igb_initialize_receive_unit: begin");
4604
4605 /*
4606 * Make sure receives are disabled while setting
4607 * up the descriptor ring
4608 */
4609 rctl = E1000_READ_REG(hw, E1000_RCTL);
4610 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4611
4612 /*
4613 ** Set up for header split
4614 */
4615 if (igb_header_split) {
4616 /* Use a standard mbuf for the header */
4617 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4618 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4619 } else
4620 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4621
4622 /*
4623 ** Set up for jumbo frames
4624 */
4625 if (ifp->if_mtu > ETHERMTU) {
4626 rctl |= E1000_RCTL_LPE;
4627 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4628 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4629 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4630 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4631 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4632 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4633 }
4634 /* Set maximum packet len */
4635 psize = adapter->max_frame_size;
4636 /* are we on a vlan? */
4637 if (adapter->ifp->if_vlantrunk != NULL)
4638 psize += VLAN_TAG_SIZE;
4639 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4640 } else {
4641 rctl &= ~E1000_RCTL_LPE;
4642 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4643 rctl |= E1000_RCTL_SZ_2048;
4644 }
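	/*
	 * The SRRCTL packet buffer size field is expressed in 1 KB units,
	 * which is what the BSIZEPKT_SHIFT right-shifts above convert the
	 * 2048/4096/8192 byte buffer sizes into.
	 */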
4645
4646 /*
4647 * If TX flow control is disabled and there's >1 queue defined,
4648 * enable DROP.
4649 *
4650 * This drops frames rather than hanging the RX MAC for all queues.
4651 */
4652 if ((adapter->num_queues > 1) &&
4653 (adapter->fc == e1000_fc_none ||
4654 adapter->fc == e1000_fc_rx_pause)) {
4655 srrctl |= E1000_SRRCTL_DROP_EN;
4656 }
4657
4658 /* Setup the Base and Length of the Rx Descriptor Rings */
4659 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4660 u64 bus_addr = rxr->rxdma.dma_paddr;
4661 u32 rxdctl;
4662
4663 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4664 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4665 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4666 (uint32_t)(bus_addr >> 32));
4667 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4668 (uint32_t)bus_addr);
4669 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4670 /* Enable this Queue */
4671 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4672 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4673 rxdctl &= 0xFFF00000;
4674 rxdctl |= IGB_RX_PTHRESH;
4675 rxdctl |= IGB_RX_HTHRESH << 8;
4676 rxdctl |= IGB_RX_WTHRESH << 16;
4677 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4678 }
4679
4680 /*
4681 ** Setup for RX MultiQueue
4682 */
4683 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4684	if (adapter->num_queues > 1) {
4685
4686 /* rss setup */
4687 igb_initialise_rss_mapping(adapter);
4688
4689 /*
4690 ** NOTE: Receive Full-Packet Checksum Offload
4691 ** is mutually exclusive with Multiqueue. However
4692 ** this is not the same as TCP/IP checksums which
4693 ** still work.
4694 */
4695 rxcsum |= E1000_RXCSUM_PCSD;
4696#if __FreeBSD_version >= 800000
4697 /* For SCTP Offload */
4698 if (((hw->mac.type == e1000_82576) ||
4699 (hw->mac.type == e1000_82580)) &&
4700 (ifp->if_capenable & IFCAP_RXCSUM))
4701 rxcsum |= E1000_RXCSUM_CRCOFL;
4702#endif
4703 } else {
4704 /* Non RSS setup */
4705 if (ifp->if_capenable & IFCAP_RXCSUM) {
4706 rxcsum |= E1000_RXCSUM_IPPCSE;
4707#if __FreeBSD_version >= 800000
4708 if ((adapter->hw.mac.type == e1000_82576) ||
4709 (adapter->hw.mac.type == e1000_82580))
4710 rxcsum |= E1000_RXCSUM_CRCOFL;
4711#endif
4712 } else
4713 rxcsum &= ~E1000_RXCSUM_TUOFL;
4714 }
4715 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4716
4717 /* Setup the Receive Control Register */
4718 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4719 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4720 E1000_RCTL_RDMTS_HALF |
4721 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4722 /* Strip CRC bytes. */
4723 rctl |= E1000_RCTL_SECRC;
4724 /* Make sure VLAN Filters are off */
4725 rctl &= ~E1000_RCTL_VFE;
4726 /* Don't store bad packets */
4727 rctl &= ~E1000_RCTL_SBP;
4728
4729 /* Enable Receives */
4730 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4731
4732 /*
4733 * Setup the HW Rx Head and Tail Descriptor Pointers
4734 * - needs to be after enable
4735 */
4736 for (int i = 0; i < adapter->num_queues; i++) {
4737 rxr = &adapter->rx_rings[i];
4738 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4739#ifdef DEV_NETMAP
4740 /*
4741 * an init() while a netmap client is active must
4742 * preserve the rx buffers passed to userspace.
4743 * In this driver it means we adjust RDT to
4744 * something different from next_to_refresh
4745 * (which is not used in netmap mode).
4746 */
4747 if (ifp->if_capenable & IFCAP_NETMAP) {
4748 struct netmap_adapter *na = NA(adapter->ifp);
4749 struct netmap_kring *kring = &na->rx_rings[i];
4750 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4751
4752 if (t >= adapter->num_rx_desc)
4753 t -= adapter->num_rx_desc;
4754 else if (t < 0)
4755 t += adapter->num_rx_desc;
4756 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4757 } else
4758#endif /* DEV_NETMAP */
4759 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4760 }
4761 return;
4762}
4763
4764/*********************************************************************
4765 *
4766 * Free receive rings.
4767 *
4768 **********************************************************************/
4769static void
4770igb_free_receive_structures(struct adapter *adapter)
4771{
4772 struct rx_ring *rxr = adapter->rx_rings;
4773
4774 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4775 struct lro_ctrl *lro = &rxr->lro;
4776 igb_free_receive_buffers(rxr);
4777 tcp_lro_free(lro);
4778 igb_dma_free(adapter, &rxr->rxdma);
4779 }
4780
4781 free(adapter->rx_rings, M_DEVBUF);
4782}
4783
4784/*********************************************************************
4785 *
4786 * Free receive ring data structures.
4787 *
4788 **********************************************************************/
4789static void
4790igb_free_receive_buffers(struct rx_ring *rxr)
4791{
4792 struct adapter *adapter = rxr->adapter;
4793 struct igb_rx_buf *rxbuf;
4794 int i;
4795
4796 INIT_DEBUGOUT("free_receive_structures: begin");
4797
4798 /* Cleanup any existing buffers */
4799 if (rxr->rx_buffers != NULL) {
4800 for (i = 0; i < adapter->num_rx_desc; i++) {
4801 rxbuf = &rxr->rx_buffers[i];
4802 if (rxbuf->m_head != NULL) {
4803 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4804 BUS_DMASYNC_POSTREAD);
4805 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4806 rxbuf->m_head->m_flags |= M_PKTHDR;
4807 m_freem(rxbuf->m_head);
4808 }
4809 if (rxbuf->m_pack != NULL) {
4810 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4811 BUS_DMASYNC_POSTREAD);
4812 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4813 rxbuf->m_pack->m_flags |= M_PKTHDR;
4814 m_freem(rxbuf->m_pack);
4815 }
4816 rxbuf->m_head = NULL;
4817 rxbuf->m_pack = NULL;
4818 if (rxbuf->hmap != NULL) {
4819 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4820 rxbuf->hmap = NULL;
4821 }
4822 if (rxbuf->pmap != NULL) {
4823 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4824 rxbuf->pmap = NULL;
4825 }
4826 }
4827 if (rxr->rx_buffers != NULL) {
4828 free(rxr->rx_buffers, M_DEVBUF);
4829 rxr->rx_buffers = NULL;
4830 }
4831 }
4832
4833 if (rxr->htag != NULL) {
4834 bus_dma_tag_destroy(rxr->htag);
4835 rxr->htag = NULL;
4836 }
4837 if (rxr->ptag != NULL) {
4838 bus_dma_tag_destroy(rxr->ptag);
4839 rxr->ptag = NULL;
4840 }
4841}
4842
4843static __inline void
4844igb_rx_discard(struct rx_ring *rxr, int i)
4845{
4846 struct igb_rx_buf *rbuf;
4847
4848 rbuf = &rxr->rx_buffers[i];
4849
4850 /* Partially received? Free the chain */
4851 if (rxr->fmp != NULL) {
4852 rxr->fmp->m_flags |= M_PKTHDR;
4853 m_freem(rxr->fmp);
4854 rxr->fmp = NULL;
4855 rxr->lmp = NULL;
4856 }
4857
4858 /*
4859 ** With advanced descriptors the writeback
4860	** clobbers the buffer addrs, so it's easier
4861 ** to just free the existing mbufs and take
4862 ** the normal refresh path to get new buffers
4863 ** and mapping.
4864 */
4865 if (rbuf->m_head) {
4866 m_free(rbuf->m_head);
4867 rbuf->m_head = NULL;
4868 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4869 }
4870
4871 if (rbuf->m_pack) {
4872 m_free(rbuf->m_pack);
4873 rbuf->m_pack = NULL;
4874 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4875 }
4876
4877 return;
4878}
4879
4880static __inline void
4881igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4882{
4883
4884 /*
4885	 * At the moment LRO is only for IPv4/TCP packets and the TCP checksum
4886	 * of the packet should be computed by hardware. The packet also should
4887	 * not have a VLAN tag in its ethernet header.
4888 */
4889 if (rxr->lro_enabled &&
4890 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4891 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4892 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4893 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4894 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4895 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4896 /*
4897 * Send to the stack if:
4898		 *  - LRO not enabled, or
4899		 *  - no LRO resources, or
4900		 *  - lro enqueue fails
4901 */
4902 if (rxr->lro.lro_cnt != 0)
4903 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4904 return;
4905 }
4906 IGB_RX_UNLOCK(rxr);
4907 (*ifp->if_input)(ifp, m);
4908 IGB_RX_LOCK(rxr);
4909}
4910
4911/*********************************************************************
4912 *
4913 * This routine executes in interrupt context. It replenishes
4914 * the mbufs in the descriptor and sends data which has been
4915 * dma'ed into host memory to upper layer.
4916 *
4917 * We loop at most count times if count is > 0, or until done if
4918 * count < 0.
4919 *
4920 * Return TRUE if more to clean, FALSE otherwise
4921 *********************************************************************/
4922static bool
4923igb_rxeof(struct igb_queue *que, int count, int *done)
4924{
4925 struct adapter *adapter = que->adapter;
4926 struct rx_ring *rxr = que->rxr;
4927 struct ifnet *ifp = adapter->ifp;
4928 struct lro_ctrl *lro = &rxr->lro;
4929 struct lro_entry *queued;
4930 int i, processed = 0, rxdone = 0;
4931 u32 ptype, staterr = 0;
4932 union e1000_adv_rx_desc *cur;
4933
4934 IGB_RX_LOCK(rxr);
4935 /* Sync the ring. */
4936 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4937 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4938
4939#ifdef DEV_NETMAP
4940 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4941 IGB_RX_UNLOCK(rxr);
4942 return (FALSE);
4943 }
4944#endif /* DEV_NETMAP */
4945
4946 /* Main clean loop */
4947 for (i = rxr->next_to_check; count != 0;) {
4948 struct mbuf *sendmp, *mh, *mp;
4949 struct igb_rx_buf *rxbuf;
4950 u16 hlen, plen, hdr, vtag, pkt_info;
4951 bool eop = FALSE;
4952
4953 cur = &rxr->rx_base[i];
4954 staterr = le32toh(cur->wb.upper.status_error);
4955 if ((staterr & E1000_RXD_STAT_DD) == 0)
4956 break;
4957 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4958 break;
4959 count--;
4960 sendmp = mh = mp = NULL;
4961 cur->wb.upper.status_error = 0;
4962 rxbuf = &rxr->rx_buffers[i];
4963 plen = le16toh(cur->wb.upper.length);
4964 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
4965 if (((adapter->hw.mac.type == e1000_i350) ||
4966 (adapter->hw.mac.type == e1000_i354)) &&
4967 (staterr & E1000_RXDEXT_STATERR_LB))
4968 vtag = be16toh(cur->wb.upper.vlan);
4969 else
4970 vtag = le16toh(cur->wb.upper.vlan);
4971 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4972 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4973 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
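		/*
		 * DD (descriptor done) means the hardware has written this
		 * descriptor back; EOP marks the final descriptor of a frame.
		 */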
4974
4975 /*
4976 * Free the frame (all segments) if we're at EOP and
4977 * it's an error.
4978 *
4979 * The datasheet states that EOP + status is only valid for
4980 * the final segment in a multi-segment frame.
4981 */
4982 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4983 adapter->dropped_pkts++;
4984 ++rxr->rx_discarded;
4985 igb_rx_discard(rxr, i);
4986 goto next_desc;
4987 }
4988
4989 /*
4990 ** The way the hardware is configured to
4991 ** split, it will ONLY use the header buffer
4992 ** when header split is enabled, otherwise we
4993		** get normal behavior, i.e., both header and
4994 ** payload are DMA'd into the payload buffer.
4995 **
4996 ** The fmp test is to catch the case where a
4997 ** packet spans multiple descriptors, in that
4998		** packet spans multiple descriptors; in that
4999 */
5000 if (rxr->hdr_split && rxr->fmp == NULL) {
5001 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5002 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5003 E1000_RXDADV_HDRBUFLEN_SHIFT;
5004 if (hlen > IGB_HDR_BUF)
5005 hlen = IGB_HDR_BUF;
5006 mh = rxr->rx_buffers[i].m_head;
5007 mh->m_len = hlen;
5008 /* clear buf pointer for refresh */
5009 rxbuf->m_head = NULL;
5010 /*
5011 ** Get the payload length, this
5012 ** could be zero if its a small
5013 ** packet.
5014 */
5015 if (plen > 0) {
5016 mp = rxr->rx_buffers[i].m_pack;
5017 mp->m_len = plen;
5018 mh->m_next = mp;
5019 /* clear buf pointer */
5020 rxbuf->m_pack = NULL;
5021 rxr->rx_split_packets++;
5022 }
5023 } else {
5024 /*
5025 ** Either no header split, or a
5026 ** secondary piece of a fragmented
5027 ** split packet.
5028 */
5029 mh = rxr->rx_buffers[i].m_pack;
5030 mh->m_len = plen;
5031 /* clear buf info for refresh */
5032 rxbuf->m_pack = NULL;
5033 }
5034 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5035
5036 ++processed; /* So we know when to refresh */
5037
5038 /* Initial frame - setup */
5039 if (rxr->fmp == NULL) {
5040 mh->m_pkthdr.len = mh->m_len;
5041 /* Save the head of the chain */
5042 rxr->fmp = mh;
5043 rxr->lmp = mh;
5044 if (mp != NULL) {
5045 /* Add payload if split */
5046 mh->m_pkthdr.len += mp->m_len;
5047 rxr->lmp = mh->m_next;
5048 }
5049 } else {
5050 /* Chain mbuf's together */
5051 rxr->lmp->m_next = mh;
5052 rxr->lmp = rxr->lmp->m_next;
5053 rxr->fmp->m_pkthdr.len += mh->m_len;
5054 }
5055
5056 if (eop) {
5057 rxr->fmp->m_pkthdr.rcvif = ifp;
5058 rxr->rx_packets++;
5059 /* capture data for AIM */
5060 rxr->packets++;
5061 rxr->bytes += rxr->fmp->m_pkthdr.len;
5062 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5063
5064 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5065 igb_rx_checksum(staterr, rxr->fmp, ptype);
5066
5067 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5068 (staterr & E1000_RXD_STAT_VP) != 0) {
5069 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5070 rxr->fmp->m_flags |= M_VLANTAG;
5071 }
5072
5073 /*
5074 * In case of multiqueue, we have RXCSUM.PCSD bit set
5075 * and never cleared. This means we have RSS hash
5076 * available to be used.
5077 */
5078 if (adapter->num_queues > 1) {
5079 rxr->fmp->m_pkthdr.flowid =
5080 le32toh(cur->wb.lower.hi_dword.rss);
5081 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5082 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5083 M_HASHTYPE_SET(rxr->fmp,
5084 M_HASHTYPE_RSS_TCP_IPV4);
5085 break;
5086 case E1000_RXDADV_RSSTYPE_IPV4:
5087 M_HASHTYPE_SET(rxr->fmp,
5088 M_HASHTYPE_RSS_IPV4);
5089 break;
5090 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5091 M_HASHTYPE_SET(rxr->fmp,
5092 M_HASHTYPE_RSS_TCP_IPV6);
5093 break;
5094 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5095 M_HASHTYPE_SET(rxr->fmp,
5096 M_HASHTYPE_RSS_IPV6_EX);
5097 break;
5098 case E1000_RXDADV_RSSTYPE_IPV6:
5099 M_HASHTYPE_SET(rxr->fmp,
5100 M_HASHTYPE_RSS_IPV6);
5101 break;
5102 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5103 M_HASHTYPE_SET(rxr->fmp,
5104 M_HASHTYPE_RSS_TCP_IPV6_EX);
5105 break;
5106 default:
5107 /* XXX fallthrough */
5108 M_HASHTYPE_SET(rxr->fmp,
5109 M_HASHTYPE_OPAQUE);
5110 }
5111 } else {
5112#ifndef IGB_LEGACY_TX
5113 rxr->fmp->m_pkthdr.flowid = que->msix;
5114 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5115#endif
5116 }
5117 sendmp = rxr->fmp;
5118 /* Make sure to set M_PKTHDR. */
5119 sendmp->m_flags |= M_PKTHDR;
5120 rxr->fmp = NULL;
5121 rxr->lmp = NULL;
5122 }
5123
5124next_desc:
5125 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5126 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5127
5128 /* Advance our pointers to the next descriptor. */
5129 if (++i == adapter->num_rx_desc)
5130 i = 0;
5131 /*
5132 ** Send to the stack or LRO
5133 */
5134 if (sendmp != NULL) {
5135 rxr->next_to_check = i;
5136 igb_rx_input(rxr, ifp, sendmp, ptype);
5137 i = rxr->next_to_check;
5138 rxdone++;
5139 }
5140
5141 /* Every 8 descriptors we go to refresh mbufs */
5142 if (processed == 8) {
5143 igb_refresh_mbufs(rxr, i);
5144 processed = 0;
5145 }
5146 }
5147
5148 /* Catch any remainders */
5149 if (igb_rx_unrefreshed(rxr))
5150 igb_refresh_mbufs(rxr, i);
5151
5152 rxr->next_to_check = i;
5153
5154 /*
5155 * Flush any outstanding LRO work
5156 */
5157 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5158 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5159 tcp_lro_flush(lro, queued);
5160 }
5161
5162 if (done != NULL)
5163 *done += rxdone;
5164
5165 IGB_RX_UNLOCK(rxr);
5166 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5167}
5168
5169/*********************************************************************
5170 *
5171 * Verify that the hardware indicated that the checksum is valid.
5172 * Inform the stack about the status of checksum so that stack
5173 * doesn't spend time verifying the checksum.
5174 *
5175 *********************************************************************/
5176static void
5177igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5178{
5179 u16 status = (u16)staterr;
5180 u8 errors = (u8) (staterr >> 24);
5181 int sctp;
5182
5183 /* Ignore Checksum bit is set */
5184 if (status & E1000_RXD_STAT_IXSM) {
5185 mp->m_pkthdr.csum_flags = 0;
5186 return;
5187 }
5188
5189 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5190 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5191 sctp = 1;
5192 else
5193 sctp = 0;
5194 if (status & E1000_RXD_STAT_IPCS) {
5195 /* Did it pass? */
5196 if (!(errors & E1000_RXD_ERR_IPE)) {
5197 /* IP Checksum Good */
5198 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5199 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5200 } else
5201 mp->m_pkthdr.csum_flags = 0;
5202 }
5203
5204 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5205 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5206#if __FreeBSD_version >= 800000
5207 if (sctp) /* reassign */
5208 type = CSUM_SCTP_VALID;
5209#endif
5210 /* Did it pass? */
5211 if (!(errors & E1000_RXD_ERR_TCPE)) {
5212 mp->m_pkthdr.csum_flags |= type;
5213 if (sctp == 0)
5214 mp->m_pkthdr.csum_data = htons(0xffff);
5215 }
5216 }
5217 return;
5218}
5219
5220/*
5221 * This routine is run via a vlan
5222 * config EVENT
5223 */
5224static void
5225igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5226{
5227 struct adapter *adapter = ifp->if_softc;
5228 u32 index, bit;
5229
5230 if (ifp->if_softc != arg) /* Not our event */
5231 return;
5232
5233 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5234 return;
5235
5236 IGB_CORE_LOCK(adapter);
5237 index = (vtag >> 5) & 0x7F;
5238 bit = vtag & 0x1F;
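	/*
	 * The VFTA is a 128-entry array of 32-bit words, one bit per
	 * possible VLAN ID (4096 total); 'index' selects the word and
	 * 'bit' the position within it.
	 */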
5239 adapter->shadow_vfta[index] |= (1 << bit);
5240 ++adapter->num_vlans;
5241 /* Change hw filter setting */
5242 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5243 igb_setup_vlan_hw_support(adapter);
5244 IGB_CORE_UNLOCK(adapter);
5245}
5246
5247/*
5248 * This routine is run via a vlan
5249 * unconfig EVENT
5250 */
5251static void
5252igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5253{
5254 struct adapter *adapter = ifp->if_softc;
5255 u32 index, bit;
5256
5257 if (ifp->if_softc != arg)
5258 return;
5259
5260 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5261 return;
5262
5263 IGB_CORE_LOCK(adapter);
5264 index = (vtag >> 5) & 0x7F;
5265 bit = vtag & 0x1F;
5266 adapter->shadow_vfta[index] &= ~(1 << bit);
5267 --adapter->num_vlans;
5268 /* Change hw filter setting */
5269 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5270 igb_setup_vlan_hw_support(adapter);
5271 IGB_CORE_UNLOCK(adapter);
5272}
5273
5274static void
5275igb_setup_vlan_hw_support(struct adapter *adapter)
5276{
5277 struct e1000_hw *hw = &adapter->hw;
5278 struct ifnet *ifp = adapter->ifp;
5279 u32 reg;
5280
5281 if (adapter->vf_ifp) {
5282 e1000_rlpml_set_vf(hw,
5283 adapter->max_frame_size + VLAN_TAG_SIZE);
5284 return;
5285 }
5286
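	/* CTRL.VME enables 802.1Q VLAN tag processing in the MAC. */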
5287 reg = E1000_READ_REG(hw, E1000_CTRL);
5288 reg |= E1000_CTRL_VME;
5289 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5290
5291 /* Enable the Filter Table */
5292 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5293 reg = E1000_READ_REG(hw, E1000_RCTL);
5294 reg &= ~E1000_RCTL_CFIEN;
5295 reg |= E1000_RCTL_VFE;
5296 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5297 }
5298
5299 /* Update the frame size */
5300 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5301 adapter->max_frame_size + VLAN_TAG_SIZE);
5302
5303 /* Don't bother with table if no vlans */
5304 if ((adapter->num_vlans == 0) ||
5305 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5306 return;
5307 /*
5308	** A soft reset zeroes out the VFTA, so
5309 ** we need to repopulate it now.
5310 */
5311 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5312 if (adapter->shadow_vfta[i] != 0) {
5313 if (adapter->vf_ifp)
5314 e1000_vfta_set_vf(hw,
5315 adapter->shadow_vfta[i], TRUE);
5316 else
5317 e1000_write_vfta(hw,
5318 i, adapter->shadow_vfta[i]);
5319 }
5320}
5321
5322static void
5323igb_enable_intr(struct adapter *adapter)
5324{
5325 /* With RSS set up what to auto clear */
5326 if (adapter->msix_mem) {
5327 u32 mask = (adapter->que_mask | adapter->link_mask);
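		/*
		 * MSI-X path: EIAC/EIAM configure auto-clear/auto-mask
		 * behavior for the queue and link vectors, EIMS unmasks
		 * them, and IMS still enables the link status change (LSC)
		 * cause.
		 */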
5328 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5329 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5330 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5331 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5332 E1000_IMS_LSC);
5333 } else {
5334 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5335 IMS_ENABLE_MASK);
5336 }
5337 E1000_WRITE_FLUSH(&adapter->hw);
5338
5339 return;
5340}
5341
5342static void
5343igb_disable_intr(struct adapter *adapter)
5344{
5345 if (adapter->msix_mem) {
5346 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5347 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5348 }
5349 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5350 E1000_WRITE_FLUSH(&adapter->hw);
5351 return;
5352}
5353
5354/*
5355 * Bit of a misnomer: what this really means is
5356 * to enable OS management of the system, i.e.
5357 * to disable special hardware management features
5358 */
5359static void
5360igb_init_manageability(struct adapter *adapter)
5361{
5362 if (adapter->has_manage) {
5363 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5364 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5365
5366 /* disable hardware interception of ARP */
5367 manc &= ~(E1000_MANC_ARP_EN);
5368
5369 /* enable receiving management packets to the host */
5370 manc |= E1000_MANC_EN_MNG2HOST;
5371 manc2h |= 1 << 5; /* Mng Port 623 */
5372 manc2h |= 1 << 6; /* Mng Port 664 */
5373 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5374 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5375 }
5376}
5377
5378/*
5379 * Give control back to hardware management
5380 * controller if there is one.
5381 */
5382static void
5383igb_release_manageability(struct adapter *adapter)
5384{
5385 if (adapter->has_manage) {
5386 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5387
5388 /* re-enable hardware interception of ARP */
5389 manc |= E1000_MANC_ARP_EN;
5390 manc &= ~E1000_MANC_EN_MNG2HOST;
5391
5392 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5393 }
5394}
5395
5396/*
5397 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
5398 * For ASF and Pass Through versions of f/w this means that
5399 * the driver is loaded.
5400 *
5401 */
5402static void
5403igb_get_hw_control(struct adapter *adapter)
5404{
5405 u32 ctrl_ext;
5406
5407 if (adapter->vf_ifp)
5408 return;
5409
5410 /* Let firmware know the driver has taken over */
5411 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5412 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5413 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5414}
5415
5416/*
5417 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5418 * For ASF and Pass Through versions of f/w this means that the
5419 * driver is no longer loaded.
5420 *
5421 */
5422static void
5423igb_release_hw_control(struct adapter *adapter)
5424{
5425 u32 ctrl_ext;
5426
5427 if (adapter->vf_ifp)
5428 return;
5429
5430	/* Let firmware take over control of h/w */
5431 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5432 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5433 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5434}
5435
5436static int
5437igb_is_valid_ether_addr(uint8_t *addr)
5438{
5439 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5440
5441 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5442 return (FALSE);
5443 }
5444
5445 return (TRUE);
5446}
5447
5448
5449/*
5450 * Enable PCI Wake On Lan capability
5451 */
5452static void
5453igb_enable_wakeup(device_t dev)
5454{
5455 u16 cap, status;
5456 u8 id;
5457
5458 /* First find the capabilities pointer*/
5459 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5460 /* Read the PM Capabilities */
5461 id = pci_read_config(dev, cap, 1);
5462 if (id != PCIY_PMG) /* Something wrong */
5463 return;
5464 /* OK, we have the power capabilities, so
5465 now get the status register */
5466 cap += PCIR_POWER_STATUS;
5467 status = pci_read_config(dev, cap, 2);
5468 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5469 pci_write_config(dev, cap, status, 2);
5470 return;
5471}
5472
5473static void
5474igb_led_func(void *arg, int onoff)
5475{
5476 struct adapter *adapter = arg;
5477
5478 IGB_CORE_LOCK(adapter);
5479 if (onoff) {
5480 e1000_setup_led(&adapter->hw);
5481 e1000_led_on(&adapter->hw);
5482 } else {
5483 e1000_led_off(&adapter->hw);
5484 e1000_cleanup_led(&adapter->hw);
5485 }
5486 IGB_CORE_UNLOCK(adapter);
5487}
5488
5489static uint64_t
5490igb_get_vf_counter(if_t ifp, ift_counter cnt)
5491{
5492 struct adapter *adapter;
5493 struct e1000_vf_stats *stats;
5494#ifndef IGB_LEGACY_TX
5495 struct tx_ring *txr;
5496 uint64_t rv;
5497#endif
5498
5499 adapter = if_getsoftc(ifp);
5500 stats = (struct e1000_vf_stats *)adapter->stats;
5501
5502 switch (cnt) {
5503 case IFCOUNTER_IPACKETS:
5504 return (stats->gprc);
5505 case IFCOUNTER_OPACKETS:
5506 return (stats->gptc);
5507 case IFCOUNTER_IBYTES:
5508 return (stats->gorc);
5509 case IFCOUNTER_OBYTES:
5510 return (stats->gotc);
5511 case IFCOUNTER_IMCASTS:
5512 return (stats->mprc);
5513 case IFCOUNTER_IERRORS:
5514 return (adapter->dropped_pkts);
5515 case IFCOUNTER_OERRORS:
5516 return (adapter->watchdog_events);
5517#ifndef IGB_LEGACY_TX
5518 case IFCOUNTER_OQDROPS:
5519 rv = 0;
5520 txr = adapter->tx_rings;
5521 for (int i = 0; i < adapter->num_queues; i++, txr++)
5522 rv += txr->br->br_drops;
5523 return (rv);
5524#endif
5525 default:
5526 return (if_get_counter_default(ifp, cnt));
5527 }
5528}
5529
5530static uint64_t
5531igb_get_counter(if_t ifp, ift_counter cnt)
5532{
5533 struct adapter *adapter;
5534 struct e1000_hw_stats *stats;
5535#ifndef IGB_LEGACY_TX
5536 struct tx_ring *txr;
5537 uint64_t rv;
5538#endif
5539
5540 adapter = if_getsoftc(ifp);
5541 if (adapter->vf_ifp)
5542 return (igb_get_vf_counter(ifp, cnt));
5543
5544 stats = (struct e1000_hw_stats *)adapter->stats;
5545
5546 switch (cnt) {
5547 case IFCOUNTER_IPACKETS:
5548 return (stats->gprc);
5549 case IFCOUNTER_OPACKETS:
5550 return (stats->gptc);
5551 case IFCOUNTER_IBYTES:
5552 return (stats->gorc);
5553 case IFCOUNTER_OBYTES:
5554 return (stats->gotc);
5555 case IFCOUNTER_IMCASTS:
5556 return (stats->mprc);
5557 case IFCOUNTER_OMCASTS:
5558 return (stats->mptc);
5559 case IFCOUNTER_IERRORS:
5560 return (adapter->dropped_pkts + stats->rxerrc +
5561 stats->crcerrs + stats->algnerrc +
5562 stats->ruc + stats->roc + stats->cexterr);
5563 case IFCOUNTER_OERRORS:
5564 return (stats->ecol + stats->latecol +
5565 adapter->watchdog_events);
5566 case IFCOUNTER_COLLISIONS:
5567 return (stats->colc);
5568 case IFCOUNTER_IQDROPS:
5569 return (stats->mpc);
5570#ifndef IGB_LEGACY_TX
5571 case IFCOUNTER_OQDROPS:
5572 rv = 0;
5573 txr = adapter->tx_rings;
5574 for (int i = 0; i < adapter->num_queues; i++, txr++)
5575 rv += txr->br->br_drops;
5576 return (rv);
5577#endif
5578 default:
5579 return (if_get_counter_default(ifp, cnt));
5580 }
5581}
5582
5583/**********************************************************************
5584 *
5585 * Update the board statistics counters.
5586 *
5587 **********************************************************************/
5588static void
5589igb_update_stats_counters(struct adapter *adapter)
5590{
5591 struct e1000_hw *hw = &adapter->hw;
5592 struct e1000_hw_stats *stats;
5593
5594 /*
5595 ** The virtual function adapter has only a
5596 ** small controlled set of stats, do only
5597 ** those and return.
5598 */
5599 if (adapter->vf_ifp) {
5600 igb_update_vf_stats_counters(adapter);
5601 return;
5602 }
5603
5604 stats = (struct e1000_hw_stats *)adapter->stats;
5605
5606 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5607 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5608 stats->symerrs +=
5609		    E1000_READ_REG(hw, E1000_SYMERRS);
5610 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5611 }
5612
5613 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5614 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5615 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5616 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5617
5618 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5619 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5620 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5621 stats->dc += E1000_READ_REG(hw, E1000_DC);
5622 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5623 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5624 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5625 /*
5626 ** For watchdog management we need to know if we have been
5627 ** paused during the last interval, so capture that here.
5628 */
5629 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5630 stats->xoffrxc += adapter->pause_frames;
5631 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5632 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5633 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5634 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5635 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5636 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5637 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5638 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5639 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5640 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5641 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5642 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5643
5644 	/* For the 64-bit byte counters the low dword must be read first. */
5645 	/* Both registers clear on the read of the high dword. */
5646
5647 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5648 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5649 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5650 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5651
5652 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5653 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5654 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5655 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5656 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5657
5658 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5659 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5660 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5661
5662 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5663 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5664 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5665 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5666
5667 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5668 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5669 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5670 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5671 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5672 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5673 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5674 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5675 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5676 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5677
5678 /* Interrupt Counts */
5679
5680 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5681 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5682 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5683 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5684 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5685 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5686 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5687 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5688 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5689
5690 /* Host to Card Statistics */
5691
5692 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5693 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5694 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5695 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5696 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5697 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5698 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5699 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5700 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5701 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5702 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5703 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5704 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5705 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5706
5707 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5708 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5709 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5710 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5711 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5712 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5713
5714 /* Driver specific counters */
5715 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5716 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5717 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5718 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5719 adapter->packet_buf_alloc_tx =
5720 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5721 adapter->packet_buf_alloc_rx =
5722 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5723}
5724
5725
5726/**********************************************************************
5727 *
5728 * Initialize the VF board statistics counters.
5729 *
5730 **********************************************************************/
5731static void
5732igb_vf_init_stats(struct adapter *adapter)
5733{
5734 struct e1000_hw *hw = &adapter->hw;
5735 struct e1000_vf_stats *stats;
5736
5737 stats = (struct e1000_vf_stats *)adapter->stats;
5738 if (stats == NULL)
5739 return;
5740 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5741 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5742 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5743 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5744 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5745}
5746
5747/**********************************************************************
5748 *
5749 * Update the VF board statistics counters.
5750 *
5751 **********************************************************************/
5752static void
5753igb_update_vf_stats_counters(struct adapter *adapter)
5754{
5755 struct e1000_hw *hw = &adapter->hw;
5756 struct e1000_vf_stats *stats;
5757
5758 if (adapter->link_speed == 0)
5759 return;
5760
5761 stats = (struct e1000_vf_stats *)adapter->stats;
5762
5763 UPDATE_VF_REG(E1000_VFGPRC,
5764 stats->last_gprc, stats->gprc);
5765 UPDATE_VF_REG(E1000_VFGORC,
5766 stats->last_gorc, stats->gorc);
5767 UPDATE_VF_REG(E1000_VFGPTC,
5768 stats->last_gptc, stats->gptc);
5769 UPDATE_VF_REG(E1000_VFGOTC,
5770 stats->last_gotc, stats->gotc);
5771 UPDATE_VF_REG(E1000_VFMPRC,
5772 stats->last_mprc, stats->mprc);
5773}
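/*
 * The VF statistics registers are only 32 bits wide and roll over, which is
 * why igb_vf_init_stats() latches a "last" value per counter and the updates
 * above go through UPDATE_VF_REG (defined in if_igb.h).  A minimal sketch of
 * that rollover handling, assuming a 64-bit accumulator; illustrative only,
 * not the macro's verbatim definition:
 *
 *	u32 now = E1000_READ_REG(hw, reg);
 *	if (now < last)				// 32-bit counter wrapped
 *		cur += (u64)1 << 32;
 *	last = now;
 *	cur = (cur & 0xFFFFFFFF00000000ULL) | now;
 */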
5774
5775/* Export a single 32-bit register via a read-only sysctl. */
5776static int
5777igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5778{
5779 struct adapter *adapter;
5780 u_int val;
5781
5782 adapter = oidp->oid_arg1;
5783 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5784 return (sysctl_handle_int(oidp, &val, 0, req));
5785}
5786
5787/*
5788** Tuneable interrupt rate handler
5789*/
5790static int
5791igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5792{
5793 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5794 int error;
5795 u32 reg, usec, rate;
5796
5797 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5798 usec = ((reg & 0x7FFC) >> 2);
5799 if (usec > 0)
5800 rate = 1000000 / usec;
5801 else
5802 rate = 0;
5803 error = sysctl_handle_int(oidp, &rate, 0, req);
5804 if (error || !req->newptr)
5805 return error;
5806 return 0;
5807}
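/*
 * Worked example of the EITR decode above: if EITR(que->msix) reads back
 * 0x1F4 (500), then usec = (0x1F4 & 0x7FFC) >> 2 = 125, and the rate
 * reported through the sysctl is 1000000 / 125 = 8000 interrupts/sec.
 * A register whose interval field is zero reports a rate of 0.
 */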
5808
5809/*
5810 * Add sysctl variables, one per statistic, to the system.
5811 */
5812static void
5813igb_add_hw_stats(struct adapter *adapter)
5814{
5815 device_t dev = adapter->dev;
5816
5817 struct tx_ring *txr = adapter->tx_rings;
5818 struct rx_ring *rxr = adapter->rx_rings;
5819
5820 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5821 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5822 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5823 struct e1000_hw_stats *stats = adapter->stats;
5824
5825 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5826 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5827
5828#define QUEUE_NAME_LEN 32
5829 char namebuf[QUEUE_NAME_LEN];
5830
5831 /* Driver Statistics */
5832 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5833 CTLFLAG_RD, &adapter->dropped_pkts,
5834 "Driver dropped packets");
5835 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5836 CTLFLAG_RD, &adapter->link_irq,
5837 "Link MSIX IRQ Handled");
5838 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5839 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5840 "Defragmenting mbuf chain failed");
5841 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5842 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5843 "Driver tx dma failure in xmit");
5844 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5845 CTLFLAG_RD, &adapter->rx_overruns,
5846 "RX overruns");
5847 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5848 CTLFLAG_RD, &adapter->watchdog_events,
5849 "Watchdog timeouts");
5850
5851 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5852 CTLFLAG_RD, &adapter->device_control,
5853 "Device Control Register");
5854 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5855 CTLFLAG_RD, &adapter->rx_control,
5856 "Receiver Control Register");
5857 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5858 CTLFLAG_RD, &adapter->int_mask,
5859 "Interrupt Mask");
5860 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5861 CTLFLAG_RD, &adapter->eint_mask,
5862 "Extended Interrupt Mask");
5863 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5864 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5865 "Transmit Buffer Packet Allocation");
5866 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5867 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5868 "Receive Buffer Packet Allocation");
5869 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5870 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5871 "Flow Control High Watermark");
5872 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5873 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5874 "Flow Control Low Watermark");
5875
5876 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5877 struct lro_ctrl *lro = &rxr->lro;
5878
5879 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5880 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5881 CTLFLAG_RD, NULL, "Queue Name");
5882 queue_list = SYSCTL_CHILDREN(queue_node);
5883
5884 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5885 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5886 sizeof(&adapter->queues[i]),
5887 igb_sysctl_interrupt_rate_handler,
5888 "IU", "Interrupt Rate");
5889
5890 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5891 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5892 igb_sysctl_reg_handler, "IU",
5893 "Transmit Descriptor Head");
5894 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5895 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5896 igb_sysctl_reg_handler, "IU",
5897 "Transmit Descriptor Tail");
5898 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5899 CTLFLAG_RD, &txr->no_desc_avail,
5900 "Queue Descriptors Unavailable");
5901 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5902 CTLFLAG_RD, &txr->total_packets,
5903 "Queue Packets Transmitted");
5904
5905 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5906 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5907 igb_sysctl_reg_handler, "IU",
5908 "Receive Descriptor Head");
5909 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5910 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5911 igb_sysctl_reg_handler, "IU",
5912 "Receive Descriptor Tail");
5913 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5914 CTLFLAG_RD, &rxr->rx_packets,
5915 "Queue Packets Received");
5916 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5917 CTLFLAG_RD, &rxr->rx_bytes,
5918 "Queue Bytes Received");
5919 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5920 CTLFLAG_RD, &lro->lro_queued, 0,
5921 "LRO Queued");
5922 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5923 CTLFLAG_RD, &lro->lro_flushed, 0,
5924 "LRO Flushed");
5925 }
5926
5927 /* MAC stats get their own sub node */
5928
5929 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5930 CTLFLAG_RD, NULL, "MAC Statistics");
5931 stat_list = SYSCTL_CHILDREN(stat_node);
5932
5933 /*
5934 ** VF adapter has a very limited set of stats
5935 	** since it's not managing the metal, so to speak.
5936 */
5937 if (adapter->vf_ifp) {
5938 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5939 CTLFLAG_RD, &stats->gprc,
5940 "Good Packets Received");
5941 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5942 CTLFLAG_RD, &stats->gptc,
5943 "Good Packets Transmitted");
5944 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5945 CTLFLAG_RD, &stats->gorc,
5946 "Good Octets Received");
5947 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5948 CTLFLAG_RD, &stats->gotc,
5949 "Good Octets Transmitted");
5950 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5951 CTLFLAG_RD, &stats->mprc,
5952 "Multicast Packets Received");
5953 return;
5954 }
5955
5956 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5957 CTLFLAG_RD, &stats->ecol,
5958 "Excessive collisions");
5959 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5960 CTLFLAG_RD, &stats->scc,
5961 "Single collisions");
5962 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5963 CTLFLAG_RD, &stats->mcc,
5964 "Multiple collisions");
5965 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5966 CTLFLAG_RD, &stats->latecol,
5967 "Late collisions");
5968 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5969 CTLFLAG_RD, &stats->colc,
5970 "Collision Count");
5971 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5972 CTLFLAG_RD, &stats->symerrs,
5973 "Symbol Errors");
5974 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5975 CTLFLAG_RD, &stats->sec,
5976 "Sequence Errors");
5977 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5978 CTLFLAG_RD, &stats->dc,
5979 "Defer Count");
5980 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5981 CTLFLAG_RD, &stats->mpc,
5982 "Missed Packets");
5983 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5984 CTLFLAG_RD, &stats->rlec,
5985 "Receive Length Errors");
5986 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5987 CTLFLAG_RD, &stats->rnbc,
5988 "Receive No Buffers");
5989 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5990 CTLFLAG_RD, &stats->ruc,
5991 "Receive Undersize");
5992 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5993 CTLFLAG_RD, &stats->rfc,
5994 "Fragmented Packets Received");
5995 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5996 CTLFLAG_RD, &stats->roc,
5997 "Oversized Packets Received");
5998 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5999 CTLFLAG_RD, &stats->rjc,
6000 "Recevied Jabber");
6001 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6002 CTLFLAG_RD, &stats->rxerrc,
6003 "Receive Errors");
6004 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6005 CTLFLAG_RD, &stats->crcerrs,
6006 "CRC errors");
6007 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6008 CTLFLAG_RD, &stats->algnerrc,
6009 "Alignment Errors");
6010 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6011 CTLFLAG_RD, &stats->tncrs,
6012 "Transmit with No CRS");
6013 /* On 82575 these are collision counts */
6014 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6015 CTLFLAG_RD, &stats->cexterr,
6016 "Collision/Carrier extension errors");
6017 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6018 CTLFLAG_RD, &stats->xonrxc,
6019 "XON Received");
6020 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6021 CTLFLAG_RD, &stats->xontxc,
6022 "XON Transmitted");
6023 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6024 CTLFLAG_RD, &stats->xoffrxc,
6025 "XOFF Received");
6026 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6027 CTLFLAG_RD, &stats->xofftxc,
6028 "XOFF Transmitted");
6029 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6030 CTLFLAG_RD, &stats->fcruc,
6031 "Unsupported Flow Control Received");
6032 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6033 CTLFLAG_RD, &stats->mgprc,
6034 "Management Packets Received");
6035 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6036 CTLFLAG_RD, &stats->mgpdc,
6037 "Management Packets Dropped");
6038 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6039 CTLFLAG_RD, &stats->mgptc,
6040 "Management Packets Transmitted");
6041 /* Packet Reception Stats */
6042 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6043 CTLFLAG_RD, &stats->tpr,
6044 "Total Packets Received");
6045 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6046 CTLFLAG_RD, &stats->gprc,
6047 "Good Packets Received");
6048 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6049 CTLFLAG_RD, &stats->bprc,
6050 "Broadcast Packets Received");
6051 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6052 CTLFLAG_RD, &stats->mprc,
6053 "Multicast Packets Received");
6054 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6055 CTLFLAG_RD, &stats->prc64,
6056 "64 byte frames received");
6057 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6058 CTLFLAG_RD, &stats->prc127,
6059 "65-127 byte frames received");
6060 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6061 CTLFLAG_RD, &stats->prc255,
6062 "128-255 byte frames received");
6063 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6064 CTLFLAG_RD, &stats->prc511,
6065 "256-511 byte frames received");
6066 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6067 CTLFLAG_RD, &stats->prc1023,
6068 "512-1023 byte frames received");
6069 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6070 CTLFLAG_RD, &stats->prc1522,
6071 "1023-1522 byte frames received");
6072 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6073 CTLFLAG_RD, &stats->gorc,
6074 "Good Octets Received");
6075 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6076 CTLFLAG_RD, &stats->tor,
6077 "Total Octets Received");
6078
6079 /* Packet Transmission Stats */
6080 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6081 CTLFLAG_RD, &stats->gotc,
6082 "Good Octets Transmitted");
6083 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6084 CTLFLAG_RD, &stats->tot,
6085 "Total Octets Transmitted");
6086 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6087 CTLFLAG_RD, &stats->tpt,
6088 "Total Packets Transmitted");
6089 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6090 CTLFLAG_RD, &stats->gptc,
6091 "Good Packets Transmitted");
6092 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6093 CTLFLAG_RD, &stats->bptc,
6094 "Broadcast Packets Transmitted");
6095 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6096 CTLFLAG_RD, &stats->mptc,
6097 "Multicast Packets Transmitted");
6098 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6099 CTLFLAG_RD, &stats->ptc64,
6100 "64 byte frames transmitted");
6101 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6102 CTLFLAG_RD, &stats->ptc127,
6103 "65-127 byte frames transmitted");
6104 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6105 CTLFLAG_RD, &stats->ptc255,
6106 "128-255 byte frames transmitted");
6107 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6108 CTLFLAG_RD, &stats->ptc511,
6109 "256-511 byte frames transmitted");
6110 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6111 CTLFLAG_RD, &stats->ptc1023,
6112 "512-1023 byte frames transmitted");
6113 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6114 CTLFLAG_RD, &stats->ptc1522,
6115 "1024-1522 byte frames transmitted");
6116 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6117 CTLFLAG_RD, &stats->tsctc,
6118 "TSO Contexts Transmitted");
6119 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6120 CTLFLAG_RD, &stats->tsctfc,
6121 "TSO Contexts Failed");
6122
6123
6124 /* Interrupt Stats */
6125
6126 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6127 CTLFLAG_RD, NULL, "Interrupt Statistics");
6128 int_list = SYSCTL_CHILDREN(int_node);
6129
6130 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6131 CTLFLAG_RD, &stats->iac,
6132 "Interrupt Assertion Count");
6133
6134 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6135 CTLFLAG_RD, &stats->icrxptc,
6136 "Interrupt Cause Rx Pkt Timer Expire Count");
6137
6138 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6139 CTLFLAG_RD, &stats->icrxatc,
6140 "Interrupt Cause Rx Abs Timer Expire Count");
6141
6142 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6143 CTLFLAG_RD, &stats->ictxptc,
6144 "Interrupt Cause Tx Pkt Timer Expire Count");
6145
6146 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6147 CTLFLAG_RD, &stats->ictxatc,
6148 "Interrupt Cause Tx Abs Timer Expire Count");
6149
6150 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6151 CTLFLAG_RD, &stats->ictxqec,
6152 "Interrupt Cause Tx Queue Empty Count");
6153
6154 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6155 CTLFLAG_RD, &stats->ictxqmtc,
6156 "Interrupt Cause Tx Queue Min Thresh Count");
6157
6158 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6159 CTLFLAG_RD, &stats->icrxdmtc,
6160 "Interrupt Cause Rx Desc Min Thresh Count");
6161
6162 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6163 CTLFLAG_RD, &stats->icrxoc,
6164 "Interrupt Cause Receiver Overrun Count");
6165
6166 /* Host to Card Stats */
6167
6168 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6169 CTLFLAG_RD, NULL,
6170 "Host to Card Statistics");
6171
6172 host_list = SYSCTL_CHILDREN(host_node);
6173
6174 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6175 CTLFLAG_RD, &stats->cbtmpc,
6176 "Circuit Breaker Tx Packet Count");
6177
6178 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6179 CTLFLAG_RD, &stats->htdpmc,
6180 "Host Transmit Discarded Packets");
6181
6182 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6183 CTLFLAG_RD, &stats->rpthc,
6184 "Rx Packets To Host");
6185
6186 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6187 CTLFLAG_RD, &stats->cbrmpc,
6188 "Circuit Breaker Rx Packet Count");
6189
6190 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6191 CTLFLAG_RD, &stats->cbrdpc,
6192 "Circuit Breaker Rx Dropped Count");
6193
6194 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6195 CTLFLAG_RD, &stats->hgptc,
6196 "Host Good Packets Tx Count");
6197
6198 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6199 CTLFLAG_RD, &stats->htcbdpc,
6200 "Host Tx Circuit Breaker Dropped Count");
6201
6202 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6203 CTLFLAG_RD, &stats->hgorc,
6204 "Host Good Octets Received Count");
6205
6206 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6207 CTLFLAG_RD, &stats->hgotc,
6208 "Host Good Octets Transmit Count");
6209
6210 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6211 CTLFLAG_RD, &stats->lenerrs,
6212 "Length Errors");
6213
6214 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6215 CTLFLAG_RD, &stats->scvpc,
6216 "SerDes/SGMII Code Violation Pkt Count");
6217
6218 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6219 CTLFLAG_RD, &stats->hrmpc,
6220 "Header Redirection Missed Packet Count");
6221}
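/*
 * All of the nodes added above hang off this device's sysctl tree, so on a
 * typical system they can be inspected from userland; the unit number and
 * exact paths below are assumptions for illustration:
 *
 *	# sysctl dev.igb.0.mac_stats
 *	# sysctl dev.igb.0.queue0.interrupt_rate
 *	# sysctl dev.igb.0.interrupts.asserts
 */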
6222
6223
6224/**********************************************************************
6225 *
6226  * This routine provides a way to dump out the adapter EEPROM,
6227  * often a useful debug/service tool. Only the first 32 words are
6228  * dumped; the data that matters lies within that extent.
6229 *
6230 **********************************************************************/
6231static int
6232igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6233{
6234 struct adapter *adapter;
6235 int error;
6236 int result;
6237
6238 result = -1;
6239 error = sysctl_handle_int(oidp, &result, 0, req);
6240
6241 if (error || !req->newptr)
6242 return (error);
6243
6244 /*
6245 * This value will cause a hex dump of the
6246 * first 32 16-bit words of the EEPROM to
6247 * the screen.
6248 */
6249 if (result == 1) {
6250 adapter = (struct adapter *)arg1;
6251 igb_print_nvm_info(adapter);
6252 }
6253
6254 return (error);
6255}
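/*
 * Assuming this handler is attached to the device sysctl tree as an
 * integer node (for example "nvm"), the EEPROM dump can be triggered
 * from userland with something like:
 *
 *	# sysctl dev.igb.0.nvm=1
 */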
6256
6257static void
6258igb_print_nvm_info(struct adapter *adapter)
6259{
6260 u16 eeprom_data;
6261 int i, j, row = 0;
6262
6263 	/* It's a bit crude, but it gets the job done */
6264 printf("\nInterface EEPROM Dump:\n");
6265 printf("Offset\n0x0000 ");
6266 for (i = 0, j = 0; i < 32; i++, j++) {
6267 if (j == 8) { /* Make the offset block */
6268 j = 0; ++row;
6269 printf("\n0x00%x0 ",row);
6270 }
6271 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6272 printf("%04x ", eeprom_data);
6273 }
6274 printf("\n");
6275}
6276
6277static void
6278igb_set_sysctl_value(struct adapter *adapter, const char *name,
6279 const char *description, int *limit, int value)
6280{
6281 *limit = value;
6282 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6283 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6284 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6285}
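/*
 * A hypothetical call site for the helper above; the node name, limit
 * variable and default value here are illustrative assumptions, not
 * taken from this file:
 *
 *	igb_set_sysctl_value(adapter, "rx_processing_limit",
 *	    "max number of rx packets to process",
 *	    &adapter->rx_process_limit, 100);
 */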
6286
6287/*
6288** Set flow control using sysctl:
6289** Flow control values:
6290** 0 - off
6291** 1 - rx pause
6292** 2 - tx pause
6293** 3 - full
6294*/
6295static int
6296igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6297{
6298 int error;
6299 static int input = 3; /* default is full */
6300 struct adapter *adapter = (struct adapter *) arg1;
6301
6302 error = sysctl_handle_int(oidp, &input, 0, req);
6303
6304 if ((error) || (req->newptr == NULL))
6305 return (error);
6306
6307 switch (input) {
6308 case e1000_fc_rx_pause:
6309 case e1000_fc_tx_pause:
6310 case e1000_fc_full:
6311 case e1000_fc_none:
6312 adapter->hw.fc.requested_mode = input;
6313 adapter->fc = input;
6314 break;
6315 default:
6316 /* Do nothing */
6317 return (error);
6318 }
6319
6320 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6321 e1000_force_mac_fc(&adapter->hw);
6322 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6323 return (error);
6324}
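/*
 * Assuming the handler above is exposed as a read/write node such as
 * "fc" on the device sysctl tree, full flow control (value 3) could be
 * requested from userland with:
 *
 *	# sysctl dev.igb.0.fc=3
 */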
6325
6326/*
6327** Manage DMA Coalesce:
6328** Control values:
6329** 0/1 - off/on
6330** Legal timer values are:
6331 **   250, 500, and 1000-10000 in increments of 1000
6332*/
6333static int
6334igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6335{
6336 struct adapter *adapter = (struct adapter *) arg1;
6337 int error;
6338
6339 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6340
6341 if ((error) || (req->newptr == NULL))
6342 return (error);
6343
6344 switch (adapter->dmac) {
6345 case 0:
6346 /* Disabling */
6347 break;
6348 case 1: /* Just enable and use default */
6349 adapter->dmac = 1000;
6350 break;
6351 case 250:
6352 case 500:
6353 case 1000:
6354 case 2000:
6355 case 3000:
6356 case 4000:
6357 case 5000:
6358 case 6000:
6359 case 7000:
6360 case 8000:
6361 case 9000:
6362 case 10000:
6363 /* Legal values - allow */
6364 break;
6365 default:
6366 /* Do nothing, illegal value */
6367 adapter->dmac = 0;
6368 return (EINVAL);
6369 }
6370 /* Reinit the interface */
6371 igb_init(adapter);
6372 return (error);
6373}
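/*
 * Assuming the handler above is exposed as a node such as "dmac", DMA
 * coalescing could be enabled with its default timer value (1000) by:
 *
 *	# sysctl dev.igb.0.dmac=1
 *
 * Writing 1 is mapped to 1000 by the handler before the interface is
 * reinitialized.
 */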
6374
6375/*
6376** Manage Energy Efficient Ethernet:
6377** Control values:
6378** 0/1 - enabled/disabled
6379*/
6380static int
6381igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6382{
6383 struct adapter *adapter = (struct adapter *) arg1;
6384 int error, value;
6385
6386 value = adapter->hw.dev_spec._82575.eee_disable;
6387 error = sysctl_handle_int(oidp, &value, 0, req);
6388 if (error || req->newptr == NULL)
6389 return (error);
6390 IGB_CORE_LOCK(adapter);
6391 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6392 igb_init_locked(adapter);
6393 IGB_CORE_UNLOCK(adapter);
6394 return (0);
6395}
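/*
 * Assuming the handler above is exposed as a node such as "eee_disabled",
 * EEE could be turned off (1 == disabled) or back on (0) with:
 *
 *	# sysctl dev.igb.0.eee_disabled=1
 */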