1/******************************************************************************
2
3 Copyright (c) 2001-2015, Intel Corporation
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
15
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: head/sys/dev/e1000/if_igb.c 294327 2016-01-19 15:33:28Z hselasky $*/
34
35
36#include "opt_inet.h"
37#include "opt_inet6.h"
38#include "opt_rss.h"
39
40#ifdef HAVE_KERNEL_OPTION_HEADERS
41#include "opt_device_polling.h"
42#include "opt_altq.h"
43#endif
44
45#include "if_igb.h"
46
47/*********************************************************************
48 * Driver version:
49 *********************************************************************/
50char igb_driver_version[] = "2.5.2";
51
52
53/*********************************************************************
54 * PCI Device ID Table
55 *
56 * Used by probe to select devices to load on
57 * Last field stores an index into e1000_strings
58 * Last entry must be all 0s
59 *
60 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
61 *********************************************************************/
62
63static igb_vendor_info_t igb_vendor_info_array[] =
64{
65 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
66 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
67 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
68 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
69 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
70 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
71 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
72 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
73 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
74 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
75 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
76 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
77 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
78 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
79 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
80 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
81 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
82 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
83 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
84 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
85 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
86 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
87 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
88 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
89 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
90 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
91 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
92 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
93 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
94 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
95 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
96 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
97 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
98 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
99 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
100 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
101 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
102 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
103 {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
104 /* required last entry */
105 {0, 0, 0, 0, 0}
106};
107
108/*********************************************************************
109 * Table of branding strings for all supported NICs.
110 *********************************************************************/
111
112static char *igb_strings[] = {
113 "Intel(R) PRO/1000 Network Connection"
114};
115
116/*********************************************************************
117 * Function prototypes
118 *********************************************************************/
119static int igb_probe(device_t);
120static int igb_attach(device_t);
121static int igb_detach(device_t);
122static int igb_shutdown(device_t);
123static int igb_suspend(device_t);
124static int igb_resume(device_t);
125#ifndef IGB_LEGACY_TX
126static int igb_mq_start(struct ifnet *, struct mbuf *);
127static int igb_mq_start_locked(struct ifnet *, struct tx_ring *);
128static void igb_qflush(struct ifnet *);
129static void igb_deferred_mq_start(void *, int);
130#else
131static void igb_start(struct ifnet *);
132static void igb_start_locked(struct tx_ring *, struct ifnet *ifp);
133#endif
134static int igb_ioctl(struct ifnet *, u_long, caddr_t);
135static uint64_t igb_get_counter(if_t, ift_counter);
136static void igb_init(void *);
137static void igb_init_locked(struct adapter *);
138static void igb_stop(void *);
139static void igb_media_status(struct ifnet *, struct ifmediareq *);
140static int igb_media_change(struct ifnet *);
141static void igb_identify_hardware(struct adapter *);
142static int igb_allocate_pci_resources(struct adapter *);
143static int igb_allocate_msix(struct adapter *);
144static int igb_allocate_legacy(struct adapter *);
145static int igb_setup_msix(struct adapter *);
146static void igb_free_pci_resources(struct adapter *);
147static void igb_local_timer(void *);
148static void igb_reset(struct adapter *);
149static int igb_setup_interface(device_t, struct adapter *);
150static int igb_allocate_queues(struct adapter *);
151static void igb_configure_queues(struct adapter *);
152
153static int igb_allocate_transmit_buffers(struct tx_ring *);
154static void igb_setup_transmit_structures(struct adapter *);
155static void igb_setup_transmit_ring(struct tx_ring *);
156static void igb_initialize_transmit_units(struct adapter *);
157static void igb_free_transmit_structures(struct adapter *);
158static void igb_free_transmit_buffers(struct tx_ring *);
159
160static int igb_allocate_receive_buffers(struct rx_ring *);
161static int igb_setup_receive_structures(struct adapter *);
162static int igb_setup_receive_ring(struct rx_ring *);
163static void igb_initialize_receive_units(struct adapter *);
164static void igb_free_receive_structures(struct adapter *);
165static void igb_free_receive_buffers(struct rx_ring *);
166static void igb_free_receive_ring(struct rx_ring *);
167
168static void igb_enable_intr(struct adapter *);
169static void igb_disable_intr(struct adapter *);
170static void igb_update_stats_counters(struct adapter *);
171static bool igb_txeof(struct tx_ring *);
172
173static __inline void igb_rx_discard(struct rx_ring *, int);
174static __inline void igb_rx_input(struct rx_ring *,
175 struct ifnet *, struct mbuf *, u32);
176
177static bool igb_rxeof(struct igb_queue *, int, int *);
178static void igb_rx_checksum(u32, struct mbuf *, u32);
179static int igb_tx_ctx_setup(struct tx_ring *,
180 struct mbuf *, u32 *, u32 *);
181static int igb_tso_setup(struct tx_ring *,
182 struct mbuf *, u32 *, u32 *);
183static void igb_set_promisc(struct adapter *);
184static void igb_disable_promisc(struct adapter *);
185static void igb_set_multi(struct adapter *);
186static void igb_update_link_status(struct adapter *);
187static void igb_refresh_mbufs(struct rx_ring *, int);
188
189static void igb_register_vlan(void *, struct ifnet *, u16);
190static void igb_unregister_vlan(void *, struct ifnet *, u16);
191static void igb_setup_vlan_hw_support(struct adapter *);
192
193static int igb_xmit(struct tx_ring *, struct mbuf **);
194static int igb_dma_malloc(struct adapter *, bus_size_t,
195 struct igb_dma_alloc *, int);
196static void igb_dma_free(struct adapter *, struct igb_dma_alloc *);
197static int igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
198static void igb_print_nvm_info(struct adapter *);
199static int igb_is_valid_ether_addr(u8 *);
200static void igb_add_hw_stats(struct adapter *);
201
202static void igb_vf_init_stats(struct adapter *);
203static void igb_update_vf_stats_counters(struct adapter *);
204
205/* Management and WOL Support */
206static void igb_init_manageability(struct adapter *);
207static void igb_release_manageability(struct adapter *);
208static void igb_get_hw_control(struct adapter *);
209static void igb_release_hw_control(struct adapter *);
210static void igb_enable_wakeup(device_t);
211static void igb_led_func(void *, int);
212
213static int igb_irq_fast(void *);
214static void igb_msix_que(void *);
215static void igb_msix_link(void *);
216static void igb_handle_que(void *context, int pending);
217static void igb_handle_link(void *context, int pending);
218static void igb_handle_link_locked(struct adapter *);
219
220static void igb_set_sysctl_value(struct adapter *, const char *,
221 const char *, int *, int);
222static int igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
223static int igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
224static int igb_sysctl_eee(SYSCTL_HANDLER_ARGS);
225
226#ifdef DEVICE_POLLING
227static poll_handler_t igb_poll;
228#endif /* DEVICE_POLLING */
229
230/*********************************************************************
231 * FreeBSD Device Interface Entry Points
232 *********************************************************************/
233
234static device_method_t igb_methods[] = {
235 /* Device interface */
236 DEVMETHOD(device_probe, igb_probe),
237 DEVMETHOD(device_attach, igb_attach),
238 DEVMETHOD(device_detach, igb_detach),
239 DEVMETHOD(device_shutdown, igb_shutdown),
240 DEVMETHOD(device_suspend, igb_suspend),
241 DEVMETHOD(device_resume, igb_resume),
242 DEVMETHOD_END
243};
244
245static driver_t igb_driver = {
246 "igb", igb_methods, sizeof(struct adapter),
247};
248
249static devclass_t igb_devclass;
250DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
251MODULE_DEPEND(igb, pci, 1, 1, 1);
252MODULE_DEPEND(igb, ether, 1, 1, 1);
253#ifdef DEV_NETMAP
254MODULE_DEPEND(igb, netmap, 1, 1, 1);
255#endif /* DEV_NETMAP */
256
257/*********************************************************************
258 * Tunable default values.
259 *********************************************************************/
260
261static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");
262
263/* Descriptor defaults */
264static int igb_rxd = IGB_DEFAULT_RXD;
265static int igb_txd = IGB_DEFAULT_TXD;
266SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
267 "Number of receive descriptors per queue");
268SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
269 "Number of transmit descriptors per queue");
270
271/*
272** AIM: Adaptive Interrupt Moderation,
273** meaning that the interrupt rate is
274** varied over time based on the traffic
275** seen on that interrupt vector.
276*/
277static int igb_enable_aim = TRUE;
278SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
279 "Enable adaptive interrupt moderation");
280
281/*
282 * MSIX should be the default for best performance,
283 * but this allows it to be forced off for testing.
284 */
285static int igb_enable_msix = 1;
286SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
287 "Enable MSI-X interrupts");
288
289/*
290** Tunable interrupt rate
291*/
292static int igb_max_interrupt_rate = 8000;
293SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
294 &igb_max_interrupt_rate, 0, "Maximum interrupts per second");
295
296#ifndef IGB_LEGACY_TX
297/*
298** Tunable number of buffers in the buf-ring (drbr_xxx)
299*/
300static int igb_buf_ring_size = IGB_BR_SIZE;
301SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
302 &igb_buf_ring_size, 0, "Size of the bufring");
303#endif
304
305/*
306** Header split causes the packet header to
307** be DMA'd to a separate mbuf from the payload.
308** This can have memory alignment benefits. But
309** another plus is that small packets often fit
310** into the header and thus use no cluster. It is
311** a very workload-dependent feature.
312*/
313static int igb_header_split = FALSE;
314SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
315 "Enable receive mbuf header split");
316
317/*
318** This will autoconfigure based on the
319** number of CPUs and max supported
320** MSIX messages if left at 0.
321*/
322static int igb_num_queues = 0;
323SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
324 "Number of queues to configure, 0 indicates autoconfigure");
325
326/*
327** Global variable to store last used CPU when binding queues
328** to CPUs in igb_allocate_msix. Starts at CPU_FIRST and increments when a
329** queue is bound to a cpu.
330*/
331static int igb_last_bind_cpu = -1;
332
333/* How many packets rxeof tries to clean at a time */
334static int igb_rx_process_limit = 100;
335SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
336 &igb_rx_process_limit, 0,
337 "Maximum number of received packets to process at a time, -1 means unlimited");
338
339/* How many packets txeof tries to clean at a time */
340static int igb_tx_process_limit = -1;
341SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
342 &igb_tx_process_limit, 0,
343 "Maximum number of sent packets to process at a time, -1 means unlimited");
344
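/*
** Usage sketch (not part of the driver): the tunables above live under
** the hw.igb sysctl tree, so the read-only-tunable (RDTUN) ones are
** normally set at boot in /boot/loader.conf, for example:
**
**   hw.igb.rxd="2048"
**   hw.igb.txd="2048"
**   hw.igb.num_queues="4"
**
** RWTUN entries such as hw.igb.enable_aim can also be changed at
** runtime with sysctl(8).
*/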
345#ifdef DEV_NETMAP /* see ixgbe.c for details */
346#include <dev/netmap/if_igb_netmap.h>
347#endif /* DEV_NETMAP */
348/*********************************************************************
349 * Device identification routine
350 *
351 * igb_probe determines if the driver should be loaded on an
352 * adapter, based on the PCI vendor/device ID of that adapter.
353 *
354 * return BUS_PROBE_DEFAULT on success, positive on failure
355 *********************************************************************/
356
357static int
358igb_probe(device_t dev)
359{
360 char adapter_name[256];
361 uint16_t pci_vendor_id = 0;
362 uint16_t pci_device_id = 0;
363 uint16_t pci_subvendor_id = 0;
364 uint16_t pci_subdevice_id = 0;
365 igb_vendor_info_t *ent;
366
367 INIT_DEBUGOUT("igb_probe: begin");
368
369 pci_vendor_id = pci_get_vendor(dev);
370 if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
371 return (ENXIO);
372
373 pci_device_id = pci_get_device(dev);
374 pci_subvendor_id = pci_get_subvendor(dev);
375 pci_subdevice_id = pci_get_subdevice(dev);
376
377 ent = igb_vendor_info_array;
378 while (ent->vendor_id != 0) {
379 if ((pci_vendor_id == ent->vendor_id) &&
380 (pci_device_id == ent->device_id) &&
381
382 ((pci_subvendor_id == ent->subvendor_id) ||
383 (ent->subvendor_id == 0)) &&
384
385 ((pci_subdevice_id == ent->subdevice_id) ||
386 (ent->subdevice_id == 0))) {
387 sprintf(adapter_name, "%s, Version - %s",
388 igb_strings[ent->index],
389 igb_driver_version);
390 device_set_desc_copy(dev, adapter_name);
391 return (BUS_PROBE_DEFAULT);
392 }
393 ent++;
394 }
395 return (ENXIO);
396}
397
398/*********************************************************************
399 * Device initialization routine
400 *
401 * The attach entry point is called when the driver is being loaded.
402 * This routine identifies the type of hardware, allocates all resources
403 * and initializes the hardware.
404 *
405 * return 0 on success, positive on failure
406 *********************************************************************/
407
408static int
409igb_attach(device_t dev)
410{
411 struct adapter *adapter;
412 int error = 0;
413 u16 eeprom_data;
414
415 INIT_DEBUGOUT("igb_attach: begin");
416
417 if (resource_disabled("igb", device_get_unit(dev))) {
418 device_printf(dev, "Disabled by device hint\n");
419 return (ENXIO);
420 }
421
422 adapter = device_get_softc(dev);
423 adapter->dev = adapter->osdep.dev = dev;
424 IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
425
426 /* SYSCTLs */
427 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
428 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
429 OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
430 igb_sysctl_nvm_info, "I", "NVM Information");
431
432 igb_set_sysctl_value(adapter, "enable_aim",
433 "Interrupt Moderation", &adapter->enable_aim,
434 igb_enable_aim);
435
436 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
437 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
438 OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
439 adapter, 0, igb_set_flowcntl, "I", "Flow Control");
440
441 callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
442
443 /* Determine hardware and mac info */
444 igb_identify_hardware(adapter);
445
446 /* Setup PCI resources */
447 if (igb_allocate_pci_resources(adapter)) {
448 device_printf(dev, "Allocation of PCI resources failed\n");
449 error = ENXIO;
450 goto err_pci;
451 }
452
453 /* Do Shared Code initialization */
454 if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
455 device_printf(dev, "Setup of Shared code failed\n");
456 error = ENXIO;
457 goto err_pci;
458 }
459
460 e1000_get_bus_info(&adapter->hw);
461
462 /* Sysctls for limiting the amount of work done in the taskqueues */
463 igb_set_sysctl_value(adapter, "rx_processing_limit",
464 "max number of rx packets to process",
465 &adapter->rx_process_limit, igb_rx_process_limit);
466
467 igb_set_sysctl_value(adapter, "tx_processing_limit",
468 "max number of tx packets to process",
469 &adapter->tx_process_limit, igb_tx_process_limit);
470
471 /*
472 * Validate number of transmit and receive descriptors. It
473 * must not exceed the hardware maximum, and must be a multiple
474 * of IGB_DBA_ALIGN.
475 */
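	/*
	 * For example, assuming the usual 16-byte descriptors and a
	 * 128-byte IGB_DBA_ALIGN, the ring sizes must be multiples
	 * of 8 descriptors.
	 */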
476 if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
477 (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
478 device_printf(dev, "Using %d TX descriptors instead of %d!\n",
479 IGB_DEFAULT_TXD, igb_txd);
480 adapter->num_tx_desc = IGB_DEFAULT_TXD;
481 } else
482 adapter->num_tx_desc = igb_txd;
483 if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
484 (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
485 device_printf(dev, "Using %d RX descriptors instead of %d!\n",
486 IGB_DEFAULT_RXD, igb_rxd);
487 adapter->num_rx_desc = IGB_DEFAULT_RXD;
488 } else
489 adapter->num_rx_desc = igb_rxd;
490
491 adapter->hw.mac.autoneg = DO_AUTO_NEG;
492 adapter->hw.phy.autoneg_wait_to_complete = FALSE;
493 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
494
495 /* Copper options */
496 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
497 adapter->hw.phy.mdix = AUTO_ALL_MODES;
498 adapter->hw.phy.disable_polarity_correction = FALSE;
499 adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
500 }
501
502 /*
503 * Set the frame limits assuming
504 * standard Ethernet-sized frames.
505 */
506 adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
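	/* With the standard 1500-byte MTU this is 1500 + 14 + 4 = 1518 bytes. */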
507
508 /*
509 ** Allocate and Setup Queues
510 */
511 if (igb_allocate_queues(adapter)) {
512 error = ENOMEM;
513 goto err_pci;
514 }
515
516 /* Allocate the appropriate stats memory */
517 if (adapter->vf_ifp) {
518 adapter->stats =
519 (struct e1000_vf_stats *)malloc(sizeof \
520 (struct e1000_vf_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
521 igb_vf_init_stats(adapter);
522 } else
523 adapter->stats =
524 (struct e1000_hw_stats *)malloc(sizeof \
525 (struct e1000_hw_stats), M_DEVBUF, M_NOWAIT | M_ZERO);
526 if (adapter->stats == NULL) {
527 device_printf(dev, "Can not allocate stats memory\n");
528 error = ENOMEM;
529 goto err_late;
530 }
531
532 /* Allocate multicast array memory. */
533 adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
534 MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
535 if (adapter->mta == NULL) {
536 device_printf(dev, "Can not allocate multicast setup array\n");
537 error = ENOMEM;
538 goto err_late;
539 }
540
541 /* Some adapter-specific advanced features */
542 if (adapter->hw.mac.type >= e1000_i350) {
543 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
544 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
545 OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
546 adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
547 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
548 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
549 OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
550 adapter, 0, igb_sysctl_eee, "I",
551 "Disable Energy Efficient Ethernet");
552 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
553 if (adapter->hw.mac.type == e1000_i354)
554 e1000_set_eee_i354(&adapter->hw);
555 else
556 e1000_set_eee_i350(&adapter->hw);
557 }
558 }
559
560 /*
561 ** Start from a known state; this is
562 ** important for reading the NVM and
563 ** MAC address afterwards.
564 */
565 e1000_reset_hw(&adapter->hw);
566
567 /* Make sure we have a good EEPROM before we read from it */
568 if (((adapter->hw.mac.type != e1000_i210) &&
569 (adapter->hw.mac.type != e1000_i211)) &&
570 (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
571 /*
572 ** Some PCI-E parts fail the first check due to
573 ** the link being in a sleep state; call it again, and
574 ** if it fails a second time it is a real issue.
575 */
576 if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
577 device_printf(dev,
578 "The EEPROM Checksum Is Not Valid\n");
579 error = EIO;
580 goto err_late;
581 }
582 }
583
584 /*
585 ** Copy the permanent MAC address out of the EEPROM
586 */
587 if (e1000_read_mac_addr(&adapter->hw) < 0) {
588 device_printf(dev, "EEPROM read error while reading MAC"
589 " address\n");
590 error = EIO;
591 goto err_late;
592 }
593 /* Check its sanity */
594 if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
595 device_printf(dev, "Invalid MAC address\n");
596 error = EIO;
597 goto err_late;
598 }
599
600 /* Setup OS specific network interface */
601 if (igb_setup_interface(dev, adapter) != 0)
602 goto err_late;
603
604 /* Now get a good starting state */
605 igb_reset(adapter);
606
607 /* Initialize statistics */
608 igb_update_stats_counters(adapter);
609
610 adapter->hw.mac.get_link_status = 1;
611 igb_update_link_status(adapter);
612
613 /* Indicate SOL/IDER usage */
614 if (e1000_check_reset_block(&adapter->hw))
615 device_printf(dev,
616 "PHY reset is blocked due to SOL/IDER session.\n");
617
618 /* Determine if we have to control management hardware */
619 adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
620
621 /*
622 * Setup Wake-on-Lan
623 */
624 /* APME bit in EEPROM is mapped to WUC.APME */
625 eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
626 if (eeprom_data)
627 adapter->wol = E1000_WUFC_MAG;
628
629 /* Register for VLAN events */
630 adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
631 igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
632 adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
633 igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
634
635 igb_add_hw_stats(adapter);
636
637 /* Tell the stack that the interface is not active */
638 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
639 adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
640
641 adapter->led_dev = led_create(igb_led_func, adapter,
642 device_get_nameunit(dev));
643
644 /*
645 ** Configure Interrupts
646 */
647 if ((adapter->msix > 1) && (igb_enable_msix))
648 error = igb_allocate_msix(adapter);
649 else /* MSI or Legacy */
650 error = igb_allocate_legacy(adapter);
651 if (error)
652 goto err_late;
653
654#ifdef DEV_NETMAP
655 igb_netmap_attach(adapter);
656#endif /* DEV_NETMAP */
657 INIT_DEBUGOUT("igb_attach: end");
658
659 return (0);
660
661err_late:
662 igb_detach(dev);
663 igb_free_transmit_structures(adapter);
664 igb_free_receive_structures(adapter);
665 igb_release_hw_control(adapter);
666err_pci:
667 igb_free_pci_resources(adapter);
668 if (adapter->ifp != NULL)
669 if_free(adapter->ifp);
670 free(adapter->mta, M_DEVBUF);
671 IGB_CORE_LOCK_DESTROY(adapter);
672
673 return (error);
674}
675
676/*********************************************************************
677 * Device removal routine
678 *
679 * The detach entry point is called when the driver is being removed.
680 * This routine stops the adapter and deallocates all the resources
681 * that were allocated for driver operation.
682 *
683 * return 0 on success, positive on failure
684 *********************************************************************/
685
686static int
687igb_detach(device_t dev)
688{
689 struct adapter *adapter = device_get_softc(dev);
690 struct ifnet *ifp = adapter->ifp;
691
692 INIT_DEBUGOUT("igb_detach: begin");
693
694 /* Make sure VLANS are not using driver */
695 if (adapter->ifp->if_vlantrunk != NULL) {
696 device_printf(dev,"Vlan in use, detach first\n");
697 return (EBUSY);
698 }
699
700 ether_ifdetach(adapter->ifp);
701
702 if (adapter->led_dev != NULL)
703 led_destroy(adapter->led_dev);
704
705#ifdef DEVICE_POLLING
706 if (ifp->if_capenable & IFCAP_POLLING)
707 ether_poll_deregister(ifp);
708#endif
709
710 IGB_CORE_LOCK(adapter);
711 adapter->in_detach = 1;
712 igb_stop(adapter);
713 IGB_CORE_UNLOCK(adapter);
714
715 e1000_phy_hw_reset(&adapter->hw);
716
717 /* Give control back to firmware */
718 igb_release_manageability(adapter);
719 igb_release_hw_control(adapter);
720
721 if (adapter->wol) {
722 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
723 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
724 igb_enable_wakeup(dev);
725 }
726
727 /* Unregister VLAN events */
728 if (adapter->vlan_attach != NULL)
729 EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
730 if (adapter->vlan_detach != NULL)
731 EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);
732
733 callout_drain(&adapter->timer);
734
735#ifdef DEV_NETMAP
736 netmap_detach(adapter->ifp);
737#endif /* DEV_NETMAP */
738 igb_free_pci_resources(adapter);
739 bus_generic_detach(dev);
740 if_free(ifp);
741
742 igb_free_transmit_structures(adapter);
743 igb_free_receive_structures(adapter);
744 if (adapter->mta != NULL)
745 free(adapter->mta, M_DEVBUF);
746
747 IGB_CORE_LOCK_DESTROY(adapter);
748
749 return (0);
750}
751
752/*********************************************************************
753 *
754 * Shutdown entry point
755 *
756 **********************************************************************/
757
758static int
759igb_shutdown(device_t dev)
760{
761 return igb_suspend(dev);
762}
763
764/*
765 * Suspend/resume device methods.
766 */
767static int
768igb_suspend(device_t dev)
769{
770 struct adapter *adapter = device_get_softc(dev);
771
772 IGB_CORE_LOCK(adapter);
773
774 igb_stop(adapter);
775
776 igb_release_manageability(adapter);
777 igb_release_hw_control(adapter);
778
779 if (adapter->wol) {
780 E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
781 E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
782 igb_enable_wakeup(dev);
783 }
784
785 IGB_CORE_UNLOCK(adapter);
786
787 return bus_generic_suspend(dev);
788}
789
790static int
791igb_resume(device_t dev)
792{
793 struct adapter *adapter = device_get_softc(dev);
794 struct tx_ring *txr = adapter->tx_rings;
795 struct ifnet *ifp = adapter->ifp;
796
797 IGB_CORE_LOCK(adapter);
798 igb_init_locked(adapter);
799 igb_init_manageability(adapter);
800
801 if ((ifp->if_flags & IFF_UP) &&
802 (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
803 for (int i = 0; i < adapter->num_queues; i++, txr++) {
804 IGB_TX_LOCK(txr);
805#ifndef IGB_LEGACY_TX
806 /* Process the stack queue only if not depleted */
807 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
808 !drbr_empty(ifp, txr->br))
809 igb_mq_start_locked(ifp, txr);
810#else
811 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
812 igb_start_locked(txr, ifp);
813#endif
814 IGB_TX_UNLOCK(txr);
815 }
816 }
817 IGB_CORE_UNLOCK(adapter);
818
819 return bus_generic_resume(dev);
820}
821
822
823#ifdef IGB_LEGACY_TX
824
825/*********************************************************************
826 * Transmit entry point
827 *
828 * igb_start is called by the stack to initiate a transmit.
829 * The driver will remain in this routine as long as there are
830 * packets to transmit and transmit resources are available.
831 * In case resources are not available, the stack is notified
832 * and the packet is requeued.
833 **********************************************************************/
834
835static void
836igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
837{
838 struct adapter *adapter = ifp->if_softc;
839 struct mbuf *m_head;
840
841 IGB_TX_LOCK_ASSERT(txr);
842
843 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
844 IFF_DRV_RUNNING)
845 return;
846 if (!adapter->link_active)
847 return;
848
849 /* Call cleanup if number of TX descriptors low */
850 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
851 igb_txeof(txr);
852
853 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
854 if (txr->tx_avail <= IGB_MAX_SCATTER) {
855 txr->queue_status |= IGB_QUEUE_DEPLETED;
856 break;
857 }
858 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
859 if (m_head == NULL)
860 break;
861 /*
862 * Encapsulation can modify our pointer, and/or make it
863 * NULL on failure. In that event, we can't requeue.
864 */
865 if (igb_xmit(txr, &m_head)) {
866 if (m_head != NULL)
867 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
868 if (txr->tx_avail <= IGB_MAX_SCATTER)
869 txr->queue_status |= IGB_QUEUE_DEPLETED;
870 break;
871 }
872
873 /* Send a copy of the frame to the BPF listener */
874 ETHER_BPF_MTAP(ifp, m_head);
875
876 /* Set watchdog on */
877 txr->watchdog_time = ticks;
878 txr->queue_status |= IGB_QUEUE_WORKING;
879 }
880}
881
882/*
883 * Legacy TX driver routine, called from the
884 * stack, always uses tx[0], and spins for it.
885 * Should not be used with multiqueue TX.
886 */
887static void
888igb_start(struct ifnet *ifp)
889{
890 struct adapter *adapter = ifp->if_softc;
891 struct tx_ring *txr = adapter->tx_rings;
892
893 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
894 IGB_TX_LOCK(txr);
895 igb_start_locked(txr, ifp);
896 IGB_TX_UNLOCK(txr);
897 }
898 return;
899}
900
901#else /* ~IGB_LEGACY_TX */
902
903/*
904** Multiqueue Transmit Entry:
905** quick turnaround to the stack
906**
907*/
908static int
909igb_mq_start(struct ifnet *ifp, struct mbuf *m)
910{
911 struct adapter *adapter = ifp->if_softc;
912 struct igb_queue *que;
913 struct tx_ring *txr;
914 int i, err = 0;
915#ifdef RSS
916 uint32_t bucket_id;
917#endif
918
919 /* Which queue to use */
920 /*
921 * When doing RSS, map it to the same outbound queue
922 * as the incoming flow would be mapped to.
923 *
924 * If everything is set up correctly, it should be the
925 * same bucket as the one the current CPU is in.
926 */
927 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
928#ifdef RSS
929 if (rss_hash2bucket(m->m_pkthdr.flowid,
930 M_HASHTYPE_GET(m), &bucket_id) == 0) {
931 /* XXX TODO: spit out something if bucket_id > num_queues? */
932 i = bucket_id % adapter->num_queues;
933 } else {
934#endif
935 i = m->m_pkthdr.flowid % adapter->num_queues;
936#ifdef RSS
937 }
938#endif
939 } else {
940 i = curcpu % adapter->num_queues;
941 }
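	/*
	 * Worked example of the mapping above: a frame with flowid 7 on a
	 * 4-queue adapter is enqueued on TX ring 7 % 4 = 3; if RX steering
	 * hashes the same way, the flow stays on a single queue.
	 */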
942 txr = &adapter->tx_rings[i];
943 que = &adapter->queues[i];
944
945 err = drbr_enqueue(ifp, txr->br, m);
946 if (err)
947 return (err);
948 if (IGB_TX_TRYLOCK(txr)) {
949 igb_mq_start_locked(ifp, txr);
950 IGB_TX_UNLOCK(txr);
951 } else
952 taskqueue_enqueue(que->tq, &txr->txq_task);
953
954 return (0);
955}
956
957static int
958igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
959{
960 struct adapter *adapter = txr->adapter;
961 struct mbuf *next;
962 int err = 0, enq = 0;
963
964 IGB_TX_LOCK_ASSERT(txr);
965
966 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
967 adapter->link_active == 0)
968 return (ENETDOWN);
969
970 /* Process the queue */
971 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
972 if ((err = igb_xmit(txr, &next)) != 0) {
973 if (next == NULL) {
974 /* It was freed, move forward */
975 drbr_advance(ifp, txr->br);
976 } else {
977 /*
978 * Still have one left, it may not be
979 * the same since the transmit function
980 * may have changed it.
981 */
982 drbr_putback(ifp, txr->br, next);
983 }
984 break;
985 }
986 drbr_advance(ifp, txr->br);
987 enq++;
988 if (next->m_flags & M_MCAST && adapter->vf_ifp)
989 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
990 ETHER_BPF_MTAP(ifp, next);
991 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
992 break;
993 }
994 if (enq > 0) {
995 /* Set the watchdog */
996 txr->queue_status |= IGB_QUEUE_WORKING;
997 txr->watchdog_time = ticks;
998 }
999 if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
1000 igb_txeof(txr);
1001 if (txr->tx_avail <= IGB_MAX_SCATTER)
1002 txr->queue_status |= IGB_QUEUE_DEPLETED;
1003 return (err);
1004}
1005
1006/*
1007 * Called from a taskqueue to drain queued transmit packets.
1008 */
1009static void
1010igb_deferred_mq_start(void *arg, int pending)
1011{
1012 struct tx_ring *txr = arg;
1013 struct adapter *adapter = txr->adapter;
1014 struct ifnet *ifp = adapter->ifp;
1015
1016 IGB_TX_LOCK(txr);
1017 if (!drbr_empty(ifp, txr->br))
1018 igb_mq_start_locked(ifp, txr);
1019 IGB_TX_UNLOCK(txr);
1020}
1021
1022/*
1023** Flush all ring buffers
1024*/
1025static void
1026igb_qflush(struct ifnet *ifp)
1027{
1028 struct adapter *adapter = ifp->if_softc;
1029 struct tx_ring *txr = adapter->tx_rings;
1030 struct mbuf *m;
1031
1032 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1033 IGB_TX_LOCK(txr);
1034 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
1035 m_freem(m);
1036 IGB_TX_UNLOCK(txr);
1037 }
1038 if_qflush(ifp);
1039}
1040#endif /* ~IGB_LEGACY_TX */
1041
1042/*********************************************************************
1043 * Ioctl entry point
1044 *
1045 * igb_ioctl is called when the user wants to configure the
1046 * interface.
1047 *
1048 * return 0 on success, positive on failure
1049 **********************************************************************/
1050
1051static int
1052igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
1053{
1054 struct adapter *adapter = ifp->if_softc;
1055 struct ifreq *ifr = (struct ifreq *)data;
1056#if defined(INET) || defined(INET6)
1057 struct ifaddr *ifa = (struct ifaddr *)data;
1058#endif
1059 bool avoid_reset = FALSE;
1060 int error = 0;
1061
1062 if (adapter->in_detach)
1063 return (error);
1064
1065 switch (command) {
1066 case SIOCSIFADDR:
1067#ifdef INET
1068 if (ifa->ifa_addr->sa_family == AF_INET)
1069 avoid_reset = TRUE;
1070#endif
1071#ifdef INET6
1072 if (ifa->ifa_addr->sa_family == AF_INET6)
1073 avoid_reset = TRUE;
1074#endif
1075 /*
1076 ** Calling init results in link renegotiation,
1077 ** so we avoid doing it when possible.
1078 */
1079 if (avoid_reset) {
1080 ifp->if_flags |= IFF_UP;
1081 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1082 igb_init(adapter);
1083#ifdef INET
1084 if (!(ifp->if_flags & IFF_NOARP))
1085 arp_ifinit(ifp, ifa);
1086#endif
1087 } else
1088 error = ether_ioctl(ifp, command, data);
1089 break;
1090 case SIOCSIFMTU:
1091 {
1092 int max_frame_size;
1093
1094 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");
1095
1096 IGB_CORE_LOCK(adapter);
1097 max_frame_size = 9234;
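		/*
		 * A 9234-byte frame limit allows an MTU of up to
		 * 9234 - 14 - 4 = 9216 bytes.
		 */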
1098 if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
1099 ETHER_CRC_LEN) {
1100 IGB_CORE_UNLOCK(adapter);
1101 error = EINVAL;
1102 break;
1103 }
1104
1105 ifp->if_mtu = ifr->ifr_mtu;
1106 adapter->max_frame_size =
1107 ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
1108 igb_init_locked(adapter);
1109 IGB_CORE_UNLOCK(adapter);
1110 break;
1111 }
1112 case SIOCSIFFLAGS:
1113 IOCTL_DEBUGOUT("ioctl rcv'd:\
1114 SIOCSIFFLAGS (Set Interface Flags)");
1115 IGB_CORE_LOCK(adapter);
1116 if (ifp->if_flags & IFF_UP) {
1117 if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1118 if ((ifp->if_flags ^ adapter->if_flags) &
1119 (IFF_PROMISC | IFF_ALLMULTI)) {
1120 igb_disable_promisc(adapter);
1121 igb_set_promisc(adapter);
1122 }
1123 } else
1124 igb_init_locked(adapter);
1125 } else
1126 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1127 igb_stop(adapter);
1128 adapter->if_flags = ifp->if_flags;
1129 IGB_CORE_UNLOCK(adapter);
1130 break;
1131 case SIOCADDMULTI:
1132 case SIOCDELMULTI:
1133 IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
1134 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1135 IGB_CORE_LOCK(adapter);
1136 igb_disable_intr(adapter);
1137 igb_set_multi(adapter);
1138#ifdef DEVICE_POLLING
1139 if (!(ifp->if_capenable & IFCAP_POLLING))
1140#endif
1141 igb_enable_intr(adapter);
1142 IGB_CORE_UNLOCK(adapter);
1143 }
1144 break;
1145 case SIOCSIFMEDIA:
1146 /* Check SOL/IDER usage */
1147 IGB_CORE_LOCK(adapter);
1148 if (e1000_check_reset_block(&adapter->hw)) {
1149 IGB_CORE_UNLOCK(adapter);
1150 device_printf(adapter->dev, "Media change is"
1151 " blocked due to SOL/IDER session.\n");
1152 break;
1153 }
1154 IGB_CORE_UNLOCK(adapter);
1155 case SIOCGIFMEDIA:
1156 IOCTL_DEBUGOUT("ioctl rcv'd: \
1157 SIOCxIFMEDIA (Get/Set Interface Media)");
1158 error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
1159 break;
1160 case SIOCSIFCAP:
1161 {
1162 int mask, reinit;
1163
1164 IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
1165 reinit = 0;
1166 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1167#ifdef DEVICE_POLLING
1168 if (mask & IFCAP_POLLING) {
1169 if (ifr->ifr_reqcap & IFCAP_POLLING) {
1170 error = ether_poll_register(igb_poll, ifp);
1171 if (error)
1172 return (error);
1173 IGB_CORE_LOCK(adapter);
1174 igb_disable_intr(adapter);
1175 ifp->if_capenable |= IFCAP_POLLING;
1176 IGB_CORE_UNLOCK(adapter);
1177 } else {
1178 error = ether_poll_deregister(ifp);
1179 /* Enable interrupt even in error case */
1180 IGB_CORE_LOCK(adapter);
1181 igb_enable_intr(adapter);
1182 ifp->if_capenable &= ~IFCAP_POLLING;
1183 IGB_CORE_UNLOCK(adapter);
1184 }
1185 }
1186#endif
1187 if (mask & IFCAP_HWCSUM) {
1188 ifp->if_capenable ^= IFCAP_HWCSUM;
1189 reinit = 1;
1190 }
1191 if (mask & IFCAP_TSO4) {
1192 ifp->if_capenable ^= IFCAP_TSO4;
1193 reinit = 1;
1194 }
1195 if (mask & IFCAP_TSO6) {
1196 ifp->if_capenable ^= IFCAP_TSO6;
1197 reinit = 1;
1198 }
1199 if (mask & IFCAP_VLAN_HWTAGGING) {
1200 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1201 reinit = 1;
1202 }
1203 if (mask & IFCAP_VLAN_HWFILTER) {
1204 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1205 reinit = 1;
1206 }
1207 if (mask & IFCAP_VLAN_HWTSO) {
1208 ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1209 reinit = 1;
1210 }
1211 if (mask & IFCAP_LRO) {
1212 ifp->if_capenable ^= IFCAP_LRO;
1213 reinit = 1;
1214 }
1215 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
1216 igb_init(adapter);
1217 VLAN_CAPABILITIES(ifp);
1218 break;
1219 }
1220
1221 default:
1222 error = ether_ioctl(ifp, command, data);
1223 break;
1224 }
1225
1226 return (error);
1227}
1228
1229
1230/*********************************************************************
1231 * Init entry point
1232 *
1233 * This routine is used in two ways. It is used by the stack as
1234 * the init entry point in the network interface structure. It is also used
1235 * by the driver as a hw/sw initialization routine to get to a
1236 * consistent state.
1237 *
1238 * return 0 on success, positive on failure
1239 **********************************************************************/
1240
1241static void
1242igb_init_locked(struct adapter *adapter)
1243{
1244 struct ifnet *ifp = adapter->ifp;
1245 device_t dev = adapter->dev;
1246
1247 INIT_DEBUGOUT("igb_init: begin");
1248
1249 IGB_CORE_LOCK_ASSERT(adapter);
1250
1251 igb_disable_intr(adapter);
1252 callout_stop(&adapter->timer);
1253
1254 /* Get the latest mac address, User can use a LAA */
1255 bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
1256 ETHER_ADDR_LEN);
1257
1258 /* Put the address into the Receive Address Array */
1259 e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
1260
1261 igb_reset(adapter);
1262 igb_update_link_status(adapter);
1263
1264 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
1265
1266 /* Set hardware offload abilities */
1267 ifp->if_hwassist = 0;
1268 if (ifp->if_capenable & IFCAP_TXCSUM) {
1269 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
1270#if __FreeBSD_version >= 800000
1271 if ((adapter->hw.mac.type == e1000_82576) ||
1272 (adapter->hw.mac.type == e1000_82580))
1273 ifp->if_hwassist |= CSUM_SCTP;
1274#endif
1275 }
1276
1277 if (ifp->if_capenable & IFCAP_TSO)
1278 ifp->if_hwassist |= CSUM_TSO;
1279
1280 /* Configure for OS presence */
1281 igb_init_manageability(adapter);
1282
1283 /* Prepare transmit descriptors and buffers */
1284 igb_setup_transmit_structures(adapter);
1285 igb_initialize_transmit_units(adapter);
1286
1287 /* Setup Multicast table */
1288 igb_set_multi(adapter);
1289
1290 /*
1291 ** Figure out the desired mbuf pool
1292 ** for doing jumbo/packetsplit
1293 */
1294 if (adapter->max_frame_size <= 2048)
1295 adapter->rx_mbuf_sz = MCLBYTES;
1296 else if (adapter->max_frame_size <= 4096)
1297 adapter->rx_mbuf_sz = MJUMPAGESIZE;
1298 else
1299 adapter->rx_mbuf_sz = MJUM9BYTES;
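	/*
	 * That is: standard frames use 2KB clusters, frames up to 4KB use
	 * page-sized jumbo clusters (4KB on most platforms), and anything
	 * larger falls back to 9KB jumbo clusters.
	 */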
1300
1301 /* Prepare receive descriptors and buffers */
1302 if (igb_setup_receive_structures(adapter)) {
1303 device_printf(dev, "Could not setup receive structures\n");
1304 return;
1305 }
1306 igb_initialize_receive_units(adapter);
1307 e1000_rx_fifo_flush_82575(&adapter->hw);
1308
1309 /* Enable VLAN support */
1310 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1311 igb_setup_vlan_hw_support(adapter);
1312
1313 /* Don't lose promiscuous settings */
1314 igb_set_promisc(adapter);
1315
1316 ifp->if_drv_flags |= IFF_DRV_RUNNING;
1317 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1318
1319 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
1320 e1000_clear_hw_cntrs_base_generic(&adapter->hw);
1321
1322 if (adapter->msix > 1) /* Set up queue routing */
1323 igb_configure_queues(adapter);
1324
1325 /* this clears any pending interrupts */
1326 E1000_READ_REG(&adapter->hw, E1000_ICR);
1327#ifdef DEVICE_POLLING
1328 /*
1329 * Only enable interrupts if we are not polling, make sure
1330 * they are off otherwise.
1331 */
1332 if (ifp->if_capenable & IFCAP_POLLING)
1333 igb_disable_intr(adapter);
1334 else
1335#endif /* DEVICE_POLLING */
1336 {
1337 igb_enable_intr(adapter);
1338 E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
1339 }
1340
1341 /* Set Energy Efficient Ethernet */
1342 if (adapter->hw.phy.media_type == e1000_media_type_copper) {
1343 if (adapter->hw.mac.type == e1000_i354)
1344 e1000_set_eee_i354(&adapter->hw);
1345 else
1346 e1000_set_eee_i350(&adapter->hw);
1347 }
1348}
1349
1350static void
1351igb_init(void *arg)
1352{
1353 struct adapter *adapter = arg;
1354
1355 IGB_CORE_LOCK(adapter);
1356 igb_init_locked(adapter);
1357 IGB_CORE_UNLOCK(adapter);
1358}
1359
1360
1361static void
1362igb_handle_que(void *context, int pending)
1363{
1364 struct igb_queue *que = context;
1365 struct adapter *adapter = que->adapter;
1366 struct tx_ring *txr = que->txr;
1367 struct ifnet *ifp = adapter->ifp;
1368
1369 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1370 bool more;
1371
1372 more = igb_rxeof(que, adapter->rx_process_limit, NULL);
1373
1374 IGB_TX_LOCK(txr);
1375 igb_txeof(txr);
1376#ifndef IGB_LEGACY_TX
1377 /* Process the stack queue only if not depleted */
1378 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1379 !drbr_empty(ifp, txr->br))
1380 igb_mq_start_locked(ifp, txr);
1381#else
1382 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1383 igb_start_locked(txr, ifp);
1384#endif
1385 IGB_TX_UNLOCK(txr);
1386 /* Do we need another? */
1387 if (more) {
1388 taskqueue_enqueue(que->tq, &que->que_task);
1389 return;
1390 }
1391 }
1392
1393#ifdef DEVICE_POLLING
1394 if (ifp->if_capenable & IFCAP_POLLING)
1395 return;
1396#endif
1397 /* Reenable this interrupt */
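	/*
	 * A queue with its own MSI-X vector re-arms only its EIMS bit;
	 * otherwise the full interrupt mask is restored.
	 */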
1398 if (que->eims)
1399 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1400 else
1401 igb_enable_intr(adapter);
1402}
1403
1404/* Deal with link in a sleepable context */
1405static void
1406igb_handle_link(void *context, int pending)
1407{
1408 struct adapter *adapter = context;
1409
1410 IGB_CORE_LOCK(adapter);
1411 igb_handle_link_locked(adapter);
1412 IGB_CORE_UNLOCK(adapter);
1413}
1414
1415static void
1416igb_handle_link_locked(struct adapter *adapter)
1417{
1418 struct tx_ring *txr = adapter->tx_rings;
1419 struct ifnet *ifp = adapter->ifp;
1420
1421 IGB_CORE_LOCK_ASSERT(adapter);
1422 adapter->hw.mac.get_link_status = 1;
1423 igb_update_link_status(adapter);
1424 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
1425 for (int i = 0; i < adapter->num_queues; i++, txr++) {
1426 IGB_TX_LOCK(txr);
1427#ifndef IGB_LEGACY_TX
1428 /* Process the stack queue only if not depleted */
1429 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1430 !drbr_empty(ifp, txr->br))
1431 igb_mq_start_locked(ifp, txr);
1432#else
1433 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1434 igb_start_locked(txr, ifp);
1435#endif
1436 IGB_TX_UNLOCK(txr);
1437 }
1438 }
1439}
1440
1441/*********************************************************************
1442 *
1443 * MSI/Legacy Deferred
1444 * Interrupt Service routine
1445 *
1446 *********************************************************************/
1447static int
1448igb_irq_fast(void *arg)
1449{
1450 struct adapter *adapter = arg;
1451 struct igb_queue *que = adapter->queues;
1452 u32 reg_icr;
1453
1454
1455 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1456
1457 /* Hot eject? */
1458 if (reg_icr == 0xffffffff)
1459 return FILTER_STRAY;
1460
1461 /* Definitely not our interrupt. */
1462 if (reg_icr == 0x0)
1463 return FILTER_STRAY;
1464
1465 if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
1466 return FILTER_STRAY;
1467
1468 /*
1469 * Mask interrupts until the taskqueue is finished running. This is
1470 * cheap, just assume that it is needed. This also works around the
1471 * MSI message reordering errata on certain systems.
1472 */
1473 igb_disable_intr(adapter);
1474 taskqueue_enqueue(que->tq, &que->que_task);
1475
1476 /* Link status change */
1477 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1478 taskqueue_enqueue(que->tq, &adapter->link_task);
1479
1480 if (reg_icr & E1000_ICR_RXO)
1481 adapter->rx_overruns++;
1482 return FILTER_HANDLED;
1483}
1484
1485#ifdef DEVICE_POLLING
1486#if __FreeBSD_version >= 800000
1487#define POLL_RETURN_COUNT(a) (a)
1488static int
1489#else
1490#define POLL_RETURN_COUNT(a)
1491static void
1492#endif
1493igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
1494{
1495 struct adapter *adapter = ifp->if_softc;
1496 struct igb_queue *que;
1497 struct tx_ring *txr;
1498 u32 reg_icr, rx_done = 0;
1499 u32 loop = IGB_MAX_LOOP;
1500 bool more;
1501
1502 IGB_CORE_LOCK(adapter);
1503 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1504 IGB_CORE_UNLOCK(adapter);
1505 return POLL_RETURN_COUNT(rx_done);
1506 }
1507
1508 if (cmd == POLL_AND_CHECK_STATUS) {
1509 reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1510 /* Link status change */
1511 if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
1512 igb_handle_link_locked(adapter);
1513
1514 if (reg_icr & E1000_ICR_RXO)
1515 adapter->rx_overruns++;
1516 }
1517 IGB_CORE_UNLOCK(adapter);
1518
1519 for (int i = 0; i < adapter->num_queues; i++) {
1520 que = &adapter->queues[i];
1521 txr = que->txr;
1522
1523 igb_rxeof(que, count, &rx_done);
1524
1525 IGB_TX_LOCK(txr);
1526 do {
1527 more = igb_txeof(txr);
1528 } while (loop-- && more);
1529#ifndef IGB_LEGACY_TX
1530 if (!drbr_empty(ifp, txr->br))
1531 igb_mq_start_locked(ifp, txr);
1532#else
1533 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1534 igb_start_locked(txr, ifp);
1535#endif
1536 IGB_TX_UNLOCK(txr);
1537 }
1538
1539 return POLL_RETURN_COUNT(rx_done);
1540}
1541#endif /* DEVICE_POLLING */
1542
1543/*********************************************************************
1544 *
1545 * MSIX Que Interrupt Service routine
1546 *
1547 **********************************************************************/
1548static void
1549igb_msix_que(void *arg)
1550{
1551 struct igb_queue *que = arg;
1552 struct adapter *adapter = que->adapter;
1553 struct ifnet *ifp = adapter->ifp;
1554 struct tx_ring *txr = que->txr;
1555 struct rx_ring *rxr = que->rxr;
1556 u32 newitr = 0;
1557 bool more_rx;
1558
1559 /* Ignore spurious interrupts */
1560 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1561 return;
1562
1563 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
1564 ++que->irqs;
1565
1566 IGB_TX_LOCK(txr);
1567 igb_txeof(txr);
1568#ifndef IGB_LEGACY_TX
1569 /* Process the stack queue only if not depleted */
1570 if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
1571 !drbr_empty(ifp, txr->br))
1572 igb_mq_start_locked(ifp, txr);
1573#else
1574 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
1575 igb_start_locked(txr, ifp);
1576#endif
1577 IGB_TX_UNLOCK(txr);
1578
1579 more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);
1580
1581 if (adapter->enable_aim == FALSE)
1582 goto no_calc;
1583 /*
1584 ** Do Adaptive Interrupt Moderation:
1585 ** - Write out last calculated setting
1586 ** - Calculate based on average size over
1587 ** the last interval.
1588 */
1589 if (que->eitr_setting)
1590 E1000_WRITE_REG(&adapter->hw,
1591 E1000_EITR(que->msix), que->eitr_setting);
1592
1593 que->eitr_setting = 0;
1594
1595 /* Idle, do nothing */
1596 if ((txr->bytes == 0) && (rxr->bytes == 0))
1597 goto no_calc;
1598
1599 /* Use half the default if sub-gig */
1600 if (adapter->link_speed != 1000)
1601 newitr = IGB_DEFAULT_ITR / 2;
1602 else {
1603 if ((txr->bytes) && (txr->packets))
1604 newitr = txr->bytes/txr->packets;
1605 if ((rxr->bytes) && (rxr->packets))
1606 newitr = max(newitr,
1607 (rxr->bytes / rxr->packets));
1608 newitr += 24; /* account for hardware frame, crc */
1609 /* set an upper boundary */
1610 newitr = min(newitr, 3000);
1611 /* Be nice to the mid range */
1612 if ((newitr > 300) && (newitr < 1200))
1613 newitr = (newitr / 3);
1614 else
1615 newitr = (newitr / 2);
1616 }
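	/*
	 * Worked example of the calculation above: with an average of
	 * 1500 bytes per packet at gigabit speed, newitr = 1500 + 24 = 1524;
	 * that is outside the 300-1200 mid range, so it is halved to 762,
	 * and the mask below rounds it down to 760.
	 */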
1617 newitr &= 0x7FFC; /* Mask invalid bits */
1618 if (adapter->hw.mac.type == e1000_82575)
1619 newitr |= newitr << 16;
1620 else
1621 newitr |= E1000_EITR_CNT_IGNR;
1622
1623 /* save for next interrupt */
1624 que->eitr_setting = newitr;
1625
1626 /* Reset state */
1627 txr->bytes = 0;
1628 txr->packets = 0;
1629 rxr->bytes = 0;
1630 rxr->packets = 0;
1631
1632no_calc:
1633 /* Schedule a clean task if needed*/
1634 if (more_rx)
1635 taskqueue_enqueue(que->tq, &que->que_task);
1636 else
1637 /* Reenable this interrupt */
1638 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
1639 return;
1640}
1641
1642
1643/*********************************************************************
1644 *
1645 * MSIX Link Interrupt Service routine
1646 *
1647 **********************************************************************/
1648
1649static void
1650igb_msix_link(void *arg)
1651{
1652 struct adapter *adapter = arg;
1653 u32 icr;
1654
1655 ++adapter->link_irq;
1656 icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1657 if (!(icr & E1000_ICR_LSC))
1658 goto spurious;
1659 igb_handle_link(adapter, 0);
1660
1661spurious:
1662 /* Rearm */
1663 E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
1664 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
1665 return;
1666}
1667
1668
1669/*********************************************************************
1670 *
1671 * Media Ioctl callback
1672 *
1673 * This routine is called whenever the user queries the status of
1674 * the interface using ifconfig.
1675 *
1676 **********************************************************************/
1677static void
1678igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
1679{
1680 struct adapter *adapter = ifp->if_softc;
1681
1682 INIT_DEBUGOUT("igb_media_status: begin");
1683
1684 IGB_CORE_LOCK(adapter);
1685 igb_update_link_status(adapter);
1686
1687 ifmr->ifm_status = IFM_AVALID;
1688 ifmr->ifm_active = IFM_ETHER;
1689
1690 if (!adapter->link_active) {
1691 IGB_CORE_UNLOCK(adapter);
1692 return;
1693 }
1694
1695 ifmr->ifm_status |= IFM_ACTIVE;
1696
1697 switch (adapter->link_speed) {
1698 case 10:
1699 ifmr->ifm_active |= IFM_10_T;
1700 break;
1701 case 100:
1702 /*
1703 ** Support for 100Mb SFP - these are Fiber
1704 ** but the media type appears as serdes
1705 */
1706 if (adapter->hw.phy.media_type ==
1707 e1000_media_type_internal_serdes)
1708 ifmr->ifm_active |= IFM_100_FX;
1709 else
1710 ifmr->ifm_active |= IFM_100_TX;
1711 break;
1712 case 1000:
1713 ifmr->ifm_active |= IFM_1000_T;
1714 break;
1715 case 2500:
1716 ifmr->ifm_active |= IFM_2500_SX;
1717 break;
1718 }
1719
1720 if (adapter->link_duplex == FULL_DUPLEX)
1721 ifmr->ifm_active |= IFM_FDX;
1722 else
1723 ifmr->ifm_active |= IFM_HDX;
1724
1725 IGB_CORE_UNLOCK(adapter);
1726}
1727
1728/*********************************************************************
1729 *
1730 * Media Ioctl callback
1731 *
1732 * This routine is called when the user changes speed/duplex using
1733 * the media/mediaopt option with ifconfig.
1734 *
1735 **********************************************************************/
1736static int
1737igb_media_change(struct ifnet *ifp)
1738{
1739 struct adapter *adapter = ifp->if_softc;
1740 struct ifmedia *ifm = &adapter->media;
1741
1742 INIT_DEBUGOUT("igb_media_change: begin");
1743
1744 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
1745 return (EINVAL);
1746
1747 IGB_CORE_LOCK(adapter);
1748 switch (IFM_SUBTYPE(ifm->ifm_media)) {
1749 case IFM_AUTO:
1750 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1751 adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
1752 break;
1753 case IFM_1000_LX:
1754 case IFM_1000_SX:
1755 case IFM_1000_T:
1756 adapter->hw.mac.autoneg = DO_AUTO_NEG;
1757 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
1758 break;
1759 case IFM_100_TX:
1760 adapter->hw.mac.autoneg = FALSE;
1761 adapter->hw.phy.autoneg_advertised = 0;
1762 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1763 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
1764 else
1765 adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
1766 break;
1767 case IFM_10_T:
1768 adapter->hw.mac.autoneg = FALSE;
1769 adapter->hw.phy.autoneg_advertised = 0;
1770 if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
1771 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
1772 else
1773 adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
1774 break;
1775 default:
1776 device_printf(adapter->dev, "Unsupported media type\n");
1777 }
1778
1779 igb_init_locked(adapter);
1780 IGB_CORE_UNLOCK(adapter);
1781
1782 return (0);
1783}
1784
1785
1786/*********************************************************************
1787 *
1788 * This routine maps the mbufs to Advanced TX descriptors.
1789 *
1790 **********************************************************************/
1791static int
1792igb_xmit(struct tx_ring *txr, struct mbuf **m_headp)
1793{
1794 struct adapter *adapter = txr->adapter;
1795 u32 olinfo_status = 0, cmd_type_len;
1796 int i, j, error, nsegs;
1797 int first;
1798 bool remap = TRUE;
1799 struct mbuf *m_head;
1800 bus_dma_segment_t segs[IGB_MAX_SCATTER];
1801 bus_dmamap_t map;
1802 struct igb_tx_buf *txbuf;
1803 union e1000_adv_tx_desc *txd = NULL;
1804
1805 m_head = *m_headp;
1806
1807 /* Basic descriptor defines */
1808 cmd_type_len = (E1000_ADVTXD_DTYP_DATA |
1809 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT);
1810
1811 if (m_head->m_flags & M_VLANTAG)
1812 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
1813
1814 /*
1815 * It is important to capture the first descriptor
1816 * used, because it will contain the index of
1817 * the one we tell the hardware to report back on.
1818 */
1819 first = txr->next_avail_desc;
1820 txbuf = &txr->tx_buffers[first];
1821 map = txbuf->map;
1822
1823 /*
1824 * Map the packet for DMA.
1825 */
1826retry:
1827 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
1828 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
1829
1830 if (__predict_false(error)) {
1831 struct mbuf *m;
1832
1833 switch (error) {
1834 case EFBIG:
1835 /* Try it again? - one try */
1836 if (remap == TRUE) {
1837 remap = FALSE;
1838 m = m_collapse(*m_headp, M_NOWAIT,
1839 IGB_MAX_SCATTER);
1840 if (m == NULL) {
1841 adapter->mbuf_defrag_failed++;
1842 m_freem(*m_headp);
1843 *m_headp = NULL;
1844 return (ENOBUFS);
1845 }
1846 *m_headp = m;
1847 goto retry;
1848 } else
1849 return (error);
1850 default:
1851 txr->no_tx_dma_setup++;
1852 m_freem(*m_headp);
1853 *m_headp = NULL;
1854 return (error);
1855 }
1856 }
1857
1858 /* Make certain there are enough descriptors */
1859 if (nsegs > txr->tx_avail - 2) {
1860 txr->no_desc_avail++;
1861 bus_dmamap_unload(txr->txtag, map);
1862 return (ENOBUFS);
1863 }
1864 m_head = *m_headp;
1865
1866 /*
1867 ** Set up the appropriate offload context
1868 ** this will consume the first descriptor
1869 */
1870 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
1871 if (__predict_false(error)) {
1872 m_freem(*m_headp);
1873 *m_headp = NULL;
1874 return (error);
1875 }
1876
1877 /* 82575 needs the queue index added */
1878 if (adapter->hw.mac.type == e1000_82575)
1879 olinfo_status |= txr->me << 4;
1880
1881 i = txr->next_avail_desc;
1882 for (j = 0; j < nsegs; j++) {
1883 bus_size_t seglen;
1884 bus_addr_t segaddr;
1885
1886 txbuf = &txr->tx_buffers[i];
1887 txd = &txr->tx_base[i];
1888 seglen = segs[j].ds_len;
1889 segaddr = htole64(segs[j].ds_addr);
1890
1891 txd->read.buffer_addr = segaddr;
1892 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS |
1893 cmd_type_len | seglen);
1894 txd->read.olinfo_status = htole32(olinfo_status);
1895
1896 if (++i == txr->num_desc)
1897 i = 0;
1898 }
1899
1900 txd->read.cmd_type_len |=
1901 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
1902 txr->tx_avail -= nsegs;
1903 txr->next_avail_desc = i;
1904
1905 txbuf->m_head = m_head;
1906 	/*
1907 	** Swap the maps so the last descriptor, which gets the
1908 	** completion interrupt, keeps the map that was actually
1909 	** loaded with this packet, while the first descriptor
1910 	** takes over its unused map.
1911 	*/
1912 txr->tx_buffers[first].map = txbuf->map;
1913 txbuf->map = map;
1914 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
1915
1916 /* Set the EOP descriptor that will be marked done */
1917 txbuf = &txr->tx_buffers[first];
1918 txbuf->eop = txd;
1919
1920 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1921 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1922 /*
1923 * Advance the Transmit Descriptor Tail (Tdt), this tells the
1924 * hardware that this frame is available to transmit.
1925 */
1926 ++txr->total_packets;
1927 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i);
1928
1929 return (0);
1930}
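
/*
 * Illustrative caller sketch (not compiled into the driver): the mbuf is
 * passed to igb_xmit() by reference because the routine may substitute a
 * collapsed chain for it, or free it and NULL the pointer on failure.
 * The dequeue loop below only sketches how a start routine could use it;
 * the names and error handling here are assumptions, not the driver's
 * actual transmit path.
 */
#if 0
	struct mbuf *m_head;

	while ((m_head = drbr_peek(ifp, txr->br)) != NULL) {
		if (igb_xmit(txr, &m_head) != 0) {
			if (m_head == NULL)	/* xmit freed the mbuf */
				drbr_advance(ifp, txr->br);
			else			/* requeue and retry later */
				drbr_putback(ifp, txr->br, m_head);
			break;
		}
		drbr_advance(ifp, txr->br);
		ETHER_BPF_MTAP(ifp, m_head);
	}
#endif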
1931static void
1932igb_set_promisc(struct adapter *adapter)
1933{
1934 struct ifnet *ifp = adapter->ifp;
1935 struct e1000_hw *hw = &adapter->hw;
1936 u32 reg;
1937
1938 if (adapter->vf_ifp) {
1939 e1000_promisc_set_vf(hw, e1000_promisc_enabled);
1940 return;
1941 }
1942
1943 reg = E1000_READ_REG(hw, E1000_RCTL);
1944 if (ifp->if_flags & IFF_PROMISC) {
1945 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1946 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1947 } else if (ifp->if_flags & IFF_ALLMULTI) {
1948 reg |= E1000_RCTL_MPE;
1949 reg &= ~E1000_RCTL_UPE;
1950 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1951 }
1952}
1953
1954static void
1955igb_disable_promisc(struct adapter *adapter)
1956{
1957 struct e1000_hw *hw = &adapter->hw;
1958 struct ifnet *ifp = adapter->ifp;
1959 u32 reg;
1960 int mcnt = 0;
1961
1962 if (adapter->vf_ifp) {
1963 e1000_promisc_set_vf(hw, e1000_promisc_disabled);
1964 return;
1965 }
1966 reg = E1000_READ_REG(hw, E1000_RCTL);
1967 reg &= (~E1000_RCTL_UPE);
1968 if (ifp->if_flags & IFF_ALLMULTI)
1969 mcnt = MAX_NUM_MULTICAST_ADDRESSES;
1970 else {
1971 struct ifmultiaddr *ifma;
1972#if __FreeBSD_version < 800000
1973 IF_ADDR_LOCK(ifp);
1974#else
1975 if_maddr_rlock(ifp);
1976#endif
1977 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1978 if (ifma->ifma_addr->sa_family != AF_LINK)
1979 continue;
1980 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
1981 break;
1982 mcnt++;
1983 }
1984#if __FreeBSD_version < 800000
1985 IF_ADDR_UNLOCK(ifp);
1986#else
1987 if_maddr_runlock(ifp);
1988#endif
1989 }
1990 /* Don't disable if in MAX groups */
1991 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
1992 reg &= (~E1000_RCTL_MPE);
1993 E1000_WRITE_REG(hw, E1000_RCTL, reg);
1994}
1995
1996
1997/*********************************************************************
1998 * Multicast Update
1999 *
2000 * This routine is called whenever multicast address list is updated.
2001 *
2002 **********************************************************************/
2003
2004static void
2005igb_set_multi(struct adapter *adapter)
2006{
2007 struct ifnet *ifp = adapter->ifp;
2008 struct ifmultiaddr *ifma;
2009 u32 reg_rctl = 0;
2010 u8 *mta;
2011
2012 int mcnt = 0;
2013
2014 IOCTL_DEBUGOUT("igb_set_multi: begin");
2015
2016 mta = adapter->mta;
2017 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN *
2018 MAX_NUM_MULTICAST_ADDRESSES);
2019
2020#if __FreeBSD_version < 800000
2021 IF_ADDR_LOCK(ifp);
2022#else
2023 if_maddr_rlock(ifp);
2024#endif
2025 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2026 if (ifma->ifma_addr->sa_family != AF_LINK)
2027 continue;
2028
2029 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES)
2030 break;
2031
2032 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2033 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
2034 mcnt++;
2035 }
2036#if __FreeBSD_version < 800000
2037 IF_ADDR_UNLOCK(ifp);
2038#else
2039 if_maddr_runlock(ifp);
2040#endif
2041
2042 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
2043 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2044 reg_rctl |= E1000_RCTL_MPE;
2045 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2046 } else
2047 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);
2048}
2049
2050
2051/*********************************************************************
2052 * Timer routine:
2053 * This routine checks for link status,
2054 * updates statistics, and does the watchdog.
2055 *
2056 **********************************************************************/
2057
2058static void
2059igb_local_timer(void *arg)
2060{
2061 struct adapter *adapter = arg;
2062 device_t dev = adapter->dev;
2063 struct ifnet *ifp = adapter->ifp;
2064 struct tx_ring *txr = adapter->tx_rings;
2065 struct igb_queue *que = adapter->queues;
2066 int hung = 0, busy = 0;
2067
2068
2069 IGB_CORE_LOCK_ASSERT(adapter);
2070
2071 igb_update_link_status(adapter);
2072 igb_update_stats_counters(adapter);
2073
2074 /*
2075 ** Check the TX queues status
2076 ** - central locked handling of OACTIVE
2077 ** - watchdog only if all queues show hung
2078 */
2079 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) {
2080 if ((txr->queue_status & IGB_QUEUE_HUNG) &&
2081 (adapter->pause_frames == 0))
2082 ++hung;
2083 if (txr->queue_status & IGB_QUEUE_DEPLETED)
2084 ++busy;
2085 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0)
2086 taskqueue_enqueue(que->tq, &que->que_task);
2087 }
2088 if (hung == adapter->num_queues)
2089 goto timeout;
2090 if (busy == adapter->num_queues)
2091 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2092 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) &&
2093 (busy < adapter->num_queues))
2094 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
2095
2096 adapter->pause_frames = 0;
2097 callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
2098#ifndef DEVICE_POLLING
2099 /* Schedule all queue interrupts - deadlock protection */
2100 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask);
2101#endif
2102 return;
2103
2104timeout:
2105 device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
2106 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me,
2107 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)),
2108 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me)));
2109 	device_printf(dev, "TX(%d) desc avail = %d, "
2110 "Next TX to Clean = %d\n",
2111 txr->me, txr->tx_avail, txr->next_to_clean);
2112 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2113 adapter->watchdog_events++;
2114 igb_init_locked(adapter);
2115}
2116
2117static void
2118igb_update_link_status(struct adapter *adapter)
2119{
2120 struct e1000_hw *hw = &adapter->hw;
2121 struct e1000_fc_info *fc = &hw->fc;
2122 struct ifnet *ifp = adapter->ifp;
2123 device_t dev = adapter->dev;
2124 struct tx_ring *txr = adapter->tx_rings;
2125 u32 link_check, thstat, ctrl;
2126 char *flowctl = NULL;
2127
2128 link_check = thstat = ctrl = 0;
2129
2130 /* Get the cached link value or read for real */
2131 switch (hw->phy.media_type) {
2132 case e1000_media_type_copper:
2133 if (hw->mac.get_link_status) {
2134 /* Do the work to read phy */
2135 e1000_check_for_link(hw);
2136 link_check = !hw->mac.get_link_status;
2137 } else
2138 link_check = TRUE;
2139 break;
2140 case e1000_media_type_fiber:
2141 e1000_check_for_link(hw);
2142 link_check = (E1000_READ_REG(hw, E1000_STATUS) &
2143 E1000_STATUS_LU);
2144 break;
2145 case e1000_media_type_internal_serdes:
2146 e1000_check_for_link(hw);
2147 link_check = adapter->hw.mac.serdes_has_link;
2148 break;
2149 /* VF device is type_unknown */
2150 case e1000_media_type_unknown:
2151 e1000_check_for_link(hw);
2152 link_check = !hw->mac.get_link_status;
2153 /* Fall thru */
2154 default:
2155 break;
2156 }
2157
2158 /* Check for thermal downshift or shutdown */
2159 if (hw->mac.type == e1000_i350) {
2160 thstat = E1000_READ_REG(hw, E1000_THSTAT);
2161 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT);
2162 }
2163
2164 /* Get the flow control for display */
2165 switch (fc->current_mode) {
2166 case e1000_fc_rx_pause:
2167 flowctl = "RX";
2168 break;
2169 case e1000_fc_tx_pause:
2170 flowctl = "TX";
2171 break;
2172 case e1000_fc_full:
2173 flowctl = "Full";
2174 break;
2175 case e1000_fc_none:
2176 default:
2177 flowctl = "None";
2178 break;
2179 }
2180
2181 /* Now we check if a transition has happened */
2182 if (link_check && (adapter->link_active == 0)) {
2183 e1000_get_speed_and_duplex(&adapter->hw,
2184 &adapter->link_speed, &adapter->link_duplex);
2185 if (bootverbose)
2186 device_printf(dev, "Link is up %d Mbps %s,"
2187 " Flow Control: %s\n",
2188 adapter->link_speed,
2189 ((adapter->link_duplex == FULL_DUPLEX) ?
2190 "Full Duplex" : "Half Duplex"), flowctl);
2191 adapter->link_active = 1;
2192 ifp->if_baudrate = adapter->link_speed * 1000000;
2193 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2194 (thstat & E1000_THSTAT_LINK_THROTTLE))
2195 device_printf(dev, "Link: thermal downshift\n");
2196 /* Delay Link Up for Phy update */
2197 if (((hw->mac.type == e1000_i210) ||
2198 (hw->mac.type == e1000_i211)) &&
2199 (hw->phy.id == I210_I_PHY_ID))
2200 msec_delay(I210_LINK_DELAY);
2201 /* Reset if the media type changed. */
2202 if (hw->dev_spec._82575.media_changed) {
2203 hw->dev_spec._82575.media_changed = false;
2204 adapter->flags |= IGB_MEDIA_RESET;
2205 igb_reset(adapter);
2206 }
2207 /* This can sleep */
2208 if_link_state_change(ifp, LINK_STATE_UP);
2209 } else if (!link_check && (adapter->link_active == 1)) {
2210 ifp->if_baudrate = adapter->link_speed = 0;
2211 adapter->link_duplex = 0;
2212 if (bootverbose)
2213 device_printf(dev, "Link is Down\n");
2214 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) &&
2215 (thstat & E1000_THSTAT_PWR_DOWN))
2216 device_printf(dev, "Link: thermal shutdown\n");
2217 adapter->link_active = 0;
2218 /* This can sleep */
2219 if_link_state_change(ifp, LINK_STATE_DOWN);
2220 /* Reset queue state */
2221 for (int i = 0; i < adapter->num_queues; i++, txr++)
2222 txr->queue_status = IGB_QUEUE_IDLE;
2223 }
2224}
2225
2226/*********************************************************************
2227 *
2228 * This routine disables all traffic on the adapter by issuing a
2229 * global reset on the MAC and deallocates TX/RX buffers.
2230 *
2231 **********************************************************************/
2232
2233static void
2234igb_stop(void *arg)
2235{
2236 struct adapter *adapter = arg;
2237 struct ifnet *ifp = adapter->ifp;
2238 struct tx_ring *txr = adapter->tx_rings;
2239
2240 IGB_CORE_LOCK_ASSERT(adapter);
2241
2242 INIT_DEBUGOUT("igb_stop: begin");
2243
2244 igb_disable_intr(adapter);
2245
2246 callout_stop(&adapter->timer);
2247
2248 /* Tell the stack that the interface is no longer active */
2249 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2250 ifp->if_drv_flags |= IFF_DRV_OACTIVE;
2251
2252 /* Disarm watchdog timer. */
2253 for (int i = 0; i < adapter->num_queues; i++, txr++) {
2254 IGB_TX_LOCK(txr);
2255 txr->queue_status = IGB_QUEUE_IDLE;
2256 IGB_TX_UNLOCK(txr);
2257 }
2258
2259 e1000_reset_hw(&adapter->hw);
2260 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);
2261
2262 e1000_led_off(&adapter->hw);
2263 e1000_cleanup_led(&adapter->hw);
2264}
2265
2266
2267/*********************************************************************
2268 *
2269 * Determine hardware revision.
2270 *
2271 **********************************************************************/
2272static void
2273igb_identify_hardware(struct adapter *adapter)
2274{
2275 device_t dev = adapter->dev;
2276
2277 /* Make sure our PCI config space has the necessary stuff set */
2278 pci_enable_busmaster(dev);
2279 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
2280
2281 /* Save off the information about this board */
2282 adapter->hw.vendor_id = pci_get_vendor(dev);
2283 adapter->hw.device_id = pci_get_device(dev);
2284 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
2285 adapter->hw.subsystem_vendor_id =
2286 pci_read_config(dev, PCIR_SUBVEND_0, 2);
2287 adapter->hw.subsystem_device_id =
2288 pci_read_config(dev, PCIR_SUBDEV_0, 2);
2289
2290 /* Set MAC type early for PCI setup */
2291 e1000_set_mac_type(&adapter->hw);
2292
2293 /* Are we a VF device? */
2294 if ((adapter->hw.mac.type == e1000_vfadapt) ||
2295 (adapter->hw.mac.type == e1000_vfadapt_i350))
2296 adapter->vf_ifp = 1;
2297 else
2298 adapter->vf_ifp = 0;
2299}
2300
2301static int
2302igb_allocate_pci_resources(struct adapter *adapter)
2303{
2304 device_t dev = adapter->dev;
2305 int rid;
2306
2307 rid = PCIR_BAR(0);
2308 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
2309 &rid, RF_ACTIVE);
2310 if (adapter->pci_mem == NULL) {
2311 device_printf(dev, "Unable to allocate bus resource: memory\n");
2312 return (ENXIO);
2313 }
2314 adapter->osdep.mem_bus_space_tag =
2315 rman_get_bustag(adapter->pci_mem);
2316 adapter->osdep.mem_bus_space_handle =
2317 rman_get_bushandle(adapter->pci_mem);
2318 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;
2319
2320 adapter->num_queues = 1; /* Defaults for Legacy or MSI */
2321
2322 /* This will setup either MSI/X or MSI */
2323 adapter->msix = igb_setup_msix(adapter);
2324 adapter->hw.back = &adapter->osdep;
2325
2326 return (0);
2327}
2328
2329/*********************************************************************
2330 *
2331 * Setup the Legacy or MSI Interrupt handler
2332 *
2333 **********************************************************************/
2334static int
2335igb_allocate_legacy(struct adapter *adapter)
2336{
2337 device_t dev = adapter->dev;
2338 struct igb_queue *que = adapter->queues;
2339#ifndef IGB_LEGACY_TX
2340 struct tx_ring *txr = adapter->tx_rings;
2341#endif
2342 int error, rid = 0;
2343
2344 /* Turn off all interrupts */
2345 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);
2346
2347 /* MSI RID is 1 */
2348 if (adapter->msix == 1)
2349 rid = 1;
2350
2351 /* We allocate a single interrupt resource */
2352 adapter->res = bus_alloc_resource_any(dev,
2353 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2354 if (adapter->res == NULL) {
2355 device_printf(dev, "Unable to allocate bus resource: "
2356 "interrupt\n");
2357 return (ENXIO);
2358 }
2359
2360#ifndef IGB_LEGACY_TX
2361 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr);
2362#endif
2363
2364 /*
2365 * Try allocating a fast interrupt and the associated deferred
2366 * processing contexts.
2367 */
2368 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2369 /* Make tasklet for deferred link handling */
2370 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter);
2371 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT,
2372 taskqueue_thread_enqueue, &que->tq);
2373 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq",
2374 device_get_nameunit(adapter->dev));
2375 if ((error = bus_setup_intr(dev, adapter->res,
2376 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL,
2377 adapter, &adapter->tag)) != 0) {
2378 device_printf(dev, "Failed to register fast interrupt "
2379 "handler: %d\n", error);
2380 taskqueue_free(que->tq);
2381 que->tq = NULL;
2382 return (error);
2383 }
2384
2385 return (0);
2386}
2387
2388
2389/*********************************************************************
2390 *
2391 * Setup the MSIX Queue Interrupt handlers:
2392 *
2393 **********************************************************************/
2394static int
2395igb_allocate_msix(struct adapter *adapter)
2396{
2397 device_t dev = adapter->dev;
2398 struct igb_queue *que = adapter->queues;
2399 int error, rid, vector = 0;
2400 int cpu_id = 0;
2401#ifdef RSS
2402 cpuset_t cpu_mask;
2403#endif
2404
2405 /* Be sure to start with all interrupts disabled */
2406 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
2407 E1000_WRITE_FLUSH(&adapter->hw);
2408
2409#ifdef RSS
2410 /*
2411 * If we're doing RSS, the number of queues needs to
2412 * match the number of RSS buckets that are configured.
2413 *
2414 * + If there's more queues than RSS buckets, we'll end
2415 * up with queues that get no traffic.
2416 *
2417 * + If there's more RSS buckets than queues, we'll end
2418 * up having multiple RSS buckets map to the same queue,
2419 * so there'll be some contention.
2420 */
2421 if (adapter->num_queues != rss_getnumbuckets()) {
2422 device_printf(dev,
2423 "%s: number of queues (%d) != number of RSS buckets (%d)"
2424 "; performance will be impacted.\n",
2425 __func__,
2426 adapter->num_queues,
2427 rss_getnumbuckets());
2428 }
2429#endif
2430
2431 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) {
2432 		rid = vector + 1;
2433 que->res = bus_alloc_resource_any(dev,
2434 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2435 if (que->res == NULL) {
2436 device_printf(dev,
2437 "Unable to allocate bus resource: "
2438 "MSIX Queue Interrupt\n");
2439 return (ENXIO);
2440 }
2441 error = bus_setup_intr(dev, que->res,
2442 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2443 igb_msix_que, que, &que->tag);
2444 if (error) {
2445 que->res = NULL;
2446 device_printf(dev, "Failed to register Queue handler");
2447 return (error);
2448 }
2449#if __FreeBSD_version >= 800504
2450 bus_describe_intr(dev, que->res, que->tag, "que %d", i);
2451#endif
2452 que->msix = vector;
2453 if (adapter->hw.mac.type == e1000_82575)
2454 que->eims = E1000_EICR_TX_QUEUE0 << i;
2455 else
2456 que->eims = 1 << vector;
2457
2458#ifdef RSS
2459 /*
2460 * The queue ID is used as the RSS layer bucket ID.
2461 * We look up the queue ID -> RSS CPU ID and select
2462 * that.
2463 */
2464 cpu_id = rss_getcpu(i % rss_getnumbuckets());
2465#else
2466 /*
2467 * Bind the msix vector, and thus the
2468 * rings to the corresponding cpu.
2469 *
2470 * This just happens to match the default RSS round-robin
2471 * bucket -> queue -> CPU allocation.
2472 */
2473 if (adapter->num_queues > 1) {
2474 if (igb_last_bind_cpu < 0)
2475 igb_last_bind_cpu = CPU_FIRST();
2476 cpu_id = igb_last_bind_cpu;
2477 }
2478#endif
2479
2480 if (adapter->num_queues > 1) {
2481 bus_bind_intr(dev, que->res, cpu_id);
2482#ifdef RSS
2483 device_printf(dev,
2484 "Bound queue %d to RSS bucket %d\n",
2485 i, cpu_id);
2486#else
2487 device_printf(dev,
2488 "Bound queue %d to cpu %d\n",
2489 i, cpu_id);
2490#endif
2491 }
2492
2493#ifndef IGB_LEGACY_TX
2494 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start,
2495 que->txr);
2496#endif
2497 /* Make tasklet for deferred handling */
2498 TASK_INIT(&que->que_task, 0, igb_handle_que, que);
2499 que->tq = taskqueue_create("igb_que", M_NOWAIT,
2500 taskqueue_thread_enqueue, &que->tq);
2501 if (adapter->num_queues > 1) {
2502 /*
2503 * Only pin the taskqueue thread to a CPU if
2504 * RSS is in use.
2505 *
2506 * This again just happens to match the default RSS
2507 * round-robin bucket -> queue -> CPU allocation.
2508 */
2509#ifdef RSS
2510 CPU_SETOF(cpu_id, &cpu_mask);
2511 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET,
2512 &cpu_mask,
2513 "%s que (bucket %d)",
2514 device_get_nameunit(adapter->dev),
2515 cpu_id);
2516#else
2517 taskqueue_start_threads(&que->tq, 1, PI_NET,
2518 "%s que (qid %d)",
2519 device_get_nameunit(adapter->dev),
2520 cpu_id);
2521#endif
2522 } else {
2523 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
2524 device_get_nameunit(adapter->dev));
2525 }
2526
2527 /* Finally update the last bound CPU id */
2528 if (adapter->num_queues > 1)
2529 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu);
2530 }
2531
2532 /* And Link */
2533 rid = vector + 1;
2534 adapter->res = bus_alloc_resource_any(dev,
2535 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
2536 if (adapter->res == NULL) {
2537 device_printf(dev,
2538 "Unable to allocate bus resource: "
2539 "MSIX Link Interrupt\n");
2540 return (ENXIO);
2541 }
2542 if ((error = bus_setup_intr(dev, adapter->res,
2543 INTR_TYPE_NET | INTR_MPSAFE, NULL,
2544 igb_msix_link, adapter, &adapter->tag)) != 0) {
2545 device_printf(dev, "Failed to register Link handler");
2546 return (error);
2547 }
2548#if __FreeBSD_version >= 800504
2549 bus_describe_intr(dev, adapter->res, adapter->tag, "link");
2550#endif
2551 adapter->linkvec = vector;
2552
2553 return (0);
2554}
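
/*
 * Worked example of the vector layout set up above (two queues assumed,
 * for illustration only): queue 0 gets MSI-X vector 0 (SYS_RES_IRQ rid 1),
 * queue 1 gets vector 1 (rid 2), and the link interrupt takes the next
 * vector, 2 (rid 3), which igb_configure_queues() later routes through
 * IVAR_MISC.  The rid is always vector + 1 because MSI/MSI-X resource IDs
 * are 1-based while the vector numbering starts at 0.
 */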
2555
2556
2557static void
2558igb_configure_queues(struct adapter *adapter)
2559{
2560 struct e1000_hw *hw = &adapter->hw;
2561 struct igb_queue *que;
2562 u32 tmp, ivar = 0, newitr = 0;
2563
2564 /* First turn on RSS capability */
2565 if (adapter->hw.mac.type != e1000_82575)
2566 E1000_WRITE_REG(hw, E1000_GPIE,
2567 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME |
2568 E1000_GPIE_PBA | E1000_GPIE_NSICR);
2569
2570 /* Turn on MSIX */
2571 switch (adapter->hw.mac.type) {
2572 case e1000_82580:
2573 case e1000_i350:
2574 case e1000_i354:
2575 case e1000_i210:
2576 case e1000_i211:
2577 case e1000_vfadapt:
2578 case e1000_vfadapt_i350:
2579 /* RX entries */
2580 for (int i = 0; i < adapter->num_queues; i++) {
2581 u32 index = i >> 1;
2582 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2583 que = &adapter->queues[i];
2584 if (i & 1) {
2585 ivar &= 0xFF00FFFF;
2586 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2587 } else {
2588 ivar &= 0xFFFFFF00;
2589 ivar |= que->msix | E1000_IVAR_VALID;
2590 }
2591 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2592 }
2593 /* TX entries */
2594 for (int i = 0; i < adapter->num_queues; i++) {
2595 u32 index = i >> 1;
2596 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2597 que = &adapter->queues[i];
2598 if (i & 1) {
2599 ivar &= 0x00FFFFFF;
2600 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2601 } else {
2602 ivar &= 0xFFFF00FF;
2603 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2604 }
2605 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2606 adapter->que_mask |= que->eims;
2607 }
2608
2609 /* And for the link interrupt */
2610 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2611 adapter->link_mask = 1 << adapter->linkvec;
2612 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2613 break;
2614 case e1000_82576:
2615 /* RX entries */
2616 for (int i = 0; i < adapter->num_queues; i++) {
2617 u32 index = i & 0x7; /* Each IVAR has two entries */
2618 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2619 que = &adapter->queues[i];
2620 if (i < 8) {
2621 ivar &= 0xFFFFFF00;
2622 ivar |= que->msix | E1000_IVAR_VALID;
2623 } else {
2624 ivar &= 0xFF00FFFF;
2625 ivar |= (que->msix | E1000_IVAR_VALID) << 16;
2626 }
2627 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2628 adapter->que_mask |= que->eims;
2629 }
2630 /* TX entries */
2631 for (int i = 0; i < adapter->num_queues; i++) {
2632 u32 index = i & 0x7; /* Each IVAR has two entries */
2633 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
2634 que = &adapter->queues[i];
2635 if (i < 8) {
2636 ivar &= 0xFFFF00FF;
2637 ivar |= (que->msix | E1000_IVAR_VALID) << 8;
2638 } else {
2639 ivar &= 0x00FFFFFF;
2640 ivar |= (que->msix | E1000_IVAR_VALID) << 24;
2641 }
2642 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
2643 adapter->que_mask |= que->eims;
2644 }
2645
2646 /* And for the link interrupt */
2647 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8;
2648 adapter->link_mask = 1 << adapter->linkvec;
2649 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
2650 break;
2651
2652 case e1000_82575:
2653 /* enable MSI-X support*/
2654 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT);
2655 tmp |= E1000_CTRL_EXT_PBA_CLR;
2656 /* Auto-Mask interrupts upon ICR read. */
2657 tmp |= E1000_CTRL_EXT_EIAME;
2658 tmp |= E1000_CTRL_EXT_IRCA;
2659 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp);
2660
2661 /* Queues */
2662 for (int i = 0; i < adapter->num_queues; i++) {
2663 que = &adapter->queues[i];
2664 tmp = E1000_EICR_RX_QUEUE0 << i;
2665 tmp |= E1000_EICR_TX_QUEUE0 << i;
2666 que->eims = tmp;
2667 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0),
2668 i, que->eims);
2669 adapter->que_mask |= que->eims;
2670 }
2671
2672 /* Link */
2673 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec),
2674 E1000_EIMS_OTHER);
2675 adapter->link_mask |= E1000_EIMS_OTHER;
2676 default:
2677 break;
2678 }
2679
2680 /* Set the starting interrupt rate */
2681 if (igb_max_interrupt_rate > 0)
2682 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC;
2683
2684 if (hw->mac.type == e1000_82575)
2685 newitr |= newitr << 16;
2686 else
2687 newitr |= E1000_EITR_CNT_IGNR;
2688
2689 for (int i = 0; i < adapter->num_queues; i++) {
2690 que = &adapter->queues[i];
2691 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr);
2692 }
2693
2694 return;
2695}
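
/*
 * Illustrative sketch of the IVAR packing done above (not compiled into
 * the driver): each 32-bit IVAR register holds four 8-bit entries, where
 * bit 7 (E1000_IVAR_VALID) marks an entry valid and the low bits carry
 * the MSI-X vector.  On the 82580/i350/i210 family, queue i uses IVAR
 * register (i >> 1); its RX entry lives in byte 0 or 2 and its TX entry
 * in byte 1 or 3, selected by (i & 1).  The values below assume the 1:1
 * queue-to-vector mapping established in igb_allocate_msix().
 */
#if 0
	/* Route RX queue 3 to vector 3 on an 82580-class device: */
	u32 index = 3 >> 1;				/* IVAR register 1 */
	u32 entry = (3 | E1000_IVAR_VALID) << 16;	/* odd queue: byte 2 */
	u32 ivar = (E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index)
	    & 0xFF00FFFF) | entry;

	E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
#endif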
2696
2697
2698static void
2699igb_free_pci_resources(struct adapter *adapter)
2700{
2701 struct igb_queue *que = adapter->queues;
2702 device_t dev = adapter->dev;
2703 int rid;
2704
2705 	/*
2706 	** There is a slight possibility of a failure mode during
2707 	** attach that results in entering this function before the
2708 	** interrupt resources have been initialized; in that case
2709 	** we must not run the teardown loops below.
2710 	** We can detect this reliably by checking the state of the
2711 	** adapter's res pointer.
2712 	*/
2713 if (adapter->res == NULL)
2714 goto mem;
2715
2716 /*
2717 * First release all the interrupt resources:
2718 */
2719 for (int i = 0; i < adapter->num_queues; i++, que++) {
2720 rid = que->msix + 1;
2721 if (que->tag != NULL) {
2722 bus_teardown_intr(dev, que->res, que->tag);
2723 que->tag = NULL;
2724 }
2725 if (que->res != NULL)
2726 bus_release_resource(dev,
2727 SYS_RES_IRQ, rid, que->res);
2728 }
2729
2730 /* Clean the Legacy or Link interrupt last */
2731 if (adapter->linkvec) /* we are doing MSIX */
2732 rid = adapter->linkvec + 1;
2733 else
2734 (adapter->msix != 0) ? (rid = 1):(rid = 0);
2735
2736 que = adapter->queues;
2737 if (adapter->tag != NULL) {
2738 taskqueue_drain(que->tq, &adapter->link_task);
2739 bus_teardown_intr(dev, adapter->res, adapter->tag);
2740 adapter->tag = NULL;
2741 }
2742 if (adapter->res != NULL)
2743 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res);
2744
2745 for (int i = 0; i < adapter->num_queues; i++, que++) {
2746 if (que->tq != NULL) {
2747#ifndef IGB_LEGACY_TX
2748 taskqueue_drain(que->tq, &que->txr->txq_task);
2749#endif
2750 taskqueue_drain(que->tq, &que->que_task);
2751 taskqueue_free(que->tq);
2752 }
2753 }
2754mem:
2755 if (adapter->msix)
2756 pci_release_msi(dev);
2757
2758 if (adapter->msix_mem != NULL)
2759 bus_release_resource(dev, SYS_RES_MEMORY,
2760 adapter->memrid, adapter->msix_mem);
2761
2762 if (adapter->pci_mem != NULL)
2763 bus_release_resource(dev, SYS_RES_MEMORY,
2764 PCIR_BAR(0), adapter->pci_mem);
2765
2766}
2767
2768/*
2769 * Setup Either MSI/X or MSI
2770 */
2771static int
2772igb_setup_msix(struct adapter *adapter)
2773{
2774 device_t dev = adapter->dev;
2775 int bar, want, queues, msgs, maxqueues;
2776
2777 /* tuneable override */
2778 if (igb_enable_msix == 0)
2779 goto msi;
2780
2781 /* First try MSI/X */
2782 msgs = pci_msix_count(dev);
2783 if (msgs == 0)
2784 goto msi;
2785 	/*
2786 	** Some newer devices, like ixgbe, may use a
2787 	** different BAR for the MSIX table, so keep
2788 	** track of which one is used.
2789 	*/
2790 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR);
2791 bar = pci_read_config(dev, adapter->memrid, 4);
2792 if (bar == 0) /* use next bar */
2793 adapter->memrid += 4;
2794 adapter->msix_mem = bus_alloc_resource_any(dev,
2795 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE);
2796 if (adapter->msix_mem == NULL) {
2797 /* May not be enabled */
2798 device_printf(adapter->dev,
2799 		    "Unable to map MSIX table\n");
2800 goto msi;
2801 }
2802
2803 queues = (mp_ncpus > (msgs-1)) ? (msgs-1) : mp_ncpus;
2804
2805 /* Override via tuneable */
2806 if (igb_num_queues != 0)
2807 queues = igb_num_queues;
2808
2809#ifdef RSS
2810 /* If we're doing RSS, clamp at the number of RSS buckets */
2811 if (queues > rss_getnumbuckets())
2812 queues = rss_getnumbuckets();
2813#endif
2814
2815
2816 /* Sanity check based on HW */
2817 switch (adapter->hw.mac.type) {
2818 case e1000_82575:
2819 maxqueues = 4;
2820 break;
2821 case e1000_82576:
2822 case e1000_82580:
2823 case e1000_i350:
2824 case e1000_i354:
2825 maxqueues = 8;
2826 break;
2827 case e1000_i210:
2828 maxqueues = 4;
2829 break;
2830 case e1000_i211:
2831 maxqueues = 2;
2832 break;
2833 default: /* VF interfaces */
2834 maxqueues = 1;
2835 break;
2836 }
2837
2838 /* Final clamp on the actual hardware capability */
2839 if (queues > maxqueues)
2840 queues = maxqueues;
2841
2842 	/*
2843 	** One vector (RX/TX pair) per queue,
2844 	** plus one additional vector for the link interrupt
2845 	*/
2846 want = queues + 1;
2847 if (msgs >= want)
2848 msgs = want;
2849 else {
2850 device_printf(adapter->dev,
2851 "MSIX Configuration Problem, "
2852 "%d vectors configured, but %d queues wanted!\n",
2853 msgs, want);
2854 goto msi;
2855 }
2856 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) {
2857 device_printf(adapter->dev,
2858 "Using MSIX interrupts with %d vectors\n", msgs);
2859 adapter->num_queues = queues;
2860 return (msgs);
2861 }
2862 /*
2863 ** If MSIX alloc failed or provided us with
2864 ** less than needed, free and fall through to MSI
2865 */
2866 pci_release_msi(dev);
2867
2868msi:
2869 if (adapter->msix_mem != NULL) {
2870 bus_release_resource(dev, SYS_RES_MEMORY,
2871 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem);
2872 adapter->msix_mem = NULL;
2873 }
2874 msgs = 1;
2875 if (pci_alloc_msi(dev, &msgs) == 0) {
2876 device_printf(adapter->dev," Using an MSI interrupt\n");
2877 return (msgs);
2878 }
2879 device_printf(adapter->dev," Using a Legacy interrupt\n");
2880 return (0);
2881}
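
/*
 * Worked example of the vector accounting above (assumed values, for
 * illustration only): on an 8-core system with an i350 reporting 10
 * MSI-X messages, queues = min(mp_ncpus, msgs - 1) = min(8, 9) = 8,
 * which is within the i350 clamp of 8, so want = queues + 1 = 9 (one
 * RX/TX vector pair per queue plus the link vector).  If
 * pci_alloc_msix() cannot deliver all 9 vectors, the allocation is
 * released and the code falls back to a single MSI, and finally to a
 * legacy interrupt.
 */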
2882
2883/*********************************************************************
2884 *
2885 * Initialize the DMA Coalescing feature
2886 *
2887 **********************************************************************/
2888static void
2889igb_init_dmac(struct adapter *adapter, u32 pba)
2890{
2891 device_t dev = adapter->dev;
2892 struct e1000_hw *hw = &adapter->hw;
2893 u32 dmac, reg = ~E1000_DMACR_DMAC_EN;
2894 u16 hwm;
2895
2896 if (hw->mac.type == e1000_i211)
2897 return;
2898
2899 if (hw->mac.type > e1000_82580) {
2900
2901 if (adapter->dmac == 0) { /* Disabling it */
2902 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2903 return;
2904 } else
2905 device_printf(dev, "DMA Coalescing enabled\n");
2906
2907 /* Set starting threshold */
2908 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0);
2909
2910 hwm = 64 * pba - adapter->max_frame_size / 16;
2911 if (hwm < 64 * (pba - 6))
2912 hwm = 64 * (pba - 6);
2913 reg = E1000_READ_REG(hw, E1000_FCRTC);
2914 reg &= ~E1000_FCRTC_RTH_COAL_MASK;
2915 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT)
2916 & E1000_FCRTC_RTH_COAL_MASK);
2917 E1000_WRITE_REG(hw, E1000_FCRTC, reg);
2918
2919
2920 dmac = pba - adapter->max_frame_size / 512;
2921 if (dmac < pba - 10)
2922 dmac = pba - 10;
2923 reg = E1000_READ_REG(hw, E1000_DMACR);
2924 reg &= ~E1000_DMACR_DMACTHR_MASK;
2925 reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT)
2926 & E1000_DMACR_DMACTHR_MASK);
2927
2928 /* transition to L0x or L1 if available..*/
2929 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK);
2930
2931 		/* Check whether this is a 2.5Gb backplane connection
2932 		* before programming the watchdog timer: the timer
2933 		* field is expressed in 12.8usec units on a 2.5Gb
2934 		* connection and in 32usec units otherwise, so the
2935 		* requested interval must be converted accordingly.
2936 		*/
2937 if (hw->mac.type == e1000_i354) {
2938 int status = E1000_READ_REG(hw, E1000_STATUS);
2939 if ((status & E1000_STATUS_2P5_SKU) &&
2940 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2941 reg |= ((adapter->dmac * 5) >> 6);
2942 else
2943 reg |= (adapter->dmac >> 5);
2944 } else {
2945 reg |= (adapter->dmac >> 5);
2946 }
2947
2948 E1000_WRITE_REG(hw, E1000_DMACR, reg);
2949
2950 E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);
2951
2952 /* Set the interval before transition */
2953 reg = E1000_READ_REG(hw, E1000_DMCTLX);
2954 if (hw->mac.type == e1000_i350)
2955 reg |= IGB_DMCTLX_DCFLUSH_DIS;
2956 		/*
2957 		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so the
2958 		** same 4 usec delay is written as 0xA (10 units) rather than 0x4.
2959 		*/
2960 if (hw->mac.type == e1000_i354) {
2961 int status = E1000_READ_REG(hw, E1000_STATUS);
2962 if ((status & E1000_STATUS_2P5_SKU) &&
2963 (!(status & E1000_STATUS_2P5_SKU_OVER)))
2964 reg |= 0xA;
2965 else
2966 reg |= 0x4;
2967 } else {
2968 reg |= 0x4;
2969 }
2970
2971 E1000_WRITE_REG(hw, E1000_DMCTLX, reg);
2972
2973 /* free space in tx packet buffer to wake from DMA coal */
2974 E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
2975 (2 * adapter->max_frame_size)) >> 6);
2976
2977 /* make low power state decision controlled by DMA coal */
2978 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2979 reg &= ~E1000_PCIEMISC_LX_DECISION;
2980 E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);
2981
2982 } else if (hw->mac.type == e1000_82580) {
2983 u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
2984 E1000_WRITE_REG(hw, E1000_PCIEMISC,
2985 reg & ~E1000_PCIEMISC_LX_DECISION);
2986 E1000_WRITE_REG(hw, E1000_DMACR, 0);
2987 }
2988}
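
/*
 * Worked example of the watchdog conversion above (assumed values, and
 * assuming the dmac sysctl is expressed in microseconds): a request of
 * 1000 usec programs the timer field as 1000 >> 5 = 31 units of 32 usec
 * (~992 usec) on a regular link, and as (1000 * 5) >> 6 = 78 units of
 * 12.8 usec (~998 usec) on an i354 2.5Gb backplane link, so both encode
 * roughly the same coalescing interval.
 */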
2989
2990
2991/*********************************************************************
2992 *
2993 * Set up a fresh starting state
2994 *
2995 **********************************************************************/
2996static void
2997igb_reset(struct adapter *adapter)
2998{
2999 device_t dev = adapter->dev;
3000 struct e1000_hw *hw = &adapter->hw;
3001 struct e1000_fc_info *fc = &hw->fc;
3002 struct ifnet *ifp = adapter->ifp;
3003 u32 pba = 0;
3004 u16 hwm;
3005
3006 INIT_DEBUGOUT("igb_reset: begin");
3007
3008 /* Let the firmware know the OS is in control */
3009 igb_get_hw_control(adapter);
3010
3011 /*
3012 * Packet Buffer Allocation (PBA)
3013 * Writing PBA sets the receive portion of the buffer
3014 * the remainder is used for the transmit buffer.
3015 */
3016 switch (hw->mac.type) {
3017 case e1000_82575:
3018 pba = E1000_PBA_32K;
3019 break;
3020 case e1000_82576:
3021 case e1000_vfadapt:
3022 pba = E1000_READ_REG(hw, E1000_RXPBS);
3023 pba &= E1000_RXPBS_SIZE_MASK_82576;
3024 break;
3025 case e1000_82580:
3026 case e1000_i350:
3027 case e1000_i354:
3028 case e1000_vfadapt_i350:
3029 pba = E1000_READ_REG(hw, E1000_RXPBS);
3030 pba = e1000_rxpbs_adjust_82580(pba);
3031 break;
3032 case e1000_i210:
3033 case e1000_i211:
3034 pba = E1000_PBA_34K;
3035 default:
3036 break;
3037 }
3038
3039 /* Special needs in case of Jumbo frames */
3040 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) {
3041 u32 tx_space, min_tx, min_rx;
3042 pba = E1000_READ_REG(hw, E1000_PBA);
3043 tx_space = pba >> 16;
3044 pba &= 0xffff;
3045 min_tx = (adapter->max_frame_size +
3046 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2;
3047 min_tx = roundup2(min_tx, 1024);
3048 min_tx >>= 10;
3049 min_rx = adapter->max_frame_size;
3050 min_rx = roundup2(min_rx, 1024);
3051 min_rx >>= 10;
3052 if (tx_space < min_tx &&
3053 ((min_tx - tx_space) < pba)) {
3054 pba = pba - (min_tx - tx_space);
3055 /*
3056 * if short on rx space, rx wins
3057 * and must trump tx adjustment
3058 */
3059 if (pba < min_rx)
3060 pba = min_rx;
3061 }
3062 E1000_WRITE_REG(hw, E1000_PBA, pba);
3063 }
3064
3065 INIT_DEBUGOUT1("igb_init: pba=%dK",pba);
3066
3067 /*
3068 * These parameters control the automatic generation (Tx) and
3069 * response (Rx) to Ethernet PAUSE frames.
3070 * - High water mark should allow for at least two frames to be
3071 * received after sending an XOFF.
3072 * - Low water mark works best when it is very near the high water mark.
3073 * This allows the receiver to restart by sending XON when it has
3074 * drained a bit.
3075 */
3076 hwm = min(((pba << 10) * 9 / 10),
3077 ((pba << 10) - 2 * adapter->max_frame_size));
3078
3079 if (hw->mac.type < e1000_82576) {
3080 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
3081 fc->low_water = fc->high_water - 8;
3082 } else {
3083 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */
3084 fc->low_water = fc->high_water - 16;
3085 }
3086
3087 fc->pause_time = IGB_FC_PAUSE_TIME;
3088 fc->send_xon = TRUE;
3089 if (adapter->fc)
3090 fc->requested_mode = adapter->fc;
3091 else
3092 fc->requested_mode = e1000_fc_default;
3093
3094 /* Issue a global reset */
3095 e1000_reset_hw(hw);
3096 E1000_WRITE_REG(hw, E1000_WUC, 0);
3097
3098 /* Reset for AutoMediaDetect */
3099 if (adapter->flags & IGB_MEDIA_RESET) {
3100 e1000_setup_init_funcs(hw, TRUE);
3101 e1000_get_bus_info(hw);
3102 adapter->flags &= ~IGB_MEDIA_RESET;
3103 }
3104
3105 if (e1000_init_hw(hw) < 0)
3106 device_printf(dev, "Hardware Initialization Failed\n");
3107
3108 /* Setup DMA Coalescing */
3109 igb_init_dmac(adapter, pba);
3110
3111 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);
3112 e1000_get_phy_info(hw);
3113 e1000_check_for_link(hw);
3114 return;
3115}
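
/*
 * Worked example of the watermark math above (assumed values, for
 * illustration only): with pba = 34 (a 34KB RX packet buffer, as on
 * the i210) and a 1522-byte max frame,
 *
 *	(pba << 10) * 9 / 10        = 34816 * 9 / 10 = 31334
 *	(pba << 10) - 2 * max_frame = 34816 - 3044   = 31772
 *	hwm = min(31334, 31772)     = 31334
 *
 * so on an 82576-or-later MAC high_water = 31334 & 0xFFF0 = 31328 and
 * low_water = 31312, leaving room for roughly two more full frames
 * after an XOFF is sent.
 */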
3116
3117/*********************************************************************
3118 *
3119 * Setup networking device structure and register an interface.
3120 *
3121 **********************************************************************/
3122static int
3123igb_setup_interface(device_t dev, struct adapter *adapter)
3124{
3125 struct ifnet *ifp;
3126
3127 INIT_DEBUGOUT("igb_setup_interface: begin");
3128
3129 ifp = adapter->ifp = if_alloc(IFT_ETHER);
3130 if (ifp == NULL) {
3131 device_printf(dev, "can not allocate ifnet structure\n");
3132 return (-1);
3133 }
3134 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
3135 ifp->if_init = igb_init;
3136 ifp->if_softc = adapter;
3137 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3138 ifp->if_ioctl = igb_ioctl;
3139 ifp->if_get_counter = igb_get_counter;
3140#ifndef IGB_LEGACY_TX
3141 ifp->if_transmit = igb_mq_start;
3142 ifp->if_qflush = igb_qflush;
3143#else
3144 ifp->if_start = igb_start;
3145 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
3146 ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
3147 IFQ_SET_READY(&ifp->if_snd);
3148#endif
3149
3150 ether_ifattach(ifp, adapter->hw.mac.addr);
3151
3152 ifp->if_capabilities = ifp->if_capenable = 0;
3153
3154 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
3155 ifp->if_capabilities |= IFCAP_TSO;
3156 ifp->if_capabilities |= IFCAP_JUMBO_MTU;
3157 ifp->if_capenable = ifp->if_capabilities;
3158
3159 	/* Advertise LRO capability, but leave it disabled by default */
3160 ifp->if_capabilities |= IFCAP_LRO;
3161
3162#ifdef DEVICE_POLLING
3163 ifp->if_capabilities |= IFCAP_POLLING;
3164#endif
3165
3166 /*
3167 * Tell the upper layer(s) we
3168 * support full VLAN capability.
3169 */
3170 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
3171 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING
3172 | IFCAP_VLAN_HWTSO
3173 | IFCAP_VLAN_MTU;
3174 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING
3175 | IFCAP_VLAN_HWTSO
3176 | IFCAP_VLAN_MTU;
3177
3178 	/*
3179 	** Don't turn this on by default: if vlans are created on
3180 	** another pseudo device (e.g. lagg), vlan events are not
3181 	** passed through and hardware filtering breaks operation,
3182 	** but with HW FILTER off it works.  If vlans are used
3183 	** directly on the igb interface, this can be enabled to
3184 	** get full hardware tag filtering.
3185 	*/
3186 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
3187
3188 /*
3189 * Specify the media types supported by this adapter and register
3190 * callbacks to update media and link information
3191 */
3192 ifmedia_init(&adapter->media, IFM_IMASK,
3193 igb_media_change, igb_media_status);
3194 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
3195 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
3196 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX,
3197 0, NULL);
3198 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL);
3199 } else {
3200 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
3201 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
3202 0, NULL);
3203 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
3204 0, NULL);
3205 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
3206 0, NULL);
3207 if (adapter->hw.phy.type != e1000_phy_ife) {
3208 ifmedia_add(&adapter->media,
3209 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
3210 ifmedia_add(&adapter->media,
3211 IFM_ETHER | IFM_1000_T, 0, NULL);
3212 }
3213 }
3214 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3215 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
3216 return (0);
3217}
3218
3219
3220/*
3221 * Manage DMA'able memory.
3222 */
3223static void
3224igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3225{
3226 if (error)
3227 return;
3228 *(bus_addr_t *) arg = segs[0].ds_addr;
3229}
3230
3231static int
3232igb_dma_malloc(struct adapter *adapter, bus_size_t size,
3233 struct igb_dma_alloc *dma, int mapflags)
3234{
3235 int error;
3236
3237 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
3238 IGB_DBA_ALIGN, 0, /* alignment, bounds */
3239 BUS_SPACE_MAXADDR, /* lowaddr */
3240 BUS_SPACE_MAXADDR, /* highaddr */
3241 NULL, NULL, /* filter, filterarg */
3242 size, /* maxsize */
3243 1, /* nsegments */
3244 size, /* maxsegsize */
3245 0, /* flags */
3246 NULL, /* lockfunc */
3247 NULL, /* lockarg */
3248 &dma->dma_tag);
3249 if (error) {
3250 device_printf(adapter->dev,
3251 "%s: bus_dma_tag_create failed: %d\n",
3252 __func__, error);
3253 goto fail_0;
3254 }
3255
3256 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
3257 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
3258 if (error) {
3259 device_printf(adapter->dev,
3260 "%s: bus_dmamem_alloc(%ju) failed: %d\n",
3261 __func__, (uintmax_t)size, error);
3262 goto fail_2;
3263 }
3264
3265 dma->dma_paddr = 0;
3266 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3267 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
3268 if (error || dma->dma_paddr == 0) {
3269 device_printf(adapter->dev,
3270 "%s: bus_dmamap_load failed: %d\n",
3271 __func__, error);
3272 goto fail_3;
3273 }
3274
3275 return (0);
3276
3277fail_3:
3278 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3279fail_2:
3280 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3281 bus_dma_tag_destroy(dma->dma_tag);
3282fail_0:
3283 dma->dma_tag = NULL;
3284
3285 return (error);
3286}
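
/*
 * Illustrative usage sketch (not compiled into the driver): allocating a
 * DMA area with the helper above and using the bus address captured by
 * igb_dmamap_cb().  The 1024-descriptor ring size is an assumption for
 * the example only.
 */
#if 0
	struct igb_dma_alloc dma;
	int tsize = roundup2(1024 * sizeof(union e1000_adv_tx_desc),
	    IGB_DBA_ALIGN);

	if (igb_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
		/* dma_vaddr is the KVA, dma_paddr the device-visible address */
		bzero(dma.dma_vaddr, tsize);
		/* ... hand dma.dma_paddr to the hardware ... */
		igb_dma_free(adapter, &dma);
	}
#endif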
3287
3288static void
3289igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma)
3290{
3291 if (dma->dma_tag == NULL)
3292 return;
3293 if (dma->dma_paddr != 0) {
3294 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3295 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3296 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3297 dma->dma_paddr = 0;
3298 }
3299 if (dma->dma_vaddr != NULL) {
3300 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
3301 dma->dma_vaddr = NULL;
3302 }
3303 bus_dma_tag_destroy(dma->dma_tag);
3304 dma->dma_tag = NULL;
3305}
3306
3307
3308/*********************************************************************
3309 *
3310 * Allocate memory for the transmit and receive rings, and then
3311 * the descriptors associated with each, called only once at attach.
3312 *
3313 **********************************************************************/
3314static int
3315igb_allocate_queues(struct adapter *adapter)
3316{
3317 device_t dev = adapter->dev;
3318 struct igb_queue *que = NULL;
3319 struct tx_ring *txr = NULL;
3320 struct rx_ring *rxr = NULL;
3321 int rsize, tsize, error = E1000_SUCCESS;
3322 int txconf = 0, rxconf = 0;
3323
3324 /* First allocate the top level queue structs */
3325 if (!(adapter->queues =
3326 (struct igb_queue *) malloc(sizeof(struct igb_queue) *
3327 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3328 device_printf(dev, "Unable to allocate queue memory\n");
3329 error = ENOMEM;
3330 goto fail;
3331 }
3332
3333 /* Next allocate the TX ring struct memory */
3334 if (!(adapter->tx_rings =
3335 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
3336 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3337 device_printf(dev, "Unable to allocate TX ring memory\n");
3338 error = ENOMEM;
3339 goto tx_fail;
3340 }
3341
3342 /* Now allocate the RX */
3343 if (!(adapter->rx_rings =
3344 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
3345 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3346 device_printf(dev, "Unable to allocate RX ring memory\n");
3347 error = ENOMEM;
3348 goto rx_fail;
3349 }
3350
3351 tsize = roundup2(adapter->num_tx_desc *
3352 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN);
3353 /*
3354 * Now set up the TX queues, txconf is needed to handle the
3355 * possibility that things fail midcourse and we need to
3356 * undo memory gracefully
3357 */
3358 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
3359 /* Set up some basics */
3360 txr = &adapter->tx_rings[i];
3361 txr->adapter = adapter;
3362 txr->me = i;
3363 txr->num_desc = adapter->num_tx_desc;
3364
3365 /* Initialize the TX lock */
3366 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
3367 device_get_nameunit(dev), txr->me);
3368 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
3369
3370 if (igb_dma_malloc(adapter, tsize,
3371 &txr->txdma, BUS_DMA_NOWAIT)) {
3372 device_printf(dev,
3373 "Unable to allocate TX Descriptor memory\n");
3374 error = ENOMEM;
3375 goto err_tx_desc;
3376 }
3377 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr;
3378 bzero((void *)txr->tx_base, tsize);
3379
3380 /* Now allocate transmit buffers for the ring */
3381 if (igb_allocate_transmit_buffers(txr)) {
3382 device_printf(dev,
3383 "Critical Failure setting up transmit buffers\n");
3384 error = ENOMEM;
3385 goto err_tx_desc;
3386 }
3387#ifndef IGB_LEGACY_TX
3388 /* Allocate a buf ring */
3389 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF,
3390 M_WAITOK, &txr->tx_mtx);
3391#endif
3392 }
3393
3394 /*
3395 * Next the RX queues...
3396 */
3397 rsize = roundup2(adapter->num_rx_desc *
3398 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
3399 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
3400 rxr = &adapter->rx_rings[i];
3401 rxr->adapter = adapter;
3402 rxr->me = i;
3403
3404 /* Initialize the RX lock */
3405 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
3406 		    device_get_nameunit(dev), rxr->me);
3407 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
3408
3409 if (igb_dma_malloc(adapter, rsize,
3410 &rxr->rxdma, BUS_DMA_NOWAIT)) {
3411 device_printf(dev,
3412 "Unable to allocate RxDescriptor memory\n");
3413 error = ENOMEM;
3414 goto err_rx_desc;
3415 }
3416 rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
3417 bzero((void *)rxr->rx_base, rsize);
3418
3419 /* Allocate receive buffers for the ring*/
3420 if (igb_allocate_receive_buffers(rxr)) {
3421 device_printf(dev,
3422 "Critical Failure setting up receive buffers\n");
3423 error = ENOMEM;
3424 goto err_rx_desc;
3425 }
3426 }
3427
3428 /*
3429 ** Finally set up the queue holding structs
3430 */
3431 for (int i = 0; i < adapter->num_queues; i++) {
3432 que = &adapter->queues[i];
3433 que->adapter = adapter;
3434 que->txr = &adapter->tx_rings[i];
3435 que->rxr = &adapter->rx_rings[i];
3436 }
3437
3438 return (0);
3439
3440err_rx_desc:
3441 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
3442 igb_dma_free(adapter, &rxr->rxdma);
3443err_tx_desc:
3444 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
3445 igb_dma_free(adapter, &txr->txdma);
3446 free(adapter->rx_rings, M_DEVBUF);
3447rx_fail:
3448#ifndef IGB_LEGACY_TX
3449 buf_ring_free(txr->br, M_DEVBUF);
3450#endif
3451 free(adapter->tx_rings, M_DEVBUF);
3452tx_fail:
3453 free(adapter->queues, M_DEVBUF);
3454fail:
3455 return (error);
3456}
3457
3458/*********************************************************************
3459 *
3460 * Allocate memory for tx_buffer structures. The tx_buffer stores all
3461 * the information needed to transmit a packet on the wire. This is
3462 * called only once at attach, setup is done every reset.
3463 *
3464 **********************************************************************/
3465static int
3466igb_allocate_transmit_buffers(struct tx_ring *txr)
3467{
3468 struct adapter *adapter = txr->adapter;
3469 device_t dev = adapter->dev;
3470 struct igb_tx_buf *txbuf;
3471 int error, i;
3472
3473 /*
3474 * Setup DMA descriptor areas.
3475 */
3476 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
3477 1, 0, /* alignment, bounds */
3478 BUS_SPACE_MAXADDR, /* lowaddr */
3479 BUS_SPACE_MAXADDR, /* highaddr */
3480 NULL, NULL, /* filter, filterarg */
3481 IGB_TSO_SIZE, /* maxsize */
3482 IGB_MAX_SCATTER, /* nsegments */
3483 PAGE_SIZE, /* maxsegsize */
3484 0, /* flags */
3485 NULL, /* lockfunc */
3486 NULL, /* lockfuncarg */
3487 &txr->txtag))) {
3488 device_printf(dev,"Unable to allocate TX DMA tag\n");
3489 goto fail;
3490 }
3491
3492 if (!(txr->tx_buffers =
3493 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) *
3494 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
3495 device_printf(dev, "Unable to allocate tx_buffer memory\n");
3496 error = ENOMEM;
3497 goto fail;
3498 }
3499
3500 /* Create the descriptor buffer dma maps */
3501 txbuf = txr->tx_buffers;
3502 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3503 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
3504 if (error != 0) {
3505 device_printf(dev, "Unable to create TX DMA map\n");
3506 goto fail;
3507 }
3508 }
3509
3510 return 0;
3511fail:
3512 	/* Free everything; this also handles failing partway through setup */
3513 igb_free_transmit_structures(adapter);
3514 return (error);
3515}
3516
3517/*********************************************************************
3518 *
3519 * Initialize a transmit ring.
3520 *
3521 **********************************************************************/
3522static void
3523igb_setup_transmit_ring(struct tx_ring *txr)
3524{
3525 struct adapter *adapter = txr->adapter;
3526 struct igb_tx_buf *txbuf;
3527 int i;
3528#ifdef DEV_NETMAP
3529 struct netmap_adapter *na = NA(adapter->ifp);
3530 struct netmap_slot *slot;
3531#endif /* DEV_NETMAP */
3532
3533 /* Clear the old descriptor contents */
3534 IGB_TX_LOCK(txr);
3535#ifdef DEV_NETMAP
3536 slot = netmap_reset(na, NR_TX, txr->me, 0);
3537#endif /* DEV_NETMAP */
3538 bzero((void *)txr->tx_base,
3539 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc);
3540 /* Reset indices */
3541 txr->next_avail_desc = 0;
3542 txr->next_to_clean = 0;
3543
3544 /* Free any existing tx buffers. */
3545 txbuf = txr->tx_buffers;
3546 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
3547 if (txbuf->m_head != NULL) {
3548 bus_dmamap_sync(txr->txtag, txbuf->map,
3549 BUS_DMASYNC_POSTWRITE);
3550 bus_dmamap_unload(txr->txtag, txbuf->map);
3551 m_freem(txbuf->m_head);
3552 txbuf->m_head = NULL;
3553 }
3554#ifdef DEV_NETMAP
3555 if (slot) {
3556 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
3557 /* no need to set the address */
3558 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
3559 }
3560#endif /* DEV_NETMAP */
3561 /* clear the watch index */
3562 txbuf->eop = NULL;
3563 }
3564
3565 /* Set number of descriptors available */
3566 txr->tx_avail = adapter->num_tx_desc;
3567
3568 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
3569 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3570 IGB_TX_UNLOCK(txr);
3571}
3572
3573/*********************************************************************
3574 *
3575 * Initialize all transmit rings.
3576 *
3577 **********************************************************************/
3578static void
3579igb_setup_transmit_structures(struct adapter *adapter)
3580{
3581 struct tx_ring *txr = adapter->tx_rings;
3582
3583 for (int i = 0; i < adapter->num_queues; i++, txr++)
3584 igb_setup_transmit_ring(txr);
3585
3586 return;
3587}
3588
3589/*********************************************************************
3590 *
3591 * Enable transmit unit.
3592 *
3593 **********************************************************************/
3594static void
3595igb_initialize_transmit_units(struct adapter *adapter)
3596{
3597 struct tx_ring *txr = adapter->tx_rings;
3598 struct e1000_hw *hw = &adapter->hw;
3599 u32 tctl, txdctl;
3600
3601 INIT_DEBUGOUT("igb_initialize_transmit_units: begin");
3602 tctl = txdctl = 0;
3603
3604 /* Setup the Tx Descriptor Rings */
3605 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3606 u64 bus_addr = txr->txdma.dma_paddr;
3607
3608 E1000_WRITE_REG(hw, E1000_TDLEN(i),
3609 adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
3610 E1000_WRITE_REG(hw, E1000_TDBAH(i),
3611 (uint32_t)(bus_addr >> 32));
3612 E1000_WRITE_REG(hw, E1000_TDBAL(i),
3613 (uint32_t)bus_addr);
3614
3615 /* Setup the HW Tx Head and Tail descriptor pointers */
3616 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
3617 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
3618
3619 HW_DEBUGOUT2("Base = %x, Length = %x\n",
3620 E1000_READ_REG(hw, E1000_TDBAL(i)),
3621 E1000_READ_REG(hw, E1000_TDLEN(i)));
3622
3623 txr->queue_status = IGB_QUEUE_IDLE;
3624
3625 txdctl |= IGB_TX_PTHRESH;
3626 txdctl |= IGB_TX_HTHRESH << 8;
3627 txdctl |= IGB_TX_WTHRESH << 16;
3628 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
3629 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
3630 }
3631
3632 if (adapter->vf_ifp)
3633 return;
3634
3635 e1000_config_collision_dist(hw);
3636
3637 /* Program the Transmit Control Register */
3638 tctl = E1000_READ_REG(hw, E1000_TCTL);
3639 tctl &= ~E1000_TCTL_CT;
3640 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
3641 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
3642
3643 /* This write will effectively turn on the transmit unit. */
3644 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
3645}
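
/*
 * Sizing note (illustration, assuming the default of 1024 descriptors
 * per ring): the legacy struct e1000_tx_desc and the advanced
 * union e1000_adv_tx_desc are both 16 bytes, so the TDLEN value written
 * above is 1024 * 16 = 16384 bytes and matches the ring that
 * igb_allocate_queues() allocated using the advanced descriptor type.
 */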
3646
3647/*********************************************************************
3648 *
3649 * Free all transmit rings.
3650 *
3651 **********************************************************************/
3652static void
3653igb_free_transmit_structures(struct adapter *adapter)
3654{
3655 struct tx_ring *txr = adapter->tx_rings;
3656
3657 for (int i = 0; i < adapter->num_queues; i++, txr++) {
3658 IGB_TX_LOCK(txr);
3659 igb_free_transmit_buffers(txr);
3660 igb_dma_free(adapter, &txr->txdma);
3661 IGB_TX_UNLOCK(txr);
3662 IGB_TX_LOCK_DESTROY(txr);
3663 }
3664 free(adapter->tx_rings, M_DEVBUF);
3665}
3666
3667/*********************************************************************
3668 *
3669 * Free transmit ring related data structures.
3670 *
3671 **********************************************************************/
3672static void
3673igb_free_transmit_buffers(struct tx_ring *txr)
3674{
3675 struct adapter *adapter = txr->adapter;
3676 struct igb_tx_buf *tx_buffer;
3677 int i;
3678
3679 INIT_DEBUGOUT("free_transmit_ring: begin");
3680
3681 if (txr->tx_buffers == NULL)
3682 return;
3683
3684 tx_buffer = txr->tx_buffers;
3685 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
3686 if (tx_buffer->m_head != NULL) {
3687 bus_dmamap_sync(txr->txtag, tx_buffer->map,
3688 BUS_DMASYNC_POSTWRITE);
3689 bus_dmamap_unload(txr->txtag,
3690 tx_buffer->map);
3691 m_freem(tx_buffer->m_head);
3692 tx_buffer->m_head = NULL;
3693 if (tx_buffer->map != NULL) {
3694 bus_dmamap_destroy(txr->txtag,
3695 tx_buffer->map);
3696 tx_buffer->map = NULL;
3697 }
3698 } else if (tx_buffer->map != NULL) {
3699 bus_dmamap_unload(txr->txtag,
3700 tx_buffer->map);
3701 bus_dmamap_destroy(txr->txtag,
3702 tx_buffer->map);
3703 tx_buffer->map = NULL;
3704 }
3705 }
3706#ifndef IGB_LEGACY_TX
3707 if (txr->br != NULL)
3708 buf_ring_free(txr->br, M_DEVBUF);
3709#endif
3710 if (txr->tx_buffers != NULL) {
3711 free(txr->tx_buffers, M_DEVBUF);
3712 txr->tx_buffers = NULL;
3713 }
3714 if (txr->txtag != NULL) {
3715 bus_dma_tag_destroy(txr->txtag);
3716 txr->txtag = NULL;
3717 }
3718 return;
3719}
3720
3721/**********************************************************************
3722 *
3723 * Setup work for hardware segmentation offload (TSO) on
3724 * adapters using advanced tx descriptors
3725 *
3726 **********************************************************************/
3727static int
3728igb_tso_setup(struct tx_ring *txr, struct mbuf *mp,
3729 u32 *cmd_type_len, u32 *olinfo_status)
3730{
3731 struct adapter *adapter = txr->adapter;
3732 struct e1000_adv_tx_context_desc *TXD;
3733 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
3734 u32 mss_l4len_idx = 0, paylen;
3735 u16 vtag = 0, eh_type;
3736 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
3737 struct ether_vlan_header *eh;
3738#ifdef INET6
3739 struct ip6_hdr *ip6;
3740#endif
3741#ifdef INET
3742 struct ip *ip;
3743#endif
3744 struct tcphdr *th;
3745
3746
3747 /*
3748 * Determine where frame payload starts.
3749 * Jump over vlan headers if already present
3750 */
3751 eh = mtod(mp, struct ether_vlan_header *);
3752 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3753 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3754 eh_type = eh->evl_proto;
3755 } else {
3756 ehdrlen = ETHER_HDR_LEN;
3757 eh_type = eh->evl_encap_proto;
3758 }
3759
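	/*
	** For TSO the hardware expects the TCP checksum field to be
	** seeded with the pseudo-header checksum (computed without the
	** length), so both the IPv4 and IPv6 cases below do that.
	*/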
3760 switch (ntohs(eh_type)) {
3761#ifdef INET6
3762 case ETHERTYPE_IPV6:
3763 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3764 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
3765 if (ip6->ip6_nxt != IPPROTO_TCP)
3766 return (ENXIO);
3767 ip_hlen = sizeof(struct ip6_hdr);
3768 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3769 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
3770 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
3771 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3772 break;
3773#endif
3774#ifdef INET
3775 case ETHERTYPE_IP:
3776 ip = (struct ip *)(mp->m_data + ehdrlen);
3777 if (ip->ip_p != IPPROTO_TCP)
3778 return (ENXIO);
3779 ip->ip_sum = 0;
3780 ip_hlen = ip->ip_hl << 2;
3781 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
3782 th->th_sum = in_pseudo(ip->ip_src.s_addr,
3783 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
3784 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3785 /* Tell transmit desc to also do IPv4 checksum. */
3786 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3787 break;
3788#endif
3789 default:
3790 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
3791 __func__, ntohs(eh_type));
3792 break;
3793 }
3794
3795 ctxd = txr->next_avail_desc;
3796 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3797
3798 tcp_hlen = th->th_off << 2;
3799
3800 /* This is used in the transmit desc in encap */
3801 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
3802
3803 /* VLAN MACLEN IPLEN */
3804 if (mp->m_flags & M_VLANTAG) {
3805 vtag = htole16(mp->m_pkthdr.ether_vtag);
3806 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3807 }
3808
3809 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3810 vlan_macip_lens |= ip_hlen;
3811 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3812
3813 /* ADV DTYPE TUCMD */
3814 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3815 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3816 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3817
3818 /* MSS L4LEN IDX */
3819 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT);
3820 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT);
3821 /* 82575 needs the queue index added */
3822 if (adapter->hw.mac.type == e1000_82575)
3823 mss_l4len_idx |= txr->me << 4;
3824 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3825
3826 TXD->seqnum_seed = htole32(0);
3827
3828 if (++ctxd == txr->num_desc)
3829 ctxd = 0;
3830
3831 txr->tx_avail--;
3832 txr->next_avail_desc = ctxd;
3833 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3834 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3835 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT;
3836 ++txr->tso_tx;
3837 return (0);
3838}
3839
3840/*********************************************************************
3841 *
3842 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
3843 *
3844 **********************************************************************/
3845
3846static int
3847igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
3848 u32 *cmd_type_len, u32 *olinfo_status)
3849{
3850 struct e1000_adv_tx_context_desc *TXD;
3851 struct adapter *adapter = txr->adapter;
3852 struct ether_vlan_header *eh;
3853 struct ip *ip;
3854 struct ip6_hdr *ip6;
3855 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0;
3856 int ehdrlen, ip_hlen = 0;
3857 u16 etype;
3858 u8 ipproto = 0;
3859 int offload = TRUE;
3860 int ctxd = txr->next_avail_desc;
3861 u16 vtag = 0;
3862
3863 /* First check if TSO is to be used */
3864 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
3865 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status));
3866
3867 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
3868 offload = FALSE;
3869
3870 /* Indicate the whole packet as payload when not doing TSO */
3871 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT;
3872
3873 /* Now ready a context descriptor */
3874 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd];
3875
3876 /*
3877 ** In advanced descriptors the vlan tag must
3878 ** be placed into the context descriptor. Hence
3879 ** we need to make one even if not doing offloads.
3880 */
3881 if (mp->m_flags & M_VLANTAG) {
3882 vtag = htole16(mp->m_pkthdr.ether_vtag);
3883 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT);
3884 } else if (offload == FALSE) /* ... no offload to do */
3885 return (0);
3886
3887 /*
3888 * Determine where frame payload starts.
3889 * Jump over vlan headers if already present,
3890 * helpful for QinQ too.
3891 */
3892 eh = mtod(mp, struct ether_vlan_header *);
3893 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
3894 etype = ntohs(eh->evl_proto);
3895 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
3896 } else {
3897 etype = ntohs(eh->evl_encap_proto);
3898 ehdrlen = ETHER_HDR_LEN;
3899 }
3900
3901 /* Set the ether header length */
3902 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT;
3903
3904 switch (etype) {
3905 case ETHERTYPE_IP:
3906 ip = (struct ip *)(mp->m_data + ehdrlen);
3907 ip_hlen = ip->ip_hl << 2;
3908 ipproto = ip->ip_p;
3909 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
3910 break;
3911 case ETHERTYPE_IPV6:
3912 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
3913 ip_hlen = sizeof(struct ip6_hdr);
3914 /* XXX-BZ this will go badly in case of ext hdrs. */
3915 ipproto = ip6->ip6_nxt;
3916 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
3917 break;
3918 default:
3919 offload = FALSE;
3920 break;
3921 }
3922
3923 vlan_macip_lens |= ip_hlen;
3924 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
3925
3926 switch (ipproto) {
3927 case IPPROTO_TCP:
3928 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
3929 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
3930 break;
3931 case IPPROTO_UDP:
3932 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
3933 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP;
3934 break;
3935
3936#if __FreeBSD_version >= 800000
3937 case IPPROTO_SCTP:
3938 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
3939 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3940 break;
3941#endif
3942 default:
3943 offload = FALSE;
3944 break;
3945 }
3946
3947 if (offload) /* For the TX descriptor setup */
3948 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3949
3950 /* 82575 needs the queue index added */
3951 if (adapter->hw.mac.type == e1000_82575)
3952 mss_l4len_idx = txr->me << 4;
3953
3954 /* Now copy bits into descriptor */
3955 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
3956 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
3957 TXD->seqnum_seed = htole32(0);
3958 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
3959
3960 /* We've consumed the first desc, adjust counters */
3961 if (++ctxd == txr->num_desc)
3962 ctxd = 0;
3963 txr->next_avail_desc = ctxd;
3964 --txr->tx_avail;
3965
3966 return (0);
3967}
3968
3969/**********************************************************************
3970 *
3971 * Examine each tx_buffer in the used queue. If the hardware is done
3972 * processing the packet then free associated resources. The
3973 * tx_buffer is put back on the free queue.
3974 *
3975 * A TRUE return means there's work in the ring to clean; FALSE means it's empty.
3976 **********************************************************************/
3977static bool
3978igb_txeof(struct tx_ring *txr)
3979{
3980 struct adapter *adapter = txr->adapter;
3981#ifdef DEV_NETMAP
3982 struct ifnet *ifp = adapter->ifp;
3983#endif /* DEV_NETMAP */
3984 u32 work, processed = 0;
3985 int limit = adapter->tx_process_limit;
3986 struct igb_tx_buf *buf;
3987 union e1000_adv_tx_desc *txd;
3988
3989 mtx_assert(&txr->tx_mtx, MA_OWNED);
3990
3991#ifdef DEV_NETMAP
3992 if (netmap_tx_irq(ifp, txr->me))
3993 return (FALSE);
3994#endif /* DEV_NETMAP */
3995
3996 if (txr->tx_avail == txr->num_desc) {
3997 txr->queue_status = IGB_QUEUE_IDLE;
3998 return FALSE;
3999 }
4000
4001 /* Get work starting point */
4002 work = txr->next_to_clean;
4003 buf = &txr->tx_buffers[work];
4004 txd = &txr->tx_base[work];
4005 work -= txr->num_desc; /* The distance to ring end */
4006 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4007 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
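	/*
	** NB: 'work' is kept as a negative offset from the end of the
	** ring; it reaches zero exactly when the index wraps, which
	** makes the wrap checks below a simple '!work'.
	*/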
4008 do {
4009 union e1000_adv_tx_desc *eop = buf->eop;
4010 if (eop == NULL) /* No work */
4011 break;
4012
4013 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0)
4014 break; /* I/O not complete */
4015
4016 if (buf->m_head) {
4017 txr->bytes +=
4018 buf->m_head->m_pkthdr.len;
4019 bus_dmamap_sync(txr->txtag,
4020 buf->map,
4021 BUS_DMASYNC_POSTWRITE);
4022 bus_dmamap_unload(txr->txtag,
4023 buf->map);
4024 m_freem(buf->m_head);
4025 buf->m_head = NULL;
4026 }
4027 buf->eop = NULL;
4028 ++txr->tx_avail;
4029
4030 /* We clean the range if multi segment */
4031 while (txd != eop) {
4032 ++txd;
4033 ++buf;
4034 ++work;
4035 /* wrap the ring? */
4036 if (__predict_false(!work)) {
4037 work -= txr->num_desc;
4038 buf = txr->tx_buffers;
4039 txd = txr->tx_base;
4040 }
4041 if (buf->m_head) {
4042 txr->bytes +=
4043 buf->m_head->m_pkthdr.len;
4044 bus_dmamap_sync(txr->txtag,
4045 buf->map,
4046 BUS_DMASYNC_POSTWRITE);
4047 bus_dmamap_unload(txr->txtag,
4048 buf->map);
4049 m_freem(buf->m_head);
4050 buf->m_head = NULL;
4051 }
4052 ++txr->tx_avail;
4053 buf->eop = NULL;
4054
4055 }
4056 ++txr->packets;
4057 ++processed;
4058 txr->watchdog_time = ticks;
4059
4060 /* Try the next packet */
4061 ++txd;
4062 ++buf;
4063 ++work;
4064 /* reset with a wrap */
4065 if (__predict_false(!work)) {
4066 work -= txr->num_desc;
4067 buf = txr->tx_buffers;
4068 txd = txr->tx_base;
4069 }
4070 prefetch(txd);
4071 } while (__predict_true(--limit));
4072
4073 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
4074 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4075
4076 work += txr->num_desc;
4077 txr->next_to_clean = work;
4078
4079 /*
4080 ** Watchdog calculation: we know there is work
4081 ** outstanding, or the first return above would
4082 ** have been taken, so nothing processed for too
4083 ** long indicates a hang.
4084 */
4085 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG))
4086 txr->queue_status |= IGB_QUEUE_HUNG;
4087
4088 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD)
4089 txr->queue_status &= ~IGB_QUEUE_DEPLETED;
4090
4091 if (txr->tx_avail == txr->num_desc) {
4092 txr->queue_status = IGB_QUEUE_IDLE;
4093 return (FALSE);
4094 }
4095
4096 return (TRUE);
4097}
4098
4099/*********************************************************************
4100 *
4101 * Refresh mbuf buffers for RX descriptor rings
4102 * - now keeps its own state, so discards due to resource
4103 * exhaustion are unnecessary; if an mbuf cannot be obtained
4104 * it just returns, keeping its placeholder, and can simply
4105 * be called again later to retry.
4106 *
4107 **********************************************************************/
4108static void
4109igb_refresh_mbufs(struct rx_ring *rxr, int limit)
4110{
4111 struct adapter *adapter = rxr->adapter;
4112 bus_dma_segment_t hseg[1];
4113 bus_dma_segment_t pseg[1];
4114 struct igb_rx_buf *rxbuf;
4115 struct mbuf *mh, *mp;
4116 int i, j, nsegs, error;
4117 bool refreshed = FALSE;
4118
4119 i = j = rxr->next_to_refresh;
4120 /*
4121 ** Get one descriptor beyond
4122 ** our work mark to control
4123 ** the loop.
4124 */
4125 if (++j == adapter->num_rx_desc)
4126 j = 0;
4127
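	/*
	** 'j' runs one slot ahead of 'i': the loop stops once 'j'
	** reaches 'limit', so 'i' (and next_to_refresh) always names
	** the last slot that was successfully refreshed.
	*/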
4128 while (j != limit) {
4129 rxbuf = &rxr->rx_buffers[i];
4130 /* No hdr mbuf used with header split off */
4131 if (rxr->hdr_split == FALSE)
4132 goto no_split;
4133 if (rxbuf->m_head == NULL) {
4134 mh = m_gethdr(M_NOWAIT, MT_DATA);
4135 if (mh == NULL)
4136 goto update;
4137 } else
4138 mh = rxbuf->m_head;
4139
4140 mh->m_pkthdr.len = mh->m_len = MHLEN;
4141 mh->m_len = MHLEN;
4142 mh->m_flags |= M_PKTHDR;
4143 /* Get the memory mapping */
4144 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4145 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
4146 if (error != 0) {
4147 printf("Refresh mbufs: hdr dmamap load"
4148 " failure - %d\n", error);
4149 m_free(mh);
4150 rxbuf->m_head = NULL;
4151 goto update;
4152 }
4153 rxbuf->m_head = mh;
4154 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4155 BUS_DMASYNC_PREREAD);
4156 rxr->rx_base[i].read.hdr_addr =
4157 htole64(hseg[0].ds_addr);
4158no_split:
4159 if (rxbuf->m_pack == NULL) {
4160 mp = m_getjcl(M_NOWAIT, MT_DATA,
4161 M_PKTHDR, adapter->rx_mbuf_sz);
4162 if (mp == NULL)
4163 goto update;
4164 } else
4165 mp = rxbuf->m_pack;
4166
4167 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4168 /* Get the memory mapping */
4169 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4170 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
4171 if (error != 0) {
4172 printf("Refresh mbufs: payload dmamap load"
4173 " failure - %d\n", error);
4174 m_free(mp);
4175 rxbuf->m_pack = NULL;
4176 goto update;
4177 }
4178 rxbuf->m_pack = mp;
4179 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4180 BUS_DMASYNC_PREREAD);
4181 rxr->rx_base[i].read.pkt_addr =
4182 htole64(pseg[0].ds_addr);
4183 refreshed = TRUE; /* I feel wefreshed :) */
4184
4185 i = j; /* our next is precalculated */
4186 rxr->next_to_refresh = i;
4187 if (++j == adapter->num_rx_desc)
4188 j = 0;
4189 }
4190update:
4191 if (refreshed) /* update tail */
4192 E1000_WRITE_REG(&adapter->hw,
4193 E1000_RDT(rxr->me), rxr->next_to_refresh);
4194 return;
4195}
4196
4197
4198/*********************************************************************
4199 *
4200 * Allocate memory for rx_buffer structures. Since we use one
4201 * rx_buffer per received packet, the maximum number of rx_buffers
4202 * that we'll need is equal to the number of receive descriptors
4203 * that we've allocated.
4204 *
4205 **********************************************************************/
4206static int
4207igb_allocate_receive_buffers(struct rx_ring *rxr)
4208{
4209 struct adapter *adapter = rxr->adapter;
4210 device_t dev = adapter->dev;
4211 struct igb_rx_buf *rxbuf;
4212 int i, bsize, error;
4213
4214 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc;
4215 if (!(rxr->rx_buffers =
4216 (struct igb_rx_buf *) malloc(bsize,
4217 M_DEVBUF, M_NOWAIT | M_ZERO))) {
4218 device_printf(dev, "Unable to allocate rx_buffer memory\n");
4219 error = ENOMEM;
4220 goto fail;
4221 }
4222
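	/*
	** Two tags are used: header buffers are at most a single mbuf
	** (MSIZE), while payload buffers may be clusters of up to
	** MJUM9BYTES for jumbo frames.
	*/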
4223 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4224 1, 0, /* alignment, bounds */
4225 BUS_SPACE_MAXADDR, /* lowaddr */
4226 BUS_SPACE_MAXADDR, /* highaddr */
4227 NULL, NULL, /* filter, filterarg */
4228 MSIZE, /* maxsize */
4229 1, /* nsegments */
4230 MSIZE, /* maxsegsize */
4231 0, /* flags */
4232 NULL, /* lockfunc */
4233 NULL, /* lockfuncarg */
4234 &rxr->htag))) {
4235 device_printf(dev, "Unable to create RX DMA tag\n");
4236 goto fail;
4237 }
4238
4239 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),
4240 1, 0, /* alignment, bounds */
4241 BUS_SPACE_MAXADDR, /* lowaddr */
4242 BUS_SPACE_MAXADDR, /* highaddr */
4243 NULL, NULL, /* filter, filterarg */
4244 MJUM9BYTES, /* maxsize */
4245 1, /* nsegments */
4246 MJUM9BYTES, /* maxsegsize */
4247 0, /* flags */
4248 NULL, /* lockfunc */
4249 NULL, /* lockfuncarg */
4250 &rxr->ptag))) {
4251 device_printf(dev, "Unable to create RX payload DMA tag\n");
4252 goto fail;
4253 }
4254
4255 for (i = 0; i < adapter->num_rx_desc; i++) {
4256 rxbuf = &rxr->rx_buffers[i];
4257 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap);
4258 if (error) {
4259 device_printf(dev,
4260 "Unable to create RX head DMA maps\n");
4261 goto fail;
4262 }
4263 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
4264 if (error) {
4265 device_printf(dev,
4266 "Unable to create RX packet DMA maps\n");
4267 goto fail;
4268 }
4269 }
4270
4271 return (0);
4272
4273fail:
4274 /* Frees all, but can handle partial completion */
4275 igb_free_receive_structures(adapter);
4276 return (error);
4277}
4278
4279
4280static void
4281igb_free_receive_ring(struct rx_ring *rxr)
4282{
4283 struct adapter *adapter = rxr->adapter;
4284 struct igb_rx_buf *rxbuf;
4285
4286
4287 for (int i = 0; i < adapter->num_rx_desc; i++) {
4288 rxbuf = &rxr->rx_buffers[i];
4289 if (rxbuf->m_head != NULL) {
4290 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4291 BUS_DMASYNC_POSTREAD);
4292 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4293 rxbuf->m_head->m_flags |= M_PKTHDR;
4294 m_freem(rxbuf->m_head);
4295 }
4296 if (rxbuf->m_pack != NULL) {
4297 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4298 BUS_DMASYNC_POSTREAD);
4299 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4300 rxbuf->m_pack->m_flags |= M_PKTHDR;
4301 m_freem(rxbuf->m_pack);
4302 }
4303 rxbuf->m_head = NULL;
4304 rxbuf->m_pack = NULL;
4305 }
4306}
4307
4308
4309/*********************************************************************
4310 *
4311 * Initialize a receive ring and its buffers.
4312 *
4313 **********************************************************************/
4314static int
4315igb_setup_receive_ring(struct rx_ring *rxr)
4316{
4317 struct adapter *adapter;
4318 struct ifnet *ifp;
4319 device_t dev;
4320 struct igb_rx_buf *rxbuf;
4321 bus_dma_segment_t pseg[1], hseg[1];
4322 struct lro_ctrl *lro = &rxr->lro;
4323 int rsize, nsegs, error = 0;
4324#ifdef DEV_NETMAP
4325 struct netmap_adapter *na = NA(rxr->adapter->ifp);
4326 struct netmap_slot *slot;
4327#endif /* DEV_NETMAP */
4328
4329 adapter = rxr->adapter;
4330 dev = adapter->dev;
4331 ifp = adapter->ifp;
4332
4333 /* Clear the ring contents */
4334 IGB_RX_LOCK(rxr);
4335#ifdef DEV_NETMAP
4336 slot = netmap_reset(na, NR_RX, rxr->me, 0);
4337#endif /* DEV_NETMAP */
4338 rsize = roundup2(adapter->num_rx_desc *
4339 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
4340 bzero((void *)rxr->rx_base, rsize);
4341
4342 /*
4343 ** Free current RX buffer structures and their mbufs
4344 */
4345 igb_free_receive_ring(rxr);
4346
4347 /* Configure for header split? */
4348 if (igb_header_split)
4349 rxr->hdr_split = TRUE;
4350
4351 /* Now replenish the ring mbufs */
4352 for (int j = 0; j < adapter->num_rx_desc; ++j) {
4353 struct mbuf *mh, *mp;
4354
4355 rxbuf = &rxr->rx_buffers[j];
4356#ifdef DEV_NETMAP
4357 if (slot) {
4358 /* slot sj is mapped to the j-th NIC-ring entry */
4359 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
4360 uint64_t paddr;
4361 void *addr;
4362
4363 addr = PNMB(na, slot + sj, &paddr);
4364 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
4365 /* Update descriptor */
4366 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
4367 continue;
4368 }
4369#endif /* DEV_NETMAP */
4370 if (rxr->hdr_split == FALSE)
4371 goto skip_head;
4372
4373 /* First the header */
4374 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
4375 if (rxbuf->m_head == NULL) {
4376 error = ENOBUFS;
4377 goto fail;
4378 }
4379 m_adj(rxbuf->m_head, ETHER_ALIGN);
4380 mh = rxbuf->m_head;
4381 mh->m_len = mh->m_pkthdr.len = MHLEN;
4382 mh->m_flags |= M_PKTHDR;
4383 /* Get the memory mapping */
4384 error = bus_dmamap_load_mbuf_sg(rxr->htag,
4385 rxbuf->hmap, rxbuf->m_head, hseg,
4386 &nsegs, BUS_DMA_NOWAIT);
4387 if (error != 0) /* Nothing elegant to do here */
4388 goto fail;
4389 bus_dmamap_sync(rxr->htag,
4390 rxbuf->hmap, BUS_DMASYNC_PREREAD);
4391 /* Update descriptor */
4392 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
4393
4394skip_head:
4395 /* Now the payload cluster */
4396 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
4397 M_PKTHDR, adapter->rx_mbuf_sz);
4398 if (rxbuf->m_pack == NULL) {
4399 error = ENOBUFS;
4400 goto fail;
4401 }
4402 mp = rxbuf->m_pack;
4403 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz;
4404 /* Get the memory mapping */
4405 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
4406 rxbuf->pmap, mp, pseg,
4407 &nsegs, BUS_DMA_NOWAIT);
4408 if (error != 0)
4409 goto fail;
4410 bus_dmamap_sync(rxr->ptag,
4411 rxbuf->pmap, BUS_DMASYNC_PREREAD);
4412 /* Update descriptor */
4413 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
4414 }
4415
4416 /* Setup our descriptor indices */
4417 rxr->next_to_check = 0;
4418 rxr->next_to_refresh = adapter->num_rx_desc - 1;
4419 rxr->lro_enabled = FALSE;
4420 rxr->rx_split_packets = 0;
4421 rxr->rx_bytes = 0;
4422
4423 rxr->fmp = NULL;
4424 rxr->lmp = NULL;
4425
4426 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4427 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4428
4429 /*
4430 ** Now set up the LRO interface; we
4431 ** also only do header split when LRO
4432 ** is enabled, since it is so often
4433 ** undesirable in other setups.
4434 */
4435 if (ifp->if_capenable & IFCAP_LRO) {
4436 error = tcp_lro_init(lro);
4437 if (error) {
4438 device_printf(dev, "LRO Initialization failed!\n");
4439 goto fail;
4440 }
4441 INIT_DEBUGOUT("RX LRO Initialized\n");
4442 rxr->lro_enabled = TRUE;
4443 lro->ifp = adapter->ifp;
4444 }
4445
4446 IGB_RX_UNLOCK(rxr);
4447 return (0);
4448
4449fail:
4450 igb_free_receive_ring(rxr);
4451 IGB_RX_UNLOCK(rxr);
4452 return (error);
4453}
4454
4455
4456/*********************************************************************
4457 *
4458 * Initialize all receive rings.
4459 *
4460 **********************************************************************/
4461static int
4462igb_setup_receive_structures(struct adapter *adapter)
4463{
4464 struct rx_ring *rxr = adapter->rx_rings;
4465 int i;
4466
4467 for (i = 0; i < adapter->num_queues; i++, rxr++)
4468 if (igb_setup_receive_ring(rxr))
4469 goto fail;
4470
4471 return (0);
4472fail:
4473 /*
4474 * Free RX buffers allocated so far; we only handle
4475 * the rings that completed, since the failing case will
4476 * have cleaned up after itself. 'i' is the endpoint.
4477 */
4478 for (int j = 0; j < i; ++j) {
4479 rxr = &adapter->rx_rings[j];
4480 IGB_RX_LOCK(rxr);
4481 igb_free_receive_ring(rxr);
4482 IGB_RX_UNLOCK(rxr);
4483 }
4484
4485 return (ENOBUFS);
4486}
4487
4488/*
4489 * Initialise the RSS mapping for NICs that support multiple transmit/
4490 * receive rings.
4491 */
4492static void
4493igb_initialise_rss_mapping(struct adapter *adapter)
4494{
4495 struct e1000_hw *hw = &adapter->hw;
4496 int i;
4497 int queue_id;
4498 u32 reta;
4499 u32 rss_key[10], mrqc, shift = 0;
4500
4501 /* XXX? */
4502 if (adapter->hw.mac.type == e1000_82575)
4503 shift = 6;
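	/*
	** The 82575 apparently expects the queue index in the upper
	** bits of each RETA byte, hence the shift; later parts use the
	** low bits directly.
	*/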
4504
4505 /*
4506 * The redirection table controls which destination
4507 * queue each bucket redirects traffic to.
4508 * Each DWORD represents four queues, with the LSB
4509 * being the first queue in the DWORD.
4510 *
4511 * This just allocates buckets to queues using round-robin
4512 * allocation.
4513 *
4514 * NOTE: It Just Happens to line up with the default
4515 * RSS allocation method.
4516 */
4517
4518 /* Warning FM follows */
4519 reta = 0;
4520 for (i = 0; i < 128; i++) {
4521#ifdef RSS
4522 queue_id = rss_get_indirection_to_bucket(i);
4523 /*
4524 * If we have more queues than buckets, we'll
4525 * end up mapping buckets to a subset of the
4526 * queues.
4527 *
4528 * If we have more buckets than queues, we'll
4529 * end up instead assigning multiple buckets
4530 * to queues.
4531 *
4532 * Both are suboptimal, but we need to handle
4533 * the case so we don't go out of bounds
4534 * indexing arrays and such.
4535 */
4536 queue_id = queue_id % adapter->num_queues;
4537#else
4538 queue_id = (i % adapter->num_queues);
4539#endif
4540 /* Adjust if required */
4541 queue_id = queue_id << shift;
4542
4543 /*
4544 * The low 8 bits are for hash value (n+0);
4545 * The next 8 bits are for hash value (n+1), etc.
4546 */
4547 reta = reta >> 8;
4548 reta = reta | ( ((uint32_t) queue_id) << 24);
4549 if ((i & 3) == 3) {
4550 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta);
4551 reta = 0;
4552 }
4553 }
4554
4555 /* Now fill in hash table */
4556
4557 /*
4558 * MRQC: Multiple Receive Queues Command
4559 * Set queuing to RSS control, number depends on the device.
4560 */
4561 mrqc = E1000_MRQC_ENABLE_RSS_8Q;
4562
4563#ifdef RSS
4564 /* XXX ew typecasting */
4565 rss_getkey((uint8_t *) &rss_key);
4566#else
4567 arc4rand(&rss_key, sizeof(rss_key), 0);
4568#endif
4569 for (i = 0; i < 10; i++)
4570 E1000_WRITE_REG_ARRAY(hw,
4571 E1000_RSSRK(0), i, rss_key[i]);
4572
4573 /*
4574 * Configure the RSS fields to hash upon.
4575 */
4576 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
4577 E1000_MRQC_RSS_FIELD_IPV4_TCP);
4578 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
4579 E1000_MRQC_RSS_FIELD_IPV6_TCP);
4580 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
4581 E1000_MRQC_RSS_FIELD_IPV6_UDP);
4582 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
4583 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
4584
4585 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
4586}
4587
4588/*********************************************************************
4589 *
4590 * Enable receive unit.
4591 *
4592 **********************************************************************/
4593static void
4594igb_initialize_receive_units(struct adapter *adapter)
4595{
4596 struct rx_ring *rxr = adapter->rx_rings;
4597 struct ifnet *ifp = adapter->ifp;
4598 struct e1000_hw *hw = &adapter->hw;
4599 u32 rctl, rxcsum, psize, srrctl = 0;
4600
4601 INIT_DEBUGOUT("igb_initialize_receive_units: begin");
4602
4603 /*
4604 * Make sure receives are disabled while setting
4605 * up the descriptor ring
4606 */
4607 rctl = E1000_READ_REG(hw, E1000_RCTL);
4608 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
4609
4610 /*
4611 ** Set up for header split
4612 */
4613 if (igb_header_split) {
4614 /* Use a standard mbuf for the header */
4615 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
4616 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
4617 } else
4618 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
4619
4620 /*
4621 ** Set up for jumbo frames
4622 */
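	/*
	** NB: SRRCTL.BSIZEPKT is expressed in 1KB units, which is why
	** the buffer sizes below are shifted by
	** E1000_SRRCTL_BSIZEPKT_SHIFT.
	*/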
4623 if (ifp->if_mtu > ETHERMTU) {
4624 rctl |= E1000_RCTL_LPE;
4625 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) {
4626 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4627 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX;
4628 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) {
4629 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4630 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX;
4631 }
4632 /* Set maximum packet len */
4633 psize = adapter->max_frame_size;
4634 /* are we on a vlan? */
4635 if (adapter->ifp->if_vlantrunk != NULL)
4636 psize += VLAN_TAG_SIZE;
4637 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize);
4638 } else {
4639 rctl &= ~E1000_RCTL_LPE;
4640 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
4641 rctl |= E1000_RCTL_SZ_2048;
4642 }
4643
4644 /*
4645 * If TX flow control is disabled and there's >1 queue defined,
4646 * enable DROP.
4647 *
4648 * This drops frames rather than hanging the RX MAC for all queues.
4649 */
4650 if ((adapter->num_queues > 1) &&
4651 (adapter->fc == e1000_fc_none ||
4652 adapter->fc == e1000_fc_rx_pause)) {
4653 srrctl |= E1000_SRRCTL_DROP_EN;
4654 }
4655
4656 /* Setup the Base and Length of the Rx Descriptor Rings */
4657 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4658 u64 bus_addr = rxr->rxdma.dma_paddr;
4659 u32 rxdctl;
4660
4661 E1000_WRITE_REG(hw, E1000_RDLEN(i),
4662 adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
4663 E1000_WRITE_REG(hw, E1000_RDBAH(i),
4664 (uint32_t)(bus_addr >> 32));
4665 E1000_WRITE_REG(hw, E1000_RDBAL(i),
4666 (uint32_t)bus_addr);
4667 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
4668 /* Enable this Queue */
4669 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
4670 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
4671 rxdctl &= 0xFFF00000;
4672 rxdctl |= IGB_RX_PTHRESH;
4673 rxdctl |= IGB_RX_HTHRESH << 8;
4674 rxdctl |= IGB_RX_WTHRESH << 16;
4675 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
4676 }
4677
4678 /*
4679 ** Setup for RX MultiQueue
4680 */
4681 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
4682 if (adapter->num_queues > 1) {
4683
4684 /* rss setup */
4685 igb_initialise_rss_mapping(adapter);
4686
4687 /*
4688 ** NOTE: Receive Full-Packet Checksum Offload
4689 ** is mutually exclusive with Multiqueue. However
4690 ** this is not the same as TCP/IP checksum offload,
4691 ** which still works.
4692 */
4693 rxcsum |= E1000_RXCSUM_PCSD;
4694#if __FreeBSD_version >= 800000
4695 /* For SCTP Offload */
4696 if (((hw->mac.type == e1000_82576) ||
4697 (hw->mac.type == e1000_82580)) &&
4698 (ifp->if_capenable & IFCAP_RXCSUM))
4699 rxcsum |= E1000_RXCSUM_CRCOFL;
4700#endif
4701 } else {
4702 /* Non RSS setup */
4703 if (ifp->if_capenable & IFCAP_RXCSUM) {
4704 rxcsum |= E1000_RXCSUM_IPPCSE;
4705#if __FreeBSD_version >= 800000
4706 if ((adapter->hw.mac.type == e1000_82576) ||
4707 (adapter->hw.mac.type == e1000_82580))
4708 rxcsum |= E1000_RXCSUM_CRCOFL;
4709#endif
4710 } else
4711 rxcsum &= ~E1000_RXCSUM_TUOFL;
4712 }
4713 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
4714
4715 /* Setup the Receive Control Register */
4716 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
4717 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
4718 E1000_RCTL_RDMTS_HALF |
4719 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
4720 /* Strip CRC bytes. */
4721 rctl |= E1000_RCTL_SECRC;
4722 /* Make sure VLAN Filters are off */
4723 rctl &= ~E1000_RCTL_VFE;
4724 /* Don't store bad packets */
4725 rctl &= ~E1000_RCTL_SBP;
4726
4727 /* Enable Receives */
4728 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
4729
4730 /*
4731 * Setup the HW Rx Head and Tail Descriptor Pointers
4732 * - needs to be after enable
4733 */
4734 for (int i = 0; i < adapter->num_queues; i++) {
4735 rxr = &adapter->rx_rings[i];
4736 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check);
4737#ifdef DEV_NETMAP
4738 /*
4739 * an init() while a netmap client is active must
4740 * preserve the rx buffers passed to userspace.
4741 * In this driver it means we adjust RDT to
4742 * something different from next_to_refresh
4743 * (which is not used in netmap mode).
4744 */
4745 if (ifp->if_capenable & IFCAP_NETMAP) {
4746 struct netmap_adapter *na = NA(adapter->ifp);
4747 struct netmap_kring *kring = &na->rx_rings[i];
4748 int t = rxr->next_to_refresh - nm_kr_rxspace(kring);
4749
4750 if (t >= adapter->num_rx_desc)
4751 t -= adapter->num_rx_desc;
4752 else if (t < 0)
4753 t += adapter->num_rx_desc;
4754 E1000_WRITE_REG(hw, E1000_RDT(i), t);
4755 } else
4756#endif /* DEV_NETMAP */
4757 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh);
4758 }
4759 return;
4760}
4761
4762/*********************************************************************
4763 *
4764 * Free receive rings.
4765 *
4766 **********************************************************************/
4767static void
4768igb_free_receive_structures(struct adapter *adapter)
4769{
4770 struct rx_ring *rxr = adapter->rx_rings;
4771
4772 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
4773 struct lro_ctrl *lro = &rxr->lro;
4774 igb_free_receive_buffers(rxr);
4775 tcp_lro_free(lro);
4776 igb_dma_free(adapter, &rxr->rxdma);
4777 }
4778
4779 free(adapter->rx_rings, M_DEVBUF);
4780}
4781
4782/*********************************************************************
4783 *
4784 * Free receive ring data structures.
4785 *
4786 **********************************************************************/
4787static void
4788igb_free_receive_buffers(struct rx_ring *rxr)
4789{
4790 struct adapter *adapter = rxr->adapter;
4791 struct igb_rx_buf *rxbuf;
4792 int i;
4793
4794 INIT_DEBUGOUT("igb_free_receive_buffers: begin");
4795
4796 /* Cleanup any existing buffers */
4797 if (rxr->rx_buffers != NULL) {
4798 for (i = 0; i < adapter->num_rx_desc; i++) {
4799 rxbuf = &rxr->rx_buffers[i];
4800 if (rxbuf->m_head != NULL) {
4801 bus_dmamap_sync(rxr->htag, rxbuf->hmap,
4802 BUS_DMASYNC_POSTREAD);
4803 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
4804 rxbuf->m_head->m_flags |= M_PKTHDR;
4805 m_freem(rxbuf->m_head);
4806 }
4807 if (rxbuf->m_pack != NULL) {
4808 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
4809 BUS_DMASYNC_POSTREAD);
4810 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
4811 rxbuf->m_pack->m_flags |= M_PKTHDR;
4812 m_freem(rxbuf->m_pack);
4813 }
4814 rxbuf->m_head = NULL;
4815 rxbuf->m_pack = NULL;
4816 if (rxbuf->hmap != NULL) {
4817 bus_dmamap_destroy(rxr->htag, rxbuf->hmap);
4818 rxbuf->hmap = NULL;
4819 }
4820 if (rxbuf->pmap != NULL) {
4821 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
4822 rxbuf->pmap = NULL;
4823 }
4824 }
4825 if (rxr->rx_buffers != NULL) {
4826 free(rxr->rx_buffers, M_DEVBUF);
4827 rxr->rx_buffers = NULL;
4828 }
4829 }
4830
4831 if (rxr->htag != NULL) {
4832 bus_dma_tag_destroy(rxr->htag);
4833 rxr->htag = NULL;
4834 }
4835 if (rxr->ptag != NULL) {
4836 bus_dma_tag_destroy(rxr->ptag);
4837 rxr->ptag = NULL;
4838 }
4839}
4840
4841static __inline void
4842igb_rx_discard(struct rx_ring *rxr, int i)
4843{
4844 struct igb_rx_buf *rbuf;
4845
4846 rbuf = &rxr->rx_buffers[i];
4847
4848 /* Partially received? Free the chain */
4849 if (rxr->fmp != NULL) {
4850 rxr->fmp->m_flags |= M_PKTHDR;
4851 m_freem(rxr->fmp);
4852 rxr->fmp = NULL;
4853 rxr->lmp = NULL;
4854 }
4855
4856 /*
4857 ** With advanced descriptors the writeback
4858 ** clobbers the buffer addresses, so it's easier
4859 ** to just free the existing mbufs and take
4860 ** the normal refresh path to get new buffers
4861 ** and mapping.
4862 */
4863 if (rbuf->m_head) {
4864 m_free(rbuf->m_head);
4865 rbuf->m_head = NULL;
4866 bus_dmamap_unload(rxr->htag, rbuf->hmap);
4867 }
4868
4869 if (rbuf->m_pack) {
4870 m_free(rbuf->m_pack);
4871 rbuf->m_pack = NULL;
4872 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
4873 }
4874
4875 return;
4876}
4877
4878static __inline void
4879igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
4880{
4881
4882 /*
4883 * At the moment LRO is only for IPv4/TCP packets whose TCP
4884 * checksum has been verified by hardware, and which carry no
4885 * VLAN tag in the Ethernet header.
4886 */
4887 if (rxr->lro_enabled &&
4888 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
4889 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
4890 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) ==
4891 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) &&
4892 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
4893 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
4894 /*
4895 * Send to the stack if:
4896 ** - LRO not enabled, or
4897 ** - no LRO resources, or
4898 ** - lro enqueue fails
4899 */
4900 if (rxr->lro.lro_cnt != 0)
4901 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
4902 return;
4903 }
4904 IGB_RX_UNLOCK(rxr);
4905 (*ifp->if_input)(ifp, m);
4906 IGB_RX_LOCK(rxr);
4907}
4908
4909/*********************************************************************
4910 *
4911 * This routine executes in interrupt context. It replenishes
4912 * the mbufs in the descriptor ring and sends data which has been
4913 * DMA'd into host memory to the upper layer.
4914 *
4915 * We loop at most count times if count is > 0, or until done if
4916 * count < 0.
4917 *
4918 * Return TRUE if more to clean, FALSE otherwise
4919 *********************************************************************/
4920static bool
4921igb_rxeof(struct igb_queue *que, int count, int *done)
4922{
4923 struct adapter *adapter = que->adapter;
4924 struct rx_ring *rxr = que->rxr;
4925 struct ifnet *ifp = adapter->ifp;
4926 struct lro_ctrl *lro = &rxr->lro;
4927 struct lro_entry *queued;
4928 int i, processed = 0, rxdone = 0;
4929 u32 ptype, staterr = 0;
4930 union e1000_adv_rx_desc *cur;
4931
4932 IGB_RX_LOCK(rxr);
4933 /* Sync the ring. */
4934 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
4935 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4936
4937#ifdef DEV_NETMAP
4938 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
4939 IGB_RX_UNLOCK(rxr);
4940 return (FALSE);
4941 }
4942#endif /* DEV_NETMAP */
4943
4944 /* Main clean loop */
4945 for (i = rxr->next_to_check; count != 0;) {
4946 struct mbuf *sendmp, *mh, *mp;
4947 struct igb_rx_buf *rxbuf;
4948 u16 hlen, plen, hdr, vtag, pkt_info;
4949 bool eop = FALSE;
4950
4951 cur = &rxr->rx_base[i];
4952 staterr = le32toh(cur->wb.upper.status_error);
4953 if ((staterr & E1000_RXD_STAT_DD) == 0)
4954 break;
4955 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
4956 break;
4957 count--;
4958 sendmp = mh = mp = NULL;
4959 cur->wb.upper.status_error = 0;
4960 rxbuf = &rxr->rx_buffers[i];
4961 plen = le16toh(cur->wb.upper.length);
4962 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK;
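		/*
		** On i350/i354 the VLAN tag of a loopbacked packet is
		** apparently reported in network byte order, hence the
		** byte swap when the LB status bit is set.
		*/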
4963 if (((adapter->hw.mac.type == e1000_i350) ||
4964 (adapter->hw.mac.type == e1000_i354)) &&
4965 (staterr & E1000_RXDEXT_STATERR_LB))
4966 vtag = be16toh(cur->wb.upper.vlan);
4967 else
4968 vtag = le16toh(cur->wb.upper.vlan);
4969 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info);
4970 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
4971 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP);
4972
4973 /*
4974 * Free the frame (all segments) if we're at EOP and
4975 * it's an error.
4976 *
4977 * The datasheet states that EOP + status is only valid for
4978 * the final segment in a multi-segment frame.
4979 */
4980 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) {
4981 adapter->dropped_pkts++;
4982 ++rxr->rx_discarded;
4983 igb_rx_discard(rxr, i);
4984 goto next_desc;
4985 }
4986
4987 /*
4988 ** The way the hardware is configured to
4989 ** split, it will ONLY use the header buffer
4990 ** when header split is enabled, otherwise we
4991 ** get normal behavior, i.e., both header and
4992 ** payload are DMA'd into the payload buffer.
4993 **
4994 ** The fmp test is to catch the case where a
4995 ** packet spans multiple descriptors, in that
4996 ** case only the first header is valid.
4997 */
4998 if (rxr->hdr_split && rxr->fmp == NULL) {
4999 bus_dmamap_unload(rxr->htag, rxbuf->hmap);
5000 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >>
5001 E1000_RXDADV_HDRBUFLEN_SHIFT;
5002 if (hlen > IGB_HDR_BUF)
5003 hlen = IGB_HDR_BUF;
5004 mh = rxr->rx_buffers[i].m_head;
5005 mh->m_len = hlen;
5006 /* clear buf pointer for refresh */
5007 rxbuf->m_head = NULL;
5008 /*
5009 ** Get the payload length, this
5010 ** could be zero if its a small
5011 ** packet.
5012 */
5013 if (plen > 0) {
5014 mp = rxr->rx_buffers[i].m_pack;
5015 mp->m_len = plen;
5016 mh->m_next = mp;
5017 /* clear buf pointer */
5018 rxbuf->m_pack = NULL;
5019 rxr->rx_split_packets++;
5020 }
5021 } else {
5022 /*
5023 ** Either no header split, or a
5024 ** secondary piece of a fragmented
5025 ** split packet.
5026 */
5027 mh = rxr->rx_buffers[i].m_pack;
5028 mh->m_len = plen;
5029 /* clear buf info for refresh */
5030 rxbuf->m_pack = NULL;
5031 }
5032 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
5033
5034 ++processed; /* So we know when to refresh */
5035
5036 /* Initial frame - setup */
5037 if (rxr->fmp == NULL) {
5038 mh->m_pkthdr.len = mh->m_len;
5039 /* Save the head of the chain */
5040 rxr->fmp = mh;
5041 rxr->lmp = mh;
5042 if (mp != NULL) {
5043 /* Add payload if split */
5044 mh->m_pkthdr.len += mp->m_len;
5045 rxr->lmp = mh->m_next;
5046 }
5047 } else {
5048 /* Chain mbuf's together */
5049 rxr->lmp->m_next = mh;
5050 rxr->lmp = rxr->lmp->m_next;
5051 rxr->fmp->m_pkthdr.len += mh->m_len;
5052 }
5053
5054 if (eop) {
5055 rxr->fmp->m_pkthdr.rcvif = ifp;
5056 rxr->rx_packets++;
5057 /* capture data for AIM */
5058 rxr->packets++;
5059 rxr->bytes += rxr->fmp->m_pkthdr.len;
5060 rxr->rx_bytes += rxr->fmp->m_pkthdr.len;
5061
5062 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
5063 igb_rx_checksum(staterr, rxr->fmp, ptype);
5064
5065 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
5066 (staterr & E1000_RXD_STAT_VP) != 0) {
5067 rxr->fmp->m_pkthdr.ether_vtag = vtag;
5068 rxr->fmp->m_flags |= M_VLANTAG;
5069 }
5070
5071 /*
5072 * In case of multiqueue, we have RXCSUM.PCSD bit set
5073 * and never cleared. This means we have RSS hash
5074 * available to be used.
5075 */
5076 if (adapter->num_queues > 1) {
5077 rxr->fmp->m_pkthdr.flowid =
5078 le32toh(cur->wb.lower.hi_dword.rss);
5079 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) {
5080 case E1000_RXDADV_RSSTYPE_IPV4_TCP:
5081 M_HASHTYPE_SET(rxr->fmp,
5082 M_HASHTYPE_RSS_TCP_IPV4);
5083 break;
5084 case E1000_RXDADV_RSSTYPE_IPV4:
5085 M_HASHTYPE_SET(rxr->fmp,
5086 M_HASHTYPE_RSS_IPV4);
5087 break;
5088 case E1000_RXDADV_RSSTYPE_IPV6_TCP:
5089 M_HASHTYPE_SET(rxr->fmp,
5090 M_HASHTYPE_RSS_TCP_IPV6);
5091 break;
5092 case E1000_RXDADV_RSSTYPE_IPV6_EX:
5093 M_HASHTYPE_SET(rxr->fmp,
5094 M_HASHTYPE_RSS_IPV6_EX);
5095 break;
5096 case E1000_RXDADV_RSSTYPE_IPV6:
5097 M_HASHTYPE_SET(rxr->fmp,
5098 M_HASHTYPE_RSS_IPV6);
5099 break;
5100 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX:
5101 M_HASHTYPE_SET(rxr->fmp,
5102 M_HASHTYPE_RSS_TCP_IPV6_EX);
5103 break;
5104 default:
5105 /* XXX fallthrough */
5106 M_HASHTYPE_SET(rxr->fmp,
5107 M_HASHTYPE_OPAQUE);
5108 }
5109 } else {
5110#ifndef IGB_LEGACY_TX
5111 rxr->fmp->m_pkthdr.flowid = que->msix;
5112 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE);
5113#endif
5114 }
5115 sendmp = rxr->fmp;
5116 /* Make sure to set M_PKTHDR. */
5117 sendmp->m_flags |= M_PKTHDR;
5118 rxr->fmp = NULL;
5119 rxr->lmp = NULL;
5120 }
5121
5122next_desc:
5123 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
5124 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
5125
5126 /* Advance our pointers to the next descriptor. */
5127 if (++i == adapter->num_rx_desc)
5128 i = 0;
5129 /*
5130 ** Send to the stack or LRO
5131 */
5132 if (sendmp != NULL) {
5133 rxr->next_to_check = i;
5134 igb_rx_input(rxr, ifp, sendmp, ptype);
5135 i = rxr->next_to_check;
5136 rxdone++;
5137 }
5138
5139 /* Every 8 descriptors we go to refresh mbufs */
5140 if (processed == 8) {
5141 igb_refresh_mbufs(rxr, i);
5142 processed = 0;
5143 }
5144 }
5145
5146 /* Catch any remainders */
5147 if (igb_rx_unrefreshed(rxr))
5148 igb_refresh_mbufs(rxr, i);
5149
5150 rxr->next_to_check = i;
5151
5152 /*
5153 * Flush any outstanding LRO work
5154 */
5155 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
5156 SLIST_REMOVE_HEAD(&lro->lro_active, next);
5157 tcp_lro_flush(lro, queued);
5158 }
5159
5160 if (done != NULL)
5161 *done += rxdone;
5162
5163 IGB_RX_UNLOCK(rxr);
5164 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE);
5165}
5166
5167/*********************************************************************
5168 *
5169 * Verify that the hardware indicated that the checksum is valid.
5170 * Inform the stack of the checksum status so that it
5171 * doesn't spend time re-verifying the checksum.
5172 *
5173 *********************************************************************/
5174static void
5175igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
5176{
5177 u16 status = (u16)staterr;
5178 u8 errors = (u8) (staterr >> 24);
5179 int sctp;
5180
5181 /* Ignore Checksum bit is set */
5182 if (status & E1000_RXD_STAT_IXSM) {
5183 mp->m_pkthdr.csum_flags = 0;
5184 return;
5185 }
5186
5187 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 &&
5188 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0)
5189 sctp = 1;
5190 else
5191 sctp = 0;
5192 if (status & E1000_RXD_STAT_IPCS) {
5193 /* Did it pass? */
5194 if (!(errors & E1000_RXD_ERR_IPE)) {
5195 /* IP Checksum Good */
5196 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
5197 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
5198 } else
5199 mp->m_pkthdr.csum_flags = 0;
5200 }
5201
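	/*
	** CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data of 0xffff
	** tells the stack the L4 checksum has already been verified,
	** so it is not recomputed in software.
	*/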
5202 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
5203 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
5204#if __FreeBSD_version >= 800000
5205 if (sctp) /* reassign */
5206 type = CSUM_SCTP_VALID;
5207#endif
5208 /* Did it pass? */
5209 if (!(errors & E1000_RXD_ERR_TCPE)) {
5210 mp->m_pkthdr.csum_flags |= type;
5211 if (sctp == 0)
5212 mp->m_pkthdr.csum_data = htons(0xffff);
5213 }
5214 }
5215 return;
5216}
5217
5218/*
5219 * This routine is run via a vlan
5220 * config EVENT
5221 */
5222static void
5223igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5224{
5225 struct adapter *adapter = ifp->if_softc;
5226 u32 index, bit;
5227
5228 if (ifp->if_softc != arg) /* Not our event */
5229 return;
5230
5231 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5232 return;
5233
5234 IGB_CORE_LOCK(adapter);
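	/*
	** The shadow VFTA is 128 32-bit words covering all 4096 VLAN
	** IDs: bits 11:5 of the tag select the word, bits 4:0 the bit
	** within it.
	*/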
5235 index = (vtag >> 5) & 0x7F;
5236 bit = vtag & 0x1F;
5237 adapter->shadow_vfta[index] |= (1 << bit);
5238 ++adapter->num_vlans;
5239 /* Change hw filter setting */
5240 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5241 igb_setup_vlan_hw_support(adapter);
5242 IGB_CORE_UNLOCK(adapter);
5243}
5244
5245/*
5246 * This routine is run via a vlan
5247 * unconfig EVENT
5248 */
5249static void
5250igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag)
5251{
5252 struct adapter *adapter = ifp->if_softc;
5253 u32 index, bit;
5254
5255 if (ifp->if_softc != arg)
5256 return;
5257
5258 if ((vtag == 0) || (vtag > 4095)) /* Invalid */
5259 return;
5260
5261 IGB_CORE_LOCK(adapter);
5262 index = (vtag >> 5) & 0x7F;
5263 bit = vtag & 0x1F;
5264 adapter->shadow_vfta[index] &= ~(1 << bit);
5265 --adapter->num_vlans;
5266 /* Change hw filter setting */
5267 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
5268 igb_setup_vlan_hw_support(adapter);
5269 IGB_CORE_UNLOCK(adapter);
5270}
5271
5272static void
5273igb_setup_vlan_hw_support(struct adapter *adapter)
5274{
5275 struct e1000_hw *hw = &adapter->hw;
5276 struct ifnet *ifp = adapter->ifp;
5277 u32 reg;
5278
5279 if (adapter->vf_ifp) {
5280 e1000_rlpml_set_vf(hw,
5281 adapter->max_frame_size + VLAN_TAG_SIZE);
5282 return;
5283 }
5284
5285 reg = E1000_READ_REG(hw, E1000_CTRL);
5286 reg |= E1000_CTRL_VME;
5287 E1000_WRITE_REG(hw, E1000_CTRL, reg);
5288
5289 /* Enable the Filter Table */
5290 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
5291 reg = E1000_READ_REG(hw, E1000_RCTL);
5292 reg &= ~E1000_RCTL_CFIEN;
5293 reg |= E1000_RCTL_VFE;
5294 E1000_WRITE_REG(hw, E1000_RCTL, reg);
5295 }
5296
5297 /* Update the frame size */
5298 E1000_WRITE_REG(&adapter->hw, E1000_RLPML,
5299 adapter->max_frame_size + VLAN_TAG_SIZE);
5300
5301 /* Don't bother with table if no vlans */
5302 if ((adapter->num_vlans == 0) ||
5303 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0))
5304 return;
5305 /*
5306 ** A soft reset zeroes out the VFTA, so
5307 ** we need to repopulate it now.
5308 */
5309 for (int i = 0; i < IGB_VFTA_SIZE; i++)
5310 if (adapter->shadow_vfta[i] != 0) {
5311 if (adapter->vf_ifp)
5312 e1000_vfta_set_vf(hw,
5313 adapter->shadow_vfta[i], TRUE);
5314 else
5315 e1000_write_vfta(hw,
5316 i, adapter->shadow_vfta[i]);
5317 }
5318}
5319
5320static void
5321igb_enable_intr(struct adapter *adapter)
5322{
5323 /* With RSS set up what to auto clear */
5324 if (adapter->msix_mem) {
5325 u32 mask = (adapter->que_mask | adapter->link_mask);
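		/*
		** EIAC selects which extended interrupts auto-clear,
		** EIAM which auto-mask, and EIMS unmasks them; link
		** state changes still come in via the legacy IMS/LSC path.
		*/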
5326 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask);
5327 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask);
5328 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask);
5329 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5330 E1000_IMS_LSC);
5331 } else {
5332 E1000_WRITE_REG(&adapter->hw, E1000_IMS,
5333 IMS_ENABLE_MASK);
5334 }
5335 E1000_WRITE_FLUSH(&adapter->hw);
5336
5337 return;
5338}
5339
5340static void
5341igb_disable_intr(struct adapter *adapter)
5342{
5343 if (adapter->msix_mem) {
5344 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0);
5345 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0);
5346 }
5347 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0);
5348 E1000_WRITE_FLUSH(&adapter->hw);
5349 return;
5350}
5351
5352/*
5353 * Bit of a misnomer: what this really means is
5354 * to enable OS management of the system, i.e.,
5355 * to disable the special hardware management features.
5356 */
5357static void
5358igb_init_manageability(struct adapter *adapter)
5359{
5360 if (adapter->has_manage) {
5361 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H);
5362 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5363
5364 /* disable hardware interception of ARP */
5365 manc &= ~(E1000_MANC_ARP_EN);
5366
5367 /* enable receiving management packets to the host */
5368 manc |= E1000_MANC_EN_MNG2HOST;
5369 manc2h |= 1 << 5; /* Mng Port 623 */
5370 manc2h |= 1 << 6; /* Mng Port 664 */
5371 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h);
5372 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5373 }
5374}
5375
5376/*
5377 * Give control back to hardware management
5378 * controller if there is one.
5379 */
5380static void
5381igb_release_manageability(struct adapter *adapter)
5382{
5383 if (adapter->has_manage) {
5384 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
5385
5386 /* re-enable hardware interception of ARP */
5387 manc |= E1000_MANC_ARP_EN;
5388 manc &= ~E1000_MANC_EN_MNG2HOST;
5389
5390 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
5391 }
5392}
5393
5394/*
5395 * igb_get_hw_control sets the CTRL_EXT:DRV_LOAD bit.
5396 * For ASF and Pass Through versions of f/w this means that
5397 * the driver is loaded.
5398 *
5399 */
5400static void
5401igb_get_hw_control(struct adapter *adapter)
5402{
5403 u32 ctrl_ext;
5404
5405 if (adapter->vf_ifp)
5406 return;
5407
5408 /* Let firmware know the driver has taken over */
5409 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5410 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5411 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
5412}
5413
5414/*
5415 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
5416 * For ASF and Pass Through versions of f/w this means that the
5417 * driver is no longer loaded.
5418 *
5419 */
5420static void
5421igb_release_hw_control(struct adapter *adapter)
5422{
5423 u32 ctrl_ext;
5424
5425 if (adapter->vf_ifp)
5426 return;
5427
5428 /* Let firmware take over control of the h/w */
5429 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
5430 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
5431 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
5432}
5433
5434static int
5435igb_is_valid_ether_addr(uint8_t *addr)
5436{
5437 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
5438
5439 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
5440 return (FALSE);
5441 }
5442
5443 return (TRUE);
5444}
5445
5446
5447/*
5448 * Enable PCI Wake On Lan capability
5449 */
5450static void
5451igb_enable_wakeup(device_t dev)
5452{
5453 u16 cap, status;
5454 u8 id;
5455
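	/*
	** NB: this assumes the power-management capability is the
	** first entry in the PCI capability list, which appears to
	** hold for these adapters; otherwise we silently give up below.
	*/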
5456 /* First find the capabilities pointer*/
5457 cap = pci_read_config(dev, PCIR_CAP_PTR, 2);
5458 /* Read the PM Capabilities */
5459 id = pci_read_config(dev, cap, 1);
5460 if (id != PCIY_PMG) /* Something wrong */
5461 return;
5462 /* OK, we have the power capabilities,
5463 * so now get the status register */
5464 cap += PCIR_POWER_STATUS;
5465 status = pci_read_config(dev, cap, 2);
5466 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
5467 pci_write_config(dev, cap, status, 2);
5468 return;
5469}
5470
5471static void
5472igb_led_func(void *arg, int onoff)
5473{
5474 struct adapter *adapter = arg;
5475
5476 IGB_CORE_LOCK(adapter);
5477 if (onoff) {
5478 e1000_setup_led(&adapter->hw);
5479 e1000_led_on(&adapter->hw);
5480 } else {
5481 e1000_led_off(&adapter->hw);
5482 e1000_cleanup_led(&adapter->hw);
5483 }
5484 IGB_CORE_UNLOCK(adapter);
5485}
5486
5487static uint64_t
5488igb_get_vf_counter(if_t ifp, ift_counter cnt)
5489{
5490 struct adapter *adapter;
5491 struct e1000_vf_stats *stats;
5492#ifndef IGB_LEGACY_TX
5493 struct tx_ring *txr;
5494 uint64_t rv;
5495#endif
5496
5497 adapter = if_getsoftc(ifp);
5498 stats = (struct e1000_vf_stats *)adapter->stats;
5499
5500 switch (cnt) {
5501 case IFCOUNTER_IPACKETS:
5502 return (stats->gprc);
5503 case IFCOUNTER_OPACKETS:
5504 return (stats->gptc);
5505 case IFCOUNTER_IBYTES:
5506 return (stats->gorc);
5507 case IFCOUNTER_OBYTES:
5508 return (stats->gotc);
5509 case IFCOUNTER_IMCASTS:
5510 return (stats->mprc);
5511 case IFCOUNTER_IERRORS:
5512 return (adapter->dropped_pkts);
5513 case IFCOUNTER_OERRORS:
5514 return (adapter->watchdog_events);
5515#ifndef IGB_LEGACY_TX
5516 case IFCOUNTER_OQDROPS:
5517 rv = 0;
5518 txr = adapter->tx_rings;
5519 for (int i = 0; i < adapter->num_queues; i++, txr++)
5520 rv += txr->br->br_drops;
5521 return (rv);
5522#endif
5523 default:
5524 return (if_get_counter_default(ifp, cnt));
5525 }
5526}
5527
5528static uint64_t
5529igb_get_counter(if_t ifp, ift_counter cnt)
5530{
5531 struct adapter *adapter;
5532 struct e1000_hw_stats *stats;
5533#ifndef IGB_LEGACY_TX
5534 struct tx_ring *txr;
5535 uint64_t rv;
5536#endif
5537
5538 adapter = if_getsoftc(ifp);
5539 if (adapter->vf_ifp)
5540 return (igb_get_vf_counter(ifp, cnt));
5541
5542 stats = (struct e1000_hw_stats *)adapter->stats;
5543
5544 switch (cnt) {
5545 case IFCOUNTER_IPACKETS:
5546 return (stats->gprc);
5547 case IFCOUNTER_OPACKETS:
5548 return (stats->gptc);
5549 case IFCOUNTER_IBYTES:
5550 return (stats->gorc);
5551 case IFCOUNTER_OBYTES:
5552 return (stats->gotc);
5553 case IFCOUNTER_IMCASTS:
5554 return (stats->mprc);
5555 case IFCOUNTER_OMCASTS:
5556 return (stats->mptc);
5557 case IFCOUNTER_IERRORS:
5558 return (adapter->dropped_pkts + stats->rxerrc +
5559 stats->crcerrs + stats->algnerrc +
5560 stats->ruc + stats->roc + stats->cexterr);
5561 case IFCOUNTER_OERRORS:
5562 return (stats->ecol + stats->latecol +
5563 adapter->watchdog_events);
5564 case IFCOUNTER_COLLISIONS:
5565 return (stats->colc);
5566 case IFCOUNTER_IQDROPS:
5567 return (stats->mpc);
5568#ifndef IGB_LEGACY_TX
5569 case IFCOUNTER_OQDROPS:
5570 rv = 0;
5571 txr = adapter->tx_rings;
5572 for (int i = 0; i < adapter->num_queues; i++, txr++)
5573 rv += txr->br->br_drops;
5574 return (rv);
5575#endif
5576 default:
5577 return (if_get_counter_default(ifp, cnt));
5578 }
5579}
5580
5581/**********************************************************************
5582 *
5583 * Update the board statistics counters.
5584 *
5585 **********************************************************************/
5586static void
5587igb_update_stats_counters(struct adapter *adapter)
5588{
5589 struct e1000_hw *hw = &adapter->hw;
5590 struct e1000_hw_stats *stats;
5591
5592 /*
5593 ** The virtual function adapter has only a
5594 ** small, controlled set of stats, so do only
5595 ** those and return.
5596 */
5597 if (adapter->vf_ifp) {
5598 igb_update_vf_stats_counters(adapter);
5599 return;
5600 }
5601
5602 stats = (struct e1000_hw_stats *)adapter->stats;
5603
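	/*
	** Most of the statistics registers below are clear-on-read,
	** so each value is accumulated into the soft copy rather than
	** overwriting it.
	*/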
5604 if (adapter->hw.phy.media_type == e1000_media_type_copper ||
5605 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
5606 stats->symerrs +=
5607 E1000_READ_REG(hw, E1000_SYMERRS);
5608 stats->sec += E1000_READ_REG(hw, E1000_SEC);
5609 }
5610
5611 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
5612 stats->mpc += E1000_READ_REG(hw, E1000_MPC);
5613 stats->scc += E1000_READ_REG(hw, E1000_SCC);
5614 stats->ecol += E1000_READ_REG(hw, E1000_ECOL);
5615
5616 stats->mcc += E1000_READ_REG(hw, E1000_MCC);
5617 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
5618 stats->colc += E1000_READ_REG(hw, E1000_COLC);
5619 stats->dc += E1000_READ_REG(hw, E1000_DC);
5620 stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
5621 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
5622 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
5623 /*
5624 ** For watchdog management we need to know if we have been
5625 ** paused during the last interval, so capture that here.
5626 */
5627 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
5628 stats->xoffrxc += adapter->pause_frames;
5629 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
5630 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
5631 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
5632 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
5633 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
5634 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
5635 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
5636 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
5637 stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
5638 stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
5639 stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
5640 stats->gptc += E1000_READ_REG(hw, E1000_GPTC);
5641
5642 /* For the 64-bit byte counters the low dword must be read first. */
5643 /* Both registers clear on the read of the high dword */
5644
5645 stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
5646 ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
5647 stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
5648 ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);
5649
5650 stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
5651 stats->ruc += E1000_READ_REG(hw, E1000_RUC);
5652 stats->rfc += E1000_READ_REG(hw, E1000_RFC);
5653 stats->roc += E1000_READ_REG(hw, E1000_ROC);
5654 stats->rjc += E1000_READ_REG(hw, E1000_RJC);
5655
5656 stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
5657 stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
5658 stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);
5659
5660 stats->tor += E1000_READ_REG(hw, E1000_TORL) +
5661 ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
5662 stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
5663 ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);
5664
5665 stats->tpr += E1000_READ_REG(hw, E1000_TPR);
5666 stats->tpt += E1000_READ_REG(hw, E1000_TPT);
5667 stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
5668 stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
5669 stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
5670 stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
5671 stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
5672 stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
5673 stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
5674 stats->bptc += E1000_READ_REG(hw, E1000_BPTC);
5675
5676 /* Interrupt Counts */
5677
5678 stats->iac += E1000_READ_REG(hw, E1000_IAC);
5679 stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
5680 stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
5681 stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
5682 stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
5683 stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
5684 stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
5685 stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
5686 stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);
5687
5688 /* Host to Card Statistics */
5689
5690 stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
5691 stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
5692 stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
5693 stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
5694 stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
5695 stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
5696 stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
5697 stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
5698 ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
5699 stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
5700 ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
5701 stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
5702 stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
5703 stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);
5704
5705 stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
5706 stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
5707 stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
5708 stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
5709 stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
5710 stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);
5711
5712 /* Driver specific counters */
5713 adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
5714 adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
5715 adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
5716 adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
5717 adapter->packet_buf_alloc_tx =
5718 ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
5719 adapter->packet_buf_alloc_rx =
5720 (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
5721}
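/*
 * The MAC statistics registers read above are clear-on-read, so this
 * routine only accumulates deltas into the soft copies; it is expected
 * to run periodically (e.g. from the driver's local timer, not shown in
 * this extract) so that no register wraps between samples.
 */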
5722
5723
5724/**********************************************************************
5725 *
5726 * Initialize the VF board statistics counters.
5727 *
5728 **********************************************************************/
5729static void
5730igb_vf_init_stats(struct adapter *adapter)
5731{
5732 struct e1000_hw *hw = &adapter->hw;
5733 struct e1000_vf_stats *stats;
5734
5735 stats = (struct e1000_vf_stats *)adapter->stats;
5736 if (stats == NULL)
5737 return;
5738 stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
5739 stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
5740 stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
5741 stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
5742 stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
5743}
5744
5745/**********************************************************************
5746 *
5747 * Update the VF board statistics counters.
5748 *
5749 **********************************************************************/
5750static void
5751igb_update_vf_stats_counters(struct adapter *adapter)
5752{
5753 struct e1000_hw *hw = &adapter->hw;
5754 struct e1000_vf_stats *stats;
5755
5756 if (adapter->link_speed == 0)
5757 return;
5758
5759 stats = (struct e1000_vf_stats *)adapter->stats;
5760
5761 UPDATE_VF_REG(E1000_VFGPRC,
5762 stats->last_gprc, stats->gprc);
5763 UPDATE_VF_REG(E1000_VFGORC,
5764 stats->last_gorc, stats->gorc);
5765 UPDATE_VF_REG(E1000_VFGPTC,
5766 stats->last_gptc, stats->gptc);
5767 UPDATE_VF_REG(E1000_VFGOTC,
5768 stats->last_gotc, stats->gotc);
5769 UPDATE_VF_REG(E1000_VFMPRC,
5770 stats->last_mprc, stats->mprc);
5771}
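/*
 * UPDATE_VF_REG() is defined in if_igb.h (not shown here). Unlike the PF
 * statistics registers, the VF counters are not clear-on-read, so the
 * macro presumably keeps a 32-bit snapshot and folds rollovers into the
 * 64-bit soft counter, roughly along these lines (sketch, not the actual
 * macro):
 *
 *	cur = E1000_READ_REG(hw, reg);
 *	if (cur < last)			// 32-bit counter wrapped
 *		stat += (u64)1 << 32;
 *	stat = (stat & 0xFFFFFFFF00000000ULL) | cur;
 *	last = cur;
 */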
5772
5773/* Export a single 32-bit register via a read-only sysctl. */
5774static int
5775igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
5776{
5777 struct adapter *adapter;
5778 u_int val;
5779
5780 adapter = oidp->oid_arg1;
5781 val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
5782 return (sysctl_handle_int(oidp, &val, 0, req));
5783}
5784
5785/*
5786** Tunable interrupt rate handler
5787*/
5788static int
5789igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
5790{
5791 struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
5792 int error;
5793 u32 reg, usec, rate;
5794
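	/*
	 * EITR[14:2] holds the programmed interrupt throttle interval; the
	 * code below treats that field as microseconds and reports the
	 * equivalent interrupts/second. Writes through this sysctl are
	 * accepted but not written back to the hardware.
	 */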
5795 reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
5796 usec = ((reg & 0x7FFC) >> 2);
5797 if (usec > 0)
5798 rate = 1000000 / usec;
5799 else
5800 rate = 0;
5801 error = sysctl_handle_int(oidp, &rate, 0, req);
5802 if (error || !req->newptr)
5803		return (error);
5804	return (0);
5805}
5806
5807/*
5808 * Add sysctl variables, one per statistic, to the system.
5809 */
5810static void
5811igb_add_hw_stats(struct adapter *adapter)
5812{
5813 device_t dev = adapter->dev;
5814
5815 struct tx_ring *txr = adapter->tx_rings;
5816 struct rx_ring *rxr = adapter->rx_rings;
5817
5818 struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
5819 struct sysctl_oid *tree = device_get_sysctl_tree(dev);
5820 struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
5821 struct e1000_hw_stats *stats = adapter->stats;
5822
5823 struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
5824 struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;
5825
5826#define QUEUE_NAME_LEN 32
5827 char namebuf[QUEUE_NAME_LEN];
5828
5829 /* Driver Statistics */
5830 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
5831 CTLFLAG_RD, &adapter->dropped_pkts,
5832 "Driver dropped packets");
5833 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
5834 CTLFLAG_RD, &adapter->link_irq,
5835 "Link MSIX IRQ Handled");
5836 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
5837 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
5838 "Defragmenting mbuf chain failed");
5839 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
5840 CTLFLAG_RD, &adapter->no_tx_dma_setup,
5841 "Driver tx dma failure in xmit");
5842 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
5843 CTLFLAG_RD, &adapter->rx_overruns,
5844 "RX overruns");
5845 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
5846 CTLFLAG_RD, &adapter->watchdog_events,
5847 "Watchdog timeouts");
5848
5849 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
5850 CTLFLAG_RD, &adapter->device_control,
5851 "Device Control Register");
5852 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
5853 CTLFLAG_RD, &adapter->rx_control,
5854 "Receiver Control Register");
5855 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
5856 CTLFLAG_RD, &adapter->int_mask,
5857 "Interrupt Mask");
5858 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
5859 CTLFLAG_RD, &adapter->eint_mask,
5860 "Extended Interrupt Mask");
5861 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
5862 CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
5863 "Transmit Buffer Packet Allocation");
5864 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
5865 CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
5866 "Receive Buffer Packet Allocation");
5867 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
5868 CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
5869 "Flow Control High Watermark");
5870 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
5871 CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
5872 "Flow Control Low Watermark");
5873
5874 for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
5875 struct lro_ctrl *lro = &rxr->lro;
5876
5877 snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
5878 queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
5879 CTLFLAG_RD, NULL, "Queue Name");
5880 queue_list = SYSCTL_CHILDREN(queue_node);
5881
5882 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
5883 CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
5884 sizeof(&adapter->queues[i]),
5885 igb_sysctl_interrupt_rate_handler,
5886 "IU", "Interrupt Rate");
5887
5888 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
5889 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
5890 igb_sysctl_reg_handler, "IU",
5891 "Transmit Descriptor Head");
5892 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
5893 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
5894 igb_sysctl_reg_handler, "IU",
5895 "Transmit Descriptor Tail");
5896 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
5897 CTLFLAG_RD, &txr->no_desc_avail,
5898 "Queue Descriptors Unavailable");
5899 SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
5900 CTLFLAG_RD, &txr->total_packets,
5901 "Queue Packets Transmitted");
5902
5903 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
5904 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
5905 igb_sysctl_reg_handler, "IU",
5906 "Receive Descriptor Head");
5907 SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
5908 CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
5909 igb_sysctl_reg_handler, "IU",
5910 "Receive Descriptor Tail");
5911 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
5912 CTLFLAG_RD, &rxr->rx_packets,
5913 "Queue Packets Received");
5914 SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
5915 CTLFLAG_RD, &rxr->rx_bytes,
5916 "Queue Bytes Received");
5917 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
5918 CTLFLAG_RD, &lro->lro_queued, 0,
5919 "LRO Queued");
5920 SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
5921 CTLFLAG_RD, &lro->lro_flushed, 0,
5922 "LRO Flushed");
5923 }
5924
5925 /* MAC stats get their own sub node */
5926
5927 stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
5928 CTLFLAG_RD, NULL, "MAC Statistics");
5929 stat_list = SYSCTL_CHILDREN(stat_node);
5930
5931 /*
5932 ** VF adapter has a very limited set of stats
5933	** since it's not managing the metal, so to speak.
5934 */
5935 if (adapter->vf_ifp) {
5936 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
5937 CTLFLAG_RD, &stats->gprc,
5938 "Good Packets Received");
5939 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
5940 CTLFLAG_RD, &stats->gptc,
5941 "Good Packets Transmitted");
5942 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
5943 CTLFLAG_RD, &stats->gorc,
5944 "Good Octets Received");
5945 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
5946 CTLFLAG_RD, &stats->gotc,
5947 "Good Octets Transmitted");
5948 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
5949 CTLFLAG_RD, &stats->mprc,
5950 "Multicast Packets Received");
5951 return;
5952 }
5953
5954 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
5955 CTLFLAG_RD, &stats->ecol,
5956 "Excessive collisions");
5957 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
5958 CTLFLAG_RD, &stats->scc,
5959 "Single collisions");
5960 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
5961 CTLFLAG_RD, &stats->mcc,
5962 "Multiple collisions");
5963 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
5964 CTLFLAG_RD, &stats->latecol,
5965 "Late collisions");
5966 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
5967 CTLFLAG_RD, &stats->colc,
5968 "Collision Count");
5969 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
5970 CTLFLAG_RD, &stats->symerrs,
5971 "Symbol Errors");
5972 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
5973 CTLFLAG_RD, &stats->sec,
5974 "Sequence Errors");
5975 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
5976 CTLFLAG_RD, &stats->dc,
5977 "Defer Count");
5978 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
5979 CTLFLAG_RD, &stats->mpc,
5980 "Missed Packets");
5981 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
5982 CTLFLAG_RD, &stats->rlec,
5983 "Receive Length Errors");
5984 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
5985 CTLFLAG_RD, &stats->rnbc,
5986 "Receive No Buffers");
5987 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
5988 CTLFLAG_RD, &stats->ruc,
5989 "Receive Undersize");
5990 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
5991 CTLFLAG_RD, &stats->rfc,
5992 "Fragmented Packets Received");
5993 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
5994 CTLFLAG_RD, &stats->roc,
5995 "Oversized Packets Received");
5996 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
5997 CTLFLAG_RD, &stats->rjc,
5998			"Received Jabber");
5999 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
6000 CTLFLAG_RD, &stats->rxerrc,
6001 "Receive Errors");
6002 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
6003 CTLFLAG_RD, &stats->crcerrs,
6004 "CRC errors");
6005 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
6006 CTLFLAG_RD, &stats->algnerrc,
6007 "Alignment Errors");
6008 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
6009 CTLFLAG_RD, &stats->tncrs,
6010 "Transmit with No CRS");
6011 /* On 82575 these are collision counts */
6012 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
6013 CTLFLAG_RD, &stats->cexterr,
6014 "Collision/Carrier extension errors");
6015 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
6016 CTLFLAG_RD, &stats->xonrxc,
6017 "XON Received");
6018 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
6019 CTLFLAG_RD, &stats->xontxc,
6020 "XON Transmitted");
6021 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
6022 CTLFLAG_RD, &stats->xoffrxc,
6023 "XOFF Received");
6024 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
6025 CTLFLAG_RD, &stats->xofftxc,
6026 "XOFF Transmitted");
6027 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
6028 CTLFLAG_RD, &stats->fcruc,
6029 "Unsupported Flow Control Received");
6030 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
6031 CTLFLAG_RD, &stats->mgprc,
6032 "Management Packets Received");
6033 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
6034 CTLFLAG_RD, &stats->mgpdc,
6035 "Management Packets Dropped");
6036 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
6037 CTLFLAG_RD, &stats->mgptc,
6038 "Management Packets Transmitted");
6039 /* Packet Reception Stats */
6040 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
6041 CTLFLAG_RD, &stats->tpr,
6042 "Total Packets Received");
6043 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
6044 CTLFLAG_RD, &stats->gprc,
6045 "Good Packets Received");
6046 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
6047 CTLFLAG_RD, &stats->bprc,
6048 "Broadcast Packets Received");
6049 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
6050 CTLFLAG_RD, &stats->mprc,
6051 "Multicast Packets Received");
6052 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
6053 CTLFLAG_RD, &stats->prc64,
6054 "64 byte frames received");
6055 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
6056 CTLFLAG_RD, &stats->prc127,
6057 "65-127 byte frames received");
6058 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
6059 CTLFLAG_RD, &stats->prc255,
6060 "128-255 byte frames received");
6061 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
6062 CTLFLAG_RD, &stats->prc511,
6063 "256-511 byte frames received");
6064 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
6065 CTLFLAG_RD, &stats->prc1023,
6066 "512-1023 byte frames received");
6067 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
6068 CTLFLAG_RD, &stats->prc1522,
6069			"1024-1522 byte frames received");
6070 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
6071 CTLFLAG_RD, &stats->gorc,
6072 "Good Octets Received");
6073 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
6074 CTLFLAG_RD, &stats->tor,
6075 "Total Octets Received");
6076
6077 /* Packet Transmission Stats */
6078 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
6079 CTLFLAG_RD, &stats->gotc,
6080 "Good Octets Transmitted");
6081 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
6082 CTLFLAG_RD, &stats->tot,
6083 "Total Octets Transmitted");
6084 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
6085 CTLFLAG_RD, &stats->tpt,
6086 "Total Packets Transmitted");
6087 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
6088 CTLFLAG_RD, &stats->gptc,
6089 "Good Packets Transmitted");
6090 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
6091 CTLFLAG_RD, &stats->bptc,
6092 "Broadcast Packets Transmitted");
6093 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
6094 CTLFLAG_RD, &stats->mptc,
6095 "Multicast Packets Transmitted");
6096 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
6097 CTLFLAG_RD, &stats->ptc64,
6098 "64 byte frames transmitted");
6099 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
6100 CTLFLAG_RD, &stats->ptc127,
6101 "65-127 byte frames transmitted");
6102 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
6103 CTLFLAG_RD, &stats->ptc255,
6104 "128-255 byte frames transmitted");
6105 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
6106 CTLFLAG_RD, &stats->ptc511,
6107 "256-511 byte frames transmitted");
6108 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
6109 CTLFLAG_RD, &stats->ptc1023,
6110 "512-1023 byte frames transmitted");
6111 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
6112 CTLFLAG_RD, &stats->ptc1522,
6113 "1024-1522 byte frames transmitted");
6114 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
6115 CTLFLAG_RD, &stats->tsctc,
6116 "TSO Contexts Transmitted");
6117 SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
6118 CTLFLAG_RD, &stats->tsctfc,
6119 "TSO Contexts Failed");
6120
6121
6122 /* Interrupt Stats */
6123
6124 int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
6125 CTLFLAG_RD, NULL, "Interrupt Statistics");
6126 int_list = SYSCTL_CHILDREN(int_node);
6127
6128 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
6129 CTLFLAG_RD, &stats->iac,
6130 "Interrupt Assertion Count");
6131
6132 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
6133 CTLFLAG_RD, &stats->icrxptc,
6134 "Interrupt Cause Rx Pkt Timer Expire Count");
6135
6136 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
6137 CTLFLAG_RD, &stats->icrxatc,
6138 "Interrupt Cause Rx Abs Timer Expire Count");
6139
6140 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
6141 CTLFLAG_RD, &stats->ictxptc,
6142 "Interrupt Cause Tx Pkt Timer Expire Count");
6143
6144 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
6145 CTLFLAG_RD, &stats->ictxatc,
6146 "Interrupt Cause Tx Abs Timer Expire Count");
6147
6148 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
6149 CTLFLAG_RD, &stats->ictxqec,
6150 "Interrupt Cause Tx Queue Empty Count");
6151
6152 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
6153 CTLFLAG_RD, &stats->ictxqmtc,
6154 "Interrupt Cause Tx Queue Min Thresh Count");
6155
6156 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
6157 CTLFLAG_RD, &stats->icrxdmtc,
6158 "Interrupt Cause Rx Desc Min Thresh Count");
6159
6160 SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
6161 CTLFLAG_RD, &stats->icrxoc,
6162 "Interrupt Cause Receiver Overrun Count");
6163
6164 /* Host to Card Stats */
6165
6166 host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
6167 CTLFLAG_RD, NULL,
6168 "Host to Card Statistics");
6169
6170 host_list = SYSCTL_CHILDREN(host_node);
6171
6172 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
6173 CTLFLAG_RD, &stats->cbtmpc,
6174 "Circuit Breaker Tx Packet Count");
6175
6176 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
6177 CTLFLAG_RD, &stats->htdpmc,
6178 "Host Transmit Discarded Packets");
6179
6180 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
6181 CTLFLAG_RD, &stats->rpthc,
6182 "Rx Packets To Host");
6183
6184 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
6185 CTLFLAG_RD, &stats->cbrmpc,
6186 "Circuit Breaker Rx Packet Count");
6187
6188 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
6189 CTLFLAG_RD, &stats->cbrdpc,
6190 "Circuit Breaker Rx Dropped Count");
6191
6192 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
6193 CTLFLAG_RD, &stats->hgptc,
6194 "Host Good Packets Tx Count");
6195
6196 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
6197 CTLFLAG_RD, &stats->htcbdpc,
6198 "Host Tx Circuit Breaker Dropped Count");
6199
6200 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
6201 CTLFLAG_RD, &stats->hgorc,
6202 "Host Good Octets Received Count");
6203
6204 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
6205 CTLFLAG_RD, &stats->hgotc,
6206 "Host Good Octets Transmit Count");
6207
6208 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
6209 CTLFLAG_RD, &stats->lenerrs,
6210 "Length Errors");
6211
6212 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
6213 CTLFLAG_RD, &stats->scvpc,
6214 "SerDes/SGMII Code Violation Pkt Count");
6215
6216 SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
6217 CTLFLAG_RD, &stats->hrmpc,
6218 "Header Redirection Missed Packet Count");
6219}
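/*
 * The nodes added above hang off the device's sysctl tree, so with the
 * default naming they can be inspected from userland along these lines
 * (unit 0 assumed):
 *
 *	sysctl dev.igb.0.mac_stats.good_pkts_recvd
 *	sysctl dev.igb.0.queue0.interrupt_rate
 *	sysctl dev.igb.0.interrupts.asserts
 *	sysctl dev.igb.0.host.rx_good_bytes
 */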
6220
6221
6222/**********************************************************************
6223 *
6224 * This routine provides a way to dump out the adapter EEPROM,
6225 * often a useful debug/service tool. Only the first 32 words are
6226 * dumped; the data that matters lives within that range.
6227 *
6228 **********************************************************************/
6229static int
6230igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
6231{
6232 struct adapter *adapter;
6233 int error;
6234 int result;
6235
6236 result = -1;
6237 error = sysctl_handle_int(oidp, &result, 0, req);
6238
6239 if (error || !req->newptr)
6240 return (error);
6241
6242 /*
6243 * This value will cause a hex dump of the
6244 * first 32 16-bit words of the EEPROM to
6245 * the screen.
6246 */
6247 if (result == 1) {
6248 adapter = (struct adapter *)arg1;
6249 igb_print_nvm_info(adapter);
6250 }
6251
6252 return (error);
6253}
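/*
 * The sysctl node backed by this handler is registered in igb_attach()
 * (not shown in this extract). Assuming it is exposed as "nvm", the dump
 * can be triggered with something like:
 *
 *	sysctl dev.igb.0.nvm=1
 *
 * which prints the first 32 EEPROM words to the console via
 * igb_print_nvm_info() below.
 */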
6254
6255static void
6256igb_print_nvm_info(struct adapter *adapter)
6257{
6258 u16 eeprom_data;
6259 int i, j, row = 0;
6260
6261	/* It's a bit crude, but it gets the job done */
6262 printf("\nInterface EEPROM Dump:\n");
6263 printf("Offset\n0x0000 ");
6264 for (i = 0, j = 0; i < 32; i++, j++) {
6265 if (j == 8) { /* Make the offset block */
6266 j = 0; ++row;
6267			printf("\n0x00%x0 ", row);
6268 }
6269 e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
6270 printf("%04x ", eeprom_data);
6271 }
6272 printf("\n");
6273}
6274
6275static void
6276igb_set_sysctl_value(struct adapter *adapter, const char *name,
6277 const char *description, int *limit, int value)
6278{
6279 *limit = value;
6280 SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
6281 SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
6282 OID_AUTO, name, CTLFLAG_RW, limit, value, description);
6283}
6284
6285/*
6286** Set flow control using sysctl:
6287** Flow control values:
6288** 0 - off
6289** 1 - rx pause
6290** 2 - tx pause
6291** 3 - full
6292*/
6293static int
6294igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
6295{
6296 int error;
6297 static int input = 3; /* default is full */
6298 struct adapter *adapter = (struct adapter *) arg1;
6299
6300 error = sysctl_handle_int(oidp, &input, 0, req);
6301
6302 if ((error) || (req->newptr == NULL))
6303 return (error);
6304
6305 switch (input) {
6306 case e1000_fc_rx_pause:
6307 case e1000_fc_tx_pause:
6308 case e1000_fc_full:
6309 case e1000_fc_none:
6310 adapter->hw.fc.requested_mode = input;
6311 adapter->fc = input;
6312 break;
6313 default:
6314 /* Do nothing */
6315 return (error);
6316 }
6317
6318 adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
6319 e1000_force_mac_fc(&adapter->hw);
6320 /* XXX TODO: update DROP_EN on each RX queue if appropriate */
6321 return (error);
6322}
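/*
 * Example (assuming the handler is registered as "fc" in igb_attach(),
 * which is outside this extract):
 *
 *	sysctl dev.igb.0.fc=3		# request full flow control
 *
 * Note that 'input' above is static, so a read of the sysctl reflects
 * the last value written through it rather than per-adapter state.
 */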
6323
6324/*
6325** Manage DMA Coalesce:
6326** Control values:
6327** 0/1 - off/on
6328** Legal timer values are:
6329** 	250, 500, and 1000-10000 in increments of 1000
6330*/
6331static int
6332igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
6333{
6334 struct adapter *adapter = (struct adapter *) arg1;
6335 int error;
6336
6337 error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);
6338
6339 if ((error) || (req->newptr == NULL))
6340 return (error);
6341
6342 switch (adapter->dmac) {
6343 case 0:
6344 /* Disabling */
6345 break;
6346 case 1: /* Just enable and use default */
6347 adapter->dmac = 1000;
6348 break;
6349 case 250:
6350 case 500:
6351 case 1000:
6352 case 2000:
6353 case 3000:
6354 case 4000:
6355 case 5000:
6356 case 6000:
6357 case 7000:
6358 case 8000:
6359 case 9000:
6360 case 10000:
6361 /* Legal values - allow */
6362 break;
6363 default:
6364		/* Illegal value: disable and reject */
6365 adapter->dmac = 0;
6366 return (EINVAL);
6367 }
6368 /* Reinit the interface */
6369 igb_init(adapter);
6370 return (error);
6371}
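/*
 * Example (assuming the handler is registered as "dmac" in igb_attach(),
 * outside this extract):
 *
 *	sysctl dev.igb.0.dmac=1000	# enable DMA coalescing, timer value 1000
 *
 * Any accepted value causes a full igb_init() so the new setting takes
 * effect.
 */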
6372
6373/*
6374** Manage Energy Efficient Ethernet:
6375** Control values:
6376** 	0 - EEE enabled, 1 - EEE disabled
6377*/
6378static int
6379igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
6380{
6381 struct adapter *adapter = (struct adapter *) arg1;
6382 int error, value;
6383
6384 value = adapter->hw.dev_spec._82575.eee_disable;
6385 error = sysctl_handle_int(oidp, &value, 0, req);
6386 if (error || req->newptr == NULL)
6387 return (error);
6388 IGB_CORE_LOCK(adapter);
6389 adapter->hw.dev_spec._82575.eee_disable = (value != 0);
6390 igb_init_locked(adapter);
6391 IGB_CORE_UNLOCK(adapter);
6392 return (0);
6393}
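/*
 * Example (the node name is registered in igb_attach(), outside this
 * extract; "eee_disabled" is assumed here):
 *
 *	sysctl dev.igb.0.eee_disabled=1		# turn EEE off
 *
 * The change takes the core lock and reinitializes the interface via
 * igb_init_locked().
 */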