/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*$FreeBSD: stable/11/sys/dev/e1000/if_igb.c 314281 2017-02-25 20:21:39Z loos $*/


#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"

#ifdef HAVE_KERNEL_OPTION_HEADERS
#include "opt_device_polling.h"
#include "opt_altq.h"
#endif

#include "if_igb.h"

/*********************************************************************
 *  Driver version:
 *********************************************************************/
char igb_driver_version[] = "2.5.3-k";


/*********************************************************************
 *  PCI Device ID Table
 *
 *  Used by probe to select devices to load on
 *  Last field stores an index into e1000_strings
 *  Last entry must be all 0s
 *
 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
 *********************************************************************/

static igb_vendor_info_t igb_vendor_info_array[] =
{
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_FLASHLESS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES_FLASHLESS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS, 0, 0, 0},
	{IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII, 0, 0, 0},
	/* required last entry */
	{0, 0, 0, 0, 0}
};
/*********************************************************************
 *  Table of branding strings for all supported NICs.
 *********************************************************************/

static char *igb_strings[] = {
	"Intel(R) PRO/1000 Network Connection"
};

/*********************************************************************
 *  Function prototypes
 *********************************************************************/
static int	igb_probe(device_t);
static int	igb_attach(device_t);
static int	igb_detach(device_t);
static int	igb_shutdown(device_t);
static int	igb_suspend(device_t);
static int	igb_resume(device_t);
#ifndef IGB_LEGACY_TX
static int	igb_mq_start(struct ifnet *, struct mbuf *);
static int	igb_mq_start_locked(struct ifnet *, struct tx_ring *);
static void	igb_qflush(struct ifnet *);
static void	igb_deferred_mq_start(void *, int);
#else
static void	igb_start(struct ifnet *);
static void	igb_start_locked(struct tx_ring *, struct ifnet *ifp);
#endif
static int	igb_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t	igb_get_counter(if_t, ift_counter);
static void	igb_init(void *);
static void	igb_init_locked(struct adapter *);
static void	igb_stop(void *);
static void	igb_media_status(struct ifnet *, struct ifmediareq *);
static int	igb_media_change(struct ifnet *);
static void	igb_identify_hardware(struct adapter *);
static int	igb_allocate_pci_resources(struct adapter *);
static int	igb_allocate_msix(struct adapter *);
static int	igb_allocate_legacy(struct adapter *);
static int	igb_setup_msix(struct adapter *);
static void	igb_free_pci_resources(struct adapter *);
static void	igb_local_timer(void *);
static void	igb_reset(struct adapter *);
static int	igb_setup_interface(device_t, struct adapter *);
static int	igb_allocate_queues(struct adapter *);
static void	igb_configure_queues(struct adapter *);

static int	igb_allocate_transmit_buffers(struct tx_ring *);
static void	igb_setup_transmit_structures(struct adapter *);
static void	igb_setup_transmit_ring(struct tx_ring *);
static void	igb_initialize_transmit_units(struct adapter *);
static void	igb_free_transmit_structures(struct adapter *);
static void	igb_free_transmit_buffers(struct tx_ring *);

static int	igb_allocate_receive_buffers(struct rx_ring *);
static int	igb_setup_receive_structures(struct adapter *);
static int	igb_setup_receive_ring(struct rx_ring *);
static void	igb_initialize_receive_units(struct adapter *);
static void	igb_free_receive_structures(struct adapter *);
static void	igb_free_receive_buffers(struct rx_ring *);
static void	igb_free_receive_ring(struct rx_ring *);

static void	igb_enable_intr(struct adapter *);
static void	igb_disable_intr(struct adapter *);
static void	igb_update_stats_counters(struct adapter *);
static bool	igb_txeof(struct tx_ring *);

static __inline void igb_rx_discard(struct rx_ring *, int);
static __inline void igb_rx_input(struct rx_ring *,
		    struct ifnet *, struct mbuf *, u32);

static bool	igb_rxeof(struct igb_queue *, int, int *);
static void	igb_rx_checksum(u32, struct mbuf *, u32);
static int	igb_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	igb_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	igb_set_promisc(struct adapter *);
static void	igb_disable_promisc(struct adapter *);
static void	igb_set_multi(struct adapter *);
static void	igb_update_link_status(struct adapter *);
static void	igb_refresh_mbufs(struct rx_ring *, int);

static void	igb_register_vlan(void *, struct ifnet *, u16);
static void	igb_unregister_vlan(void *, struct ifnet *, u16);
static void	igb_setup_vlan_hw_support(struct adapter *);

static int	igb_xmit(struct tx_ring *, struct mbuf **);
static int	igb_dma_malloc(struct adapter *, bus_size_t,
		    struct igb_dma_alloc *, int);
static void	igb_dma_free(struct adapter *, struct igb_dma_alloc *);
static int	igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
static void	igb_print_nvm_info(struct adapter *);
static int	igb_is_valid_ether_addr(u8 *);
static void	igb_add_hw_stats(struct adapter *);

static void	igb_vf_init_stats(struct adapter *);
static void	igb_update_vf_stats_counters(struct adapter *);

/* Management and WOL Support */
static void	igb_init_manageability(struct adapter *);
static void	igb_release_manageability(struct adapter *);
static void	igb_get_hw_control(struct adapter *);
static void	igb_release_hw_control(struct adapter *);
static void	igb_enable_wakeup(device_t);
static void	igb_led_func(void *, int);

static int	igb_irq_fast(void *);
static void	igb_msix_que(void *);
static void	igb_msix_link(void *);
static void	igb_handle_que(void *context, int pending);
static void	igb_handle_link(void *context, int pending);
static void	igb_handle_link_locked(struct adapter *);

static void	igb_set_sysctl_value(struct adapter *, const char *,
		    const char *, int *, int);
static int	igb_set_flowcntl(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_dmac(SYSCTL_HANDLER_ARGS);
static int	igb_sysctl_eee(SYSCTL_HANDLER_ARGS);

#ifdef DEVICE_POLLING
static poll_handler_t igb_poll;
#endif /* DEVICE_POLLING */

/*********************************************************************
 *  FreeBSD Device Interface Entry Points
 *********************************************************************/

static device_method_t igb_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, igb_probe),
	DEVMETHOD(device_attach, igb_attach),
	DEVMETHOD(device_detach, igb_detach),
	DEVMETHOD(device_shutdown, igb_shutdown),
	DEVMETHOD(device_suspend, igb_suspend),
	DEVMETHOD(device_resume, igb_resume),
	DEVMETHOD_END
};

static driver_t igb_driver = {
	"igb", igb_methods, sizeof(struct adapter),
};

static devclass_t igb_devclass;
DRIVER_MODULE(igb, pci, igb_driver, igb_devclass, 0, 0);
MODULE_DEPEND(igb, pci, 1, 1, 1);
MODULE_DEPEND(igb, ether, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(igb, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */
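
/*
 * Usage sketch (not part of the driver): with the module declarations
 * above, the driver attaches to matching PCI devices once loaded.  The
 * module file name if_igb.ko and the loader knob below follow the usual
 * FreeBSD convention for network drivers and are assumptions here;
 * verify against the build output of sys/modules/igb.
 *
 *   # load at runtime
 *   kldload if_igb
 *
 *   # or load at boot via /boot/loader.conf
 *   if_igb_load="YES"
 */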
/*********************************************************************
 *  Tunable default values.
 *********************************************************************/

static SYSCTL_NODE(_hw, OID_AUTO, igb, CTLFLAG_RD, 0, "IGB driver parameters");

/* Descriptor defaults */
static int igb_rxd = IGB_DEFAULT_RXD;
static int igb_txd = IGB_DEFAULT_TXD;
SYSCTL_INT(_hw_igb, OID_AUTO, rxd, CTLFLAG_RDTUN, &igb_rxd, 0,
    "Number of receive descriptors per queue");
SYSCTL_INT(_hw_igb, OID_AUTO, txd, CTLFLAG_RDTUN, &igb_txd, 0,
    "Number of transmit descriptors per queue");

/*
** AIM: Adaptive Interrupt Moderation,
** which means that the interrupt rate
** is varied over time based on the
** traffic for that interrupt vector.
*/
static int igb_enable_aim = TRUE;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_aim, CTLFLAG_RWTUN, &igb_enable_aim, 0,
    "Enable adaptive interrupt moderation");

/*
 * MSIX should be the default for best performance,
 * but this allows it to be forced off for testing.
 */
static int igb_enable_msix = 1;
SYSCTL_INT(_hw_igb, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &igb_enable_msix, 0,
    "Enable MSI-X interrupts");

/*
** Tunable interrupt rate
*/
static int igb_max_interrupt_rate = 8000;
SYSCTL_INT(_hw_igb, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN,
    &igb_max_interrupt_rate, 0, "Maximum interrupts per second");

#ifndef IGB_LEGACY_TX
/*
** Tunable number of buffers in the buf-ring (drbr_xxx)
*/
static int igb_buf_ring_size = IGB_BR_SIZE;
SYSCTL_INT(_hw_igb, OID_AUTO, buf_ring_size, CTLFLAG_RDTUN,
    &igb_buf_ring_size, 0, "Size of the bufring");
#endif

/*
** Header split causes the packet header to
** be dma'd to a separate mbuf from the payload.
** This can have memory alignment benefits, and
** another plus is that small packets often fit
** into the header and thus use no cluster.  It's
** a very workload-dependent feature.
*/
static int igb_header_split = FALSE;
SYSCTL_INT(_hw_igb, OID_AUTO, header_split, CTLFLAG_RDTUN, &igb_header_split, 0,
    "Enable receive mbuf header split");

/*
** This will autoconfigure based on the
** number of CPUs and max supported
** MSIX messages if left at 0.
*/
static int igb_num_queues = 0;
SYSCTL_INT(_hw_igb, OID_AUTO, num_queues, CTLFLAG_RDTUN, &igb_num_queues, 0,
    "Number of queues to configure, 0 indicates autoconfigure");

/*
** Global variable to store last used CPU when binding queues
** to CPUs in igb_allocate_msix.  Starts at CPU_FIRST and increments when a
** queue is bound to a cpu.
*/
static int igb_last_bind_cpu = -1;

/* How many packets rxeof tries to clean at a time */
static int igb_rx_process_limit = 100;
SYSCTL_INT(_hw_igb, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &igb_rx_process_limit, 0,
    "Maximum number of received packets to process at a time, -1 means unlimited");

/* How many packets txeof tries to clean at a time */
static int igb_tx_process_limit = -1;
SYSCTL_INT(_hw_igb, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN,
    &igb_tx_process_limit, 0,
    "Maximum number of sent packets to process at a time, -1 means unlimited");
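
/*
 * Usage sketch: the CTLFLAG_RDTUN knobs above are read once at boot,
 * so they are normally set from /boot/loader.conf.  The values below
 * are illustrative only, not recommendations:
 *
 *   hw.igb.rxd=2048
 *   hw.igb.txd=2048
 *   hw.igb.enable_msix=1
 *   hw.igb.max_interrupt_rate=8000
 *   hw.igb.num_queues=0            # 0 = autoconfigure
 *   hw.igb.rx_process_limit=100
 *
 * enable_aim is CTLFLAG_RWTUN and can also be flipped at runtime:
 *
 *   sysctl hw.igb.enable_aim=1
 */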
#ifdef DEV_NETMAP	/* see ixgbe.c for details */
#include <dev/netmap/if_igb_netmap.h>
#endif /* DEV_NETMAP */

/*********************************************************************
 *  Device identification routine
 *
 *  igb_probe determines if the driver should be loaded on
 *  adapter based on PCI vendor/device id of the adapter.
 *
 *  return BUS_PROBE_DEFAULT on success, positive on failure
 *********************************************************************/

static int
igb_probe(device_t dev)
{
	char		adapter_name[256];
	uint16_t	pci_vendor_id = 0;
	uint16_t	pci_device_id = 0;
	uint16_t	pci_subvendor_id = 0;
	uint16_t	pci_subdevice_id = 0;
	igb_vendor_info_t *ent;

	INIT_DEBUGOUT("igb_probe: begin");

	pci_vendor_id = pci_get_vendor(dev);
	if (pci_vendor_id != IGB_INTEL_VENDOR_ID)
		return (ENXIO);

	pci_device_id = pci_get_device(dev);
	pci_subvendor_id = pci_get_subvendor(dev);
	pci_subdevice_id = pci_get_subdevice(dev);

	ent = igb_vendor_info_array;
	while (ent->vendor_id != 0) {
		if ((pci_vendor_id == ent->vendor_id) &&
		    (pci_device_id == ent->device_id) &&

		    ((pci_subvendor_id == ent->subvendor_id) ||
		    (ent->subvendor_id == 0)) &&

		    ((pci_subdevice_id == ent->subdevice_id) ||
		    (ent->subdevice_id == 0))) {
			sprintf(adapter_name, "%s, Version - %s",
			    igb_strings[ent->index],
			    igb_driver_version);
			device_set_desc_copy(dev, adapter_name);
			return (BUS_PROBE_DEFAULT);
		}
		ent++;
	}
	return (ENXIO);
}
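
/*
 * Matching sketch: because every entry in igb_vendor_info_array leaves
 * subvendor_id and subdevice_id at 0, the two "|| (ent->...id == 0)"
 * clauses in the loop above make those fields wildcards, so a device
 * matches on vendor/device ID alone.  An entry such as
 *
 *   {IGB_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER, 0, 0, 0}
 *
 * therefore matches any I350 copper card regardless of the board
 * vendor's subsystem IDs.
 */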
/*********************************************************************
 *  Device initialization routine
 *
 *  The attach entry point is called when the driver is being loaded.
 *  This routine identifies the type of hardware, allocates all resources
 *  and initializes the hardware.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_attach(device_t dev)
{
	struct adapter	*adapter;
	int		error = 0;
	u16		eeprom_data;

	INIT_DEBUGOUT("igb_attach: begin");

	if (resource_disabled("igb", device_get_unit(dev))) {
		device_printf(dev, "Disabled by device hint\n");
		return (ENXIO);
	}

	adapter = device_get_softc(dev);
	adapter->dev = adapter->osdep.dev = dev;
	IGB_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));

	/* SYSCTLs */
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
	    igb_sysctl_nvm_info, "I", "NVM Information");

	igb_set_sysctl_value(adapter, "enable_aim",
	    "Interrupt Moderation", &adapter->enable_aim,
	    igb_enable_aim);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
	    OID_AUTO, "fc", CTLTYPE_INT|CTLFLAG_RW,
	    adapter, 0, igb_set_flowcntl, "I", "Flow Control");

	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);

	/* Determine hardware and mac info */
	igb_identify_hardware(adapter);

	/* Setup PCI resources */
	if (igb_allocate_pci_resources(adapter)) {
		device_printf(dev, "Allocation of PCI resources failed\n");
		error = ENXIO;
		goto err_pci;
	}

	/* Do Shared Code initialization */
	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
		device_printf(dev, "Setup of Shared code failed\n");
		error = ENXIO;
		goto err_pci;
	}

	e1000_get_bus_info(&adapter->hw);

	/* Sysctls for limiting the amount of work done in the taskqueues */
	igb_set_sysctl_value(adapter, "rx_processing_limit",
	    "max number of rx packets to process",
	    &adapter->rx_process_limit, igb_rx_process_limit);

	igb_set_sysctl_value(adapter, "tx_processing_limit",
	    "max number of tx packets to process",
	    &adapter->tx_process_limit, igb_tx_process_limit);

	/*
	 * Validate number of transmit and receive descriptors.  It
	 * must not exceed hardware maximum, and must be multiple
	 * of E1000_DBA_ALIGN.
	 */
	if (((igb_txd * sizeof(struct e1000_tx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_txd > IGB_MAX_TXD) || (igb_txd < IGB_MIN_TXD)) {
		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
		    IGB_DEFAULT_TXD, igb_txd);
		adapter->num_tx_desc = IGB_DEFAULT_TXD;
	} else
		adapter->num_tx_desc = igb_txd;
	if (((igb_rxd * sizeof(struct e1000_rx_desc)) % IGB_DBA_ALIGN) != 0 ||
	    (igb_rxd > IGB_MAX_RXD) || (igb_rxd < IGB_MIN_RXD)) {
		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
		    IGB_DEFAULT_RXD, igb_rxd);
		adapter->num_rx_desc = IGB_DEFAULT_RXD;
	} else
		adapter->num_rx_desc = igb_rxd;
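
	/*
	 * Worked example for the validation above, assuming IGB_DBA_ALIGN
	 * is 128 and sizeof(struct e1000_tx_desc) is 16 (check the e1000
	 * headers): hw.igb.txd=1020 gives 16320 bytes, which is not
	 * 128-byte aligned, so the driver falls back to IGB_DEFAULT_TXD;
	 * hw.igb.txd=1024 gives 16384 bytes, which is aligned and accepted.
	 */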
	adapter->hw.mac.autoneg = DO_AUTO_NEG;
	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;

	/* Copper options */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		adapter->hw.phy.mdix = AUTO_ALL_MODES;
		adapter->hw.phy.disable_polarity_correction = FALSE;
		adapter->hw.phy.ms_type = IGB_MASTER_SLAVE;
	}

	/*
	 * Set the frame limits assuming
	 * standard ethernet sized frames.
	 */
	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;

	/*
	** Allocate and Setup Queues
	*/
	if (igb_allocate_queues(adapter)) {
		error = ENOMEM;
		goto err_pci;
	}

	/* Allocate the appropriate stats memory */
	if (adapter->vf_ifp) {
		adapter->stats = (struct e1000_vf_stats *)malloc(
		    sizeof(struct e1000_vf_stats), M_DEVBUF,
		    M_NOWAIT | M_ZERO);
		igb_vf_init_stats(adapter);
	} else
		adapter->stats = (struct e1000_hw_stats *)malloc(
		    sizeof(struct e1000_hw_stats), M_DEVBUF,
		    M_NOWAIT | M_ZERO);
	if (adapter->stats == NULL) {
		device_printf(dev, "Can not allocate stats memory\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Allocate multicast array memory. */
	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
	if (adapter->mta == NULL) {
		device_printf(dev, "Can not allocate multicast setup array\n");
		error = ENOMEM;
		goto err_late;
	}

	/* Some adapter-specific advanced features */
	if (adapter->hw.mac.type >= e1000_i350) {
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "dmac", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_dmac, "I", "DMA Coalesce");
		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		    OID_AUTO, "eee_disabled", CTLTYPE_INT|CTLFLAG_RW,
		    adapter, 0, igb_sysctl_eee, "I",
		    "Disable Energy Efficient Ethernet");
		if (adapter->hw.phy.media_type == e1000_media_type_copper) {
			if (adapter->hw.mac.type == e1000_i354)
				e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
			else
				e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
		}
	}

	/*
	** Start from a known state, this is
	** important in reading the nvm and
	** mac from that.
	*/
	e1000_reset_hw(&adapter->hw);
	/* Make sure we have a good EEPROM before we read from it */
	if (((adapter->hw.mac.type != e1000_i210) &&
	    (adapter->hw.mac.type != e1000_i211)) &&
	    (e1000_validate_nvm_checksum(&adapter->hw) < 0)) {
		/*
		** Some PCI-E parts fail the first check due to
		** the link being in sleep state; call it again.
		** If it fails a second time, it's a real issue.
		*/
		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
			device_printf(dev,
			    "The EEPROM Checksum Is Not Valid\n");
			error = EIO;
			goto err_late;
		}
	}

	/*
	** Copy the permanent MAC address out of the EEPROM
	*/
	if (e1000_read_mac_addr(&adapter->hw) < 0) {
		device_printf(dev, "EEPROM read error while reading MAC"
		    " address\n");
		error = EIO;
		goto err_late;
	}
	/* Check its sanity */
	if (!igb_is_valid_ether_addr(adapter->hw.mac.addr)) {
		device_printf(dev, "Invalid MAC address\n");
		error = EIO;
		goto err_late;
	}

	/* Setup OS specific network interface */
	if (igb_setup_interface(dev, adapter) != 0)
		goto err_late;

	/* Now get a good starting state */
	igb_reset(adapter);

	/* Initialize statistics */
	igb_update_stats_counters(adapter);

	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);

	/* Indicate SOL/IDER usage */
	if (e1000_check_reset_block(&adapter->hw))
		device_printf(dev,
		    "PHY reset is blocked due to SOL/IDER session.\n");

	/* Determine if we have to control management hardware */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);

	/*
	 * Setup Wake-on-Lan
	 */
	/* APME bit in EEPROM is mapped to WUC.APME */
	eeprom_data = E1000_READ_REG(&adapter->hw, E1000_WUC) & E1000_WUC_APME;
	if (eeprom_data)
		adapter->wol = E1000_WUFC_MAG;

	/* Register for VLAN events */
	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
	    igb_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
	    igb_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);

	igb_add_hw_stats(adapter);

	/* Tell the stack that the interface is not active */
	adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	adapter->ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	adapter->led_dev = led_create(igb_led_func, adapter,
	    device_get_nameunit(dev));

	/*
	** Configure Interrupts
	*/
	if ((adapter->msix > 1) && (igb_enable_msix))
		error = igb_allocate_msix(adapter);
	else /* MSI or Legacy */
		error = igb_allocate_legacy(adapter);
	if (error)
		goto err_late;

#ifdef DEV_NETMAP
	igb_netmap_attach(adapter);
#endif /* DEV_NETMAP */
	INIT_DEBUGOUT("igb_attach: end");

	return (0);

err_late:
	if (igb_detach(dev) == 0) /* igb_detach() already did the cleanup */
		return (error);
	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	igb_release_hw_control(adapter);
err_pci:
	igb_free_pci_resources(adapter);
	if (adapter->ifp != NULL)
		if_free(adapter->ifp);
	free(adapter->mta, M_DEVBUF);
	IGB_CORE_LOCK_DESTROY(adapter);

	return (error);
}
/*********************************************************************
 *  Device removal routine
 *
 *  The detach entry point is called when the driver is being removed.
 *  This routine stops the adapter and deallocates all the resources
 *  that were allocated for driver operation.
 *
 *  return 0 on success, positive on failure
 *********************************************************************/

static int
igb_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	struct ifnet	*ifp = adapter->ifp;

	INIT_DEBUGOUT("igb_detach: begin");

	/* Make sure VLANS are not using driver */
	if (adapter->ifp->if_vlantrunk != NULL) {
		device_printf(dev, "Vlan in use, detach first\n");
		return (EBUSY);
	}

	ether_ifdetach(adapter->ifp);

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	IGB_CORE_LOCK(adapter);
	adapter->in_detach = 1;
	igb_stop(adapter);
	IGB_CORE_UNLOCK(adapter);

	e1000_phy_hw_reset(&adapter->hw);

	/* Give control back to firmware */
	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	callout_drain(&adapter->timer);

#ifdef DEV_NETMAP
	netmap_detach(adapter->ifp);
#endif /* DEV_NETMAP */
	igb_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	igb_free_transmit_structures(adapter);
	igb_free_receive_structures(adapter);
	if (adapter->mta != NULL)
		free(adapter->mta, M_DEVBUF);

	IGB_CORE_LOCK_DESTROY(adapter);

	return (0);
}

/*********************************************************************
 *
 *  Shutdown entry point
 *
 **********************************************************************/

static int
igb_shutdown(device_t dev)
{
	return igb_suspend(dev);
}
/*
 * Suspend/resume device methods.
 */
static int
igb_suspend(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);

	IGB_CORE_LOCK(adapter);

	igb_stop(adapter);

	igb_release_manageability(adapter);
	igb_release_hw_control(adapter);

	if (adapter->wol) {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
		igb_enable_wakeup(dev);
	}

	IGB_CORE_UNLOCK(adapter);

	return bus_generic_suspend(dev);
}

static int
igb_resume(device_t dev)
{
	struct adapter *adapter = device_get_softc(dev);
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	igb_init_manageability(adapter);

	if ((ifp->if_flags & IFF_UP) &&
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
			/* Process the stack queue only if not depleted */
			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
			    !drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				igb_start_locked(txr, ifp);
#endif
			IGB_TX_UNLOCK(txr);
		}
	}
	IGB_CORE_UNLOCK(adapter);

	return bus_generic_resume(dev);
}


#ifdef IGB_LEGACY_TX

/*********************************************************************
 *  Transmit entry point
 *
 *  igb_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/

static void
igb_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct mbuf	*m_head;

	IGB_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

	/* Call cleanup if number of TX descriptors low */
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IGB_MAX_SCATTER) {
			txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		/*
		 * Encapsulation can modify our pointer, and or make it
		 * NULL on failure.  In that event, we can't requeue.
		 */
		if (igb_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			if (txr->tx_avail <= IGB_MAX_SCATTER)
				txr->queue_status |= IGB_QUEUE_DEPLETED;
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

		/* Set watchdog on */
		txr->watchdog_time = ticks;
		txr->queue_status |= IGB_QUEUE_WORKING;
	}
}
/*
 * Legacy TX driver routine, called from the
 * stack, always uses tx[0], and spins for it.
 * Should not be used with multiqueue tx.
 */
static void
igb_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IGB_TX_LOCK(txr);
		igb_start_locked(txr, ifp);
		IGB_TX_UNLOCK(txr);
	}
	return;
}

#else /* ~IGB_LEGACY_TX */

/*
** Multiqueue Transmit Entry:
**  quick turnaround to the stack
**
*/
static int
igb_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	int			i, err = 0;
#ifdef RSS
	uint32_t		bucket_id;
#endif

	/* Which queue to use */
	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is in.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			/* XXX TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		} else {
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
#ifdef RSS
		}
#endif
	} else {
		i = curcpu % adapter->num_queues;
	}
	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IGB_TX_TRYLOCK(txr)) {
		igb_mq_start_locked(ifp, txr);
		IGB_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}

static int
igb_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		err = 0, enq = 0;

	IGB_TX_LOCK_ASSERT(txr);

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = igb_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				/* It was freed, move forward */
				drbr_advance(ifp, txr->br);
			} else {
				/*
				 * Still have one left, it may not be
				 * the same since the transmit function
				 * may have changed it.
				 */
				drbr_putback(ifp, txr->br, next);
			}
			break;
		}
		drbr_advance(ifp, txr->br);
		enq++;
		if (next->m_flags & M_MCAST && adapter->vf_ifp)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}
	if (enq > 0) {
		/* Set the watchdog */
		txr->queue_status |= IGB_QUEUE_WORKING;
		txr->watchdog_time = ticks;
	}
	if (txr->tx_avail <= IGB_TX_CLEANUP_THRESHOLD)
		igb_txeof(txr);
	if (txr->tx_avail <= IGB_MAX_SCATTER)
		txr->queue_status |= IGB_QUEUE_DEPLETED;
	return (err);
}
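
/*
 * Queue-selection sketch for igb_mq_start() above (non-RSS path); the
 * numbers are illustrative only.  With adapter->num_queues = 4, a flow
 * whose m->m_pkthdr.flowid is 0x9e3779b9 maps to ring 0x9e3779b9 % 4
 * = 1, so every packet of that flow lands on tx_rings[1]; an unhashed
 * mbuf falls back to curcpu % 4 instead.  If IGB_TX_TRYLOCK fails,
 * the enqueued packets are drained later through txq_task, which is
 * presumably bound to igb_deferred_mq_start() elsewhere in the driver.
 */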
/*
 * Called from a taskqueue to drain queued transmit packets.
 */
static void
igb_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IGB_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr);
	IGB_TX_UNLOCK(txr);
}

/*
** Flush all ring buffers
*/
static void
igb_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IGB_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IGB_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* ~IGB_LEGACY_TX */

/*********************************************************************
 *  Ioctl entry point
 *
 *  igb_ioctl is called when the user wants to configure the
 *  interface.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static int
igb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			ifp->if_flags |= IFF_UP;
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				igb_init(adapter);
#ifdef INET
			if (!(ifp->if_flags & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		IGB_CORE_LOCK(adapter);
		max_frame_size = 9234;
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			IGB_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		adapter->max_frame_size =
		    ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init_locked(adapter);
		IGB_CORE_UNLOCK(adapter);
		break;
	    }
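	/*
	 * Worked example for the MTU check above: with max_frame_size
	 * capped at 9234 bytes, the largest accepted MTU is 9234 -
	 * ETHER_HDR_LEN (14) - ETHER_CRC_LEN (4) = 9216; a request for
	 * 9217 returns EINVAL.
	 */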
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		IGB_CORE_LOCK(adapter);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				if ((ifp->if_flags ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					igb_disable_promisc(adapter);
					igb_set_promisc(adapter);
				}
			} else
				igb_init_locked(adapter);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				igb_stop(adapter);
		adapter->if_flags = ifp->if_flags;
		IGB_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			IGB_CORE_LOCK(adapter);
			igb_disable_intr(adapter);
			igb_set_multi(adapter);
#ifdef DEVICE_POLLING
			if (!(ifp->if_capenable & IFCAP_POLLING))
#endif
				igb_enable_intr(adapter);
			IGB_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		IGB_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			IGB_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		IGB_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(igb_poll, ifp);
				if (error)
					return (error);
				IGB_CORE_LOCK(adapter);
				igb_disable_intr(adapter);
				ifp->if_capenable |= IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				IGB_CORE_LOCK(adapter);
				igb_enable_intr(adapter);
				ifp->if_capenable &= ~IFCAP_POLLING;
				IGB_CORE_UNLOCK(adapter);
			}
		}
#endif
#if __FreeBSD_version >= 1000000
		/* HW cannot turn these on/off separately */
		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
			ifp->if_capenable ^= IFCAP_RXCSUM;
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			reinit = 1;
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			reinit = 1;
		}
#else
		if (mask & IFCAP_HWCSUM) {
			ifp->if_capenable ^= IFCAP_HWCSUM;
			reinit = 1;
		}
#endif
		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			reinit = 1;
		}
		if (mask & IFCAP_TSO6) {
			ifp->if_capenable ^= IFCAP_TSO6;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTSO) {
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
			reinit = 1;
		}
		if (mask & IFCAP_LRO) {
			ifp->if_capenable ^= IFCAP_LRO;
			reinit = 1;
		}
		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING))
			igb_init(adapter);
		VLAN_CAPABILITIES(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}


/*********************************************************************
 *  Init entry point
 *
 *  This routine is used in two ways.  It is used by the stack as
 *  init entry point in network interface structure.  It is also used
 *  by the driver as a hw/sw initialization routine to get to a
 *  consistent state.
 *
 *  return 0 on success, positive on failure
 **********************************************************************/

static void
igb_init_locked(struct adapter *adapter)
{
	struct ifnet	*ifp = adapter->ifp;
	device_t	dev = adapter->dev;

	INIT_DEBUGOUT("igb_init: begin");

	IGB_CORE_LOCK_ASSERT(adapter);

	igb_disable_intr(adapter);
	callout_stop(&adapter->timer);

	/* Get the latest mac address, User can use a LAA */
	bcopy(IF_LLADDR(adapter->ifp), adapter->hw.mac.addr,
	    ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	igb_reset(adapter);
	igb_update_link_status(adapter);

	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	ifp->if_hwassist = 0;
	if (ifp->if_capenable & IFCAP_TXCSUM) {
#if __FreeBSD_version >= 1000000
		ifp->if_hwassist |= (CSUM_IP_TCP | CSUM_IP_UDP);
		if (adapter->hw.mac.type != e1000_82575)
			ifp->if_hwassist |= CSUM_IP_SCTP;
#else
		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
#if __FreeBSD_version >= 800000
		if (adapter->hw.mac.type != e1000_82575)
			ifp->if_hwassist |= CSUM_SCTP;
#endif
#endif
	}

#if __FreeBSD_version >= 1000000
	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) {
		ifp->if_hwassist |= (CSUM_IP6_TCP | CSUM_IP6_UDP);
		if (adapter->hw.mac.type != e1000_82575)
			ifp->if_hwassist |= CSUM_IP6_SCTP;
	}
#endif
	if (ifp->if_capenable & IFCAP_TSO)
		ifp->if_hwassist |= CSUM_TSO;

	/* Clear bad data from Rx FIFOs */
	e1000_rx_fifo_flush_82575(&adapter->hw);

	/* Configure for OS presence */
	igb_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	igb_setup_transmit_structures(adapter);
	igb_initialize_transmit_units(adapter);

	/* Setup Multicast table */
	igb_set_multi(adapter);

	/*
	** Figure out the desired mbuf pool
	** for doing jumbo/packetsplit
	*/
	if (adapter->max_frame_size <= 2048)
		adapter->rx_mbuf_sz = MCLBYTES;
	else if (adapter->max_frame_size <= 4096)
		adapter->rx_mbuf_sz = MJUMPAGESIZE;
	else
		adapter->rx_mbuf_sz = MJUM9BYTES;
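
	/*
	 * Worked example for the selection above: the default MTU of
	 * 1500 gives max_frame_size = 1518, which fits a 2K cluster
	 * (MCLBYTES); an MTU of 4000 gives 4018 and selects MJUMPAGESIZE
	 * (one page, 4096 bytes on typical systems); a jumbo MTU of
	 * 9000 gives 9018 and selects a 9K MJUM9BYTES cluster.
	 */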
	/* Prepare receive descriptors and buffers */
	if (igb_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		return;
	}
	igb_initialize_receive_units(adapter);

	/* Enable VLAN support */
	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		igb_setup_vlan_hw_support(adapter);

	/* Don't lose promiscuous settings */
	igb_set_promisc(adapter);

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	callout_reset(&adapter->timer, hz, igb_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

	if (adapter->msix > 1) /* Set up queue routing */
		igb_configure_queues(adapter);

	/* this clears any pending interrupts */
	E1000_READ_REG(&adapter->hw, E1000_ICR);
#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (ifp->if_capenable & IFCAP_POLLING)
		igb_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
	{
		igb_enable_intr(adapter);
		E1000_WRITE_REG(&adapter->hw, E1000_ICS, E1000_ICS_LSC);
	}

	/* Set Energy Efficient Ethernet */
	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
		if (adapter->hw.mac.type == e1000_i354)
			e1000_set_eee_i354(&adapter->hw, TRUE, TRUE);
		else
			e1000_set_eee_i350(&adapter->hw, TRUE, TRUE);
	}
}

static void
igb_init(void *arg)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_que(void *context, int pending)
{
	struct igb_queue *que = context;
	struct adapter *adapter = que->adapter;
	struct tx_ring *txr = que->txr;
	struct ifnet	*ifp = adapter->ifp;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		bool	more;

		more = igb_rxeof(que, adapter->rx_process_limit, NULL);

		IGB_TX_LOCK(txr);
		igb_txeof(txr);
#ifndef IGB_LEGACY_TX
		/* Process the stack queue only if not depleted */
		if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
		    !drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
		/* Do we need another? */
		if (more) {
			taskqueue_enqueue(que->tq, &que->que_task);
			return;
		}
	}

#ifdef DEVICE_POLLING
	if (ifp->if_capenable & IFCAP_POLLING)
		return;
#endif
	/* Reenable this interrupt */
	if (que->eims)
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	else
		igb_enable_intr(adapter);
}

/* Deal with link in a sleepable context */
static void
igb_handle_link(void *context, int pending)
{
	struct adapter *adapter = context;

	IGB_CORE_LOCK(adapter);
	igb_handle_link_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
}

static void
igb_handle_link_locked(struct adapter *adapter)
{
	struct tx_ring	*txr = adapter->tx_rings;
	struct ifnet *ifp = adapter->ifp;

	IGB_CORE_LOCK_ASSERT(adapter);
	adapter->hw.mac.get_link_status = 1;
	igb_update_link_status(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) && adapter->link_active) {
		for (int i = 0; i < adapter->num_queues; i++, txr++) {
			IGB_TX_LOCK(txr);
#ifndef IGB_LEGACY_TX
			/* Process the stack queue only if not depleted */
			if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
			    !drbr_empty(ifp, txr->br))
				igb_mq_start_locked(ifp, txr);
#else
			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
				igb_start_locked(txr, ifp);
#endif
			IGB_TX_UNLOCK(txr);
		}
	}
}
/*********************************************************************
 *
 *  MSI/Legacy Deferred
 *  Interrupt Service routine
 *
 *********************************************************************/
static int
igb_irq_fast(void *arg)
{
	struct adapter		*adapter = arg;
	struct igb_queue	*que = adapter->queues;
	u32			reg_icr;


	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);

	/* Hot eject? */
	if (reg_icr == 0xffffffff)
		return FILTER_STRAY;

	/* Definitely not our interrupt. */
	if (reg_icr == 0x0)
		return FILTER_STRAY;

	if ((reg_icr & E1000_ICR_INT_ASSERTED) == 0)
		return FILTER_STRAY;

	/*
	 * Mask interrupts until the taskqueue is finished running.  This is
	 * cheap, just assume that it is needed.  This also works around the
	 * MSI message reordering errata on certain systems.
	 */
	igb_disable_intr(adapter);
	taskqueue_enqueue(que->tq, &que->que_task);

	/* Link status change */
	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
		taskqueue_enqueue(que->tq, &adapter->link_task);

	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;
	return FILTER_HANDLED;
}

#ifdef DEVICE_POLLING
#if __FreeBSD_version >= 800000
#define POLL_RETURN_COUNT(a) (a)
static int
#else
#define POLL_RETURN_COUNT(a)
static void
#endif
igb_poll(struct ifnet *ifp, enum poll_cmd cmd, int count)
{
	struct adapter		*adapter = ifp->if_softc;
	struct igb_queue	*que;
	struct tx_ring		*txr;
	u32			reg_icr, rx_done = 0;
	u32			loop = IGB_MAX_LOOP;
	bool			more;

	IGB_CORE_LOCK(adapter);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		IGB_CORE_UNLOCK(adapter);
		return POLL_RETURN_COUNT(rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		/* Link status change */
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC))
			igb_handle_link_locked(adapter);

		if (reg_icr & E1000_ICR_RXO)
			adapter->rx_overruns++;
	}
	IGB_CORE_UNLOCK(adapter);

	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		txr = que->txr;

		igb_rxeof(que, count, &rx_done);

		IGB_TX_LOCK(txr);
		do {
			more = igb_txeof(txr);
		} while (loop-- && more);
#ifndef IGB_LEGACY_TX
		if (!drbr_empty(ifp, txr->br))
			igb_mq_start_locked(ifp, txr);
#else
		if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
			igb_start_locked(txr, ifp);
#endif
		IGB_TX_UNLOCK(txr);
	}

	return POLL_RETURN_COUNT(rx_done);
}
#endif /* DEVICE_POLLING */
/*********************************************************************
 *
 *  MSIX Que Interrupt Service routine
 *
 **********************************************************************/
static void
igb_msix_que(void *arg)
{
	struct igb_queue *que = arg;
	struct adapter *adapter = que->adapter;
	struct ifnet	*ifp = adapter->ifp;
	struct tx_ring *txr = que->txr;
	struct rx_ring *rxr = que->rxr;
	u32		newitr = 0;
	bool		more_rx;

	/* Ignore spurious interrupts */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	E1000_WRITE_REG(&adapter->hw, E1000_EIMC, que->eims);
	++que->irqs;

	IGB_TX_LOCK(txr);
	igb_txeof(txr);
#ifndef IGB_LEGACY_TX
	/* Process the stack queue only if not depleted */
	if (((txr->queue_status & IGB_QUEUE_DEPLETED) == 0) &&
	    !drbr_empty(ifp, txr->br))
		igb_mq_start_locked(ifp, txr);
#else
	if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		igb_start_locked(txr, ifp);
#endif
	IGB_TX_UNLOCK(txr);

	more_rx = igb_rxeof(que, adapter->rx_process_limit, NULL);

	if (adapter->enable_aim == FALSE)
		goto no_calc;
	/*
	** Do Adaptive Interrupt Moderation:
	**  - Write out last calculated setting
	**  - Calculate based on average size over
	**    the last interval.
	*/
	if (que->eitr_setting)
		E1000_WRITE_REG(&adapter->hw,
		    E1000_EITR(que->msix), que->eitr_setting);

	que->eitr_setting = 0;

	/* Idle, do nothing */
	if ((txr->bytes == 0) && (rxr->bytes == 0))
		goto no_calc;

	/* Use half the default if sub-gig */
	if (adapter->link_speed != 1000)
		newitr = IGB_DEFAULT_ITR / 2;
	else {
		if ((txr->bytes) && (txr->packets))
			newitr = txr->bytes/txr->packets;
		if ((rxr->bytes) && (rxr->packets))
			newitr = max(newitr,
			    (rxr->bytes / rxr->packets));
		newitr += 24; /* account for hardware frame, crc */
		/* set an upper boundary */
		newitr = min(newitr, 3000);
		/* Be nice to the mid range */
		if ((newitr > 300) && (newitr < 1200))
			newitr = (newitr / 3);
		else
			newitr = (newitr / 2);
	}
	newitr &= 0x7FFC; /* Mask invalid bits */
	if (adapter->hw.mac.type == e1000_82575)
		newitr |= newitr << 16;
	else
		newitr |= E1000_EITR_CNT_IGNR;

	/* save for next interrupt */
	que->eitr_setting = newitr;

	/* Reset state */
	txr->bytes = 0;
	txr->packets = 0;
	rxr->bytes = 0;
	rxr->packets = 0;

no_calc:
	/* Schedule a clean task if needed */
	if (more_rx)
		taskqueue_enqueue(que->tq, &que->que_task);
	else
		/* Reenable this interrupt */
		E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims);
	return;
}
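
/*
 * Worked example of the AIM calculation above, at gigabit link speed
 * (illustrative numbers): an interval with rxr->bytes = 150000 and
 * rxr->packets = 100 gives an average of 1500 bytes/packet, plus 24
 * for frame overhead = 1524; that is under the 3000 cap but outside
 * the 300-1200 mid range, so it is halved to 762 and masked with
 * 0x7FFC to 760, which is saved in eitr_setting and written to the
 * EITR register at the start of the next interrupt.
 */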
/*********************************************************************
 *
 *  MSIX Link Interrupt Service routine
 *
 **********************************************************************/

static void
igb_msix_link(void *arg)
{
	struct adapter	*adapter = arg;
	u32		icr;

	++adapter->link_irq;
	icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (!(icr & E1000_ICR_LSC))
		goto spurious;
	igb_handle_link(adapter, 0);

spurious:
	/* Rearm */
	E1000_WRITE_REG(&adapter->hw, E1000_IMS, E1000_IMS_LSC);
	E1000_WRITE_REG(&adapter->hw, E1000_EIMS, adapter->link_mask);
	return;
}


/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called whenever the user queries the status of
 *  the interface using ifconfig.
 *
 **********************************************************************/
static void
igb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct adapter *adapter = ifp->if_softc;

	INIT_DEBUGOUT("igb_media_status: begin");

	IGB_CORE_LOCK(adapter);
	igb_update_link_status(adapter);

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if (!adapter->link_active) {
		IGB_CORE_UNLOCK(adapter);
		return;
	}

	ifmr->ifm_status |= IFM_ACTIVE;

	switch (adapter->link_speed) {
	case 10:
		ifmr->ifm_active |= IFM_10_T;
		break;
	case 100:
		/*
		** Support for 100Mb SFP - these are Fiber,
		** but the media type appears as serdes
		*/
		if (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes)
			ifmr->ifm_active |= IFM_100_FX;
		else
			ifmr->ifm_active |= IFM_100_TX;
		break;
	case 1000:
		ifmr->ifm_active |= IFM_1000_T;
		break;
	case 2500:
		ifmr->ifm_active |= IFM_2500_SX;
		break;
	}

	if (adapter->link_duplex == FULL_DUPLEX)
		ifmr->ifm_active |= IFM_FDX;
	else
		ifmr->ifm_active |= IFM_HDX;

	IGB_CORE_UNLOCK(adapter);
}

/*********************************************************************
 *
 *  Media Ioctl callback
 *
 *  This routine is called when the user changes speed/duplex using
 *  the media/mediaopt options with ifconfig.
 *
 **********************************************************************/
static int
igb_media_change(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct ifmedia	*ifm = &adapter->media;

	INIT_DEBUGOUT("igb_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	IGB_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);

	return (0);
}
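
/*
 * Usage sketch for the media handlers above ("igb0" is illustrative):
 *
 *   ifconfig igb0 media autoselect
 *   ifconfig igb0 media 100baseTX mediaopt full-duplex
 *
 * The second form takes the IFM_100_TX case in igb_media_change(),
 * which disables autonegotiation and forces 100 Mb/s full duplex.
 */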
1822 * 1823 **********************************************************************/ 1824static int 1825igb_xmit(struct tx_ring *txr, struct mbuf **m_headp) 1826{ 1827 struct adapter *adapter = txr->adapter; 1828 u32 olinfo_status = 0, cmd_type_len; 1829 int i, j, error, nsegs; 1830 int first; 1831 bool remap = TRUE; 1832 struct mbuf *m_head; 1833 bus_dma_segment_t segs[IGB_MAX_SCATTER]; 1834 bus_dmamap_t map; 1835 struct igb_tx_buf *txbuf; 1836 union e1000_adv_tx_desc *txd = NULL; 1837 1838 m_head = *m_headp; 1839 1840 /* Basic descriptor defines */ 1841 cmd_type_len = (E1000_ADVTXD_DTYP_DATA | 1842 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT); 1843 1844 if (m_head->m_flags & M_VLANTAG) 1845 cmd_type_len |= E1000_ADVTXD_DCMD_VLE; 1846 1847 /* 1848 * Important to capture the first descriptor 1849 * used because it will contain the index of 1850 * the one we tell the hardware to report back 1851 */ 1852 first = txr->next_avail_desc; 1853 txbuf = &txr->tx_buffers[first]; 1854 map = txbuf->map; 1855 1856 /* 1857 * Map the packet for DMA. 1858 */ 1859retry: 1860 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, 1861 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT); 1862 1863 if (__predict_false(error)) { 1864 struct mbuf *m; 1865 1866 switch (error) { 1867 case EFBIG: 1868 /* Try it again? - one try */ 1869 if (remap == TRUE) { 1870 remap = FALSE; 1871 m = m_collapse(*m_headp, M_NOWAIT, 1872 IGB_MAX_SCATTER); 1873 if (m == NULL) { 1874 adapter->mbuf_defrag_failed++; 1875 m_freem(*m_headp); 1876 *m_headp = NULL; 1877 return (ENOBUFS); 1878 } 1879 *m_headp = m; 1880 goto retry; 1881 } else 1882 return (error); 1883 default: 1884 txr->no_tx_dma_setup++; 1885 m_freem(*m_headp); 1886 *m_headp = NULL; 1887 return (error); 1888 } 1889 } 1890 1891 /* Make certain there are enough descriptors */ 1892 if (txr->tx_avail < (nsegs + 2)) { 1893 txr->no_desc_avail++; 1894 bus_dmamap_unload(txr->txtag, map); 1895 return (ENOBUFS); 1896 } 1897 m_head = *m_headp; 1898 1899 /* 1900 ** Set up the appropriate offload context 1901 ** this will consume the first descriptor 1902 */ 1903 error = igb_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status); 1904 if (__predict_false(error)) { 1905 m_freem(*m_headp); 1906 *m_headp = NULL; 1907 return (error); 1908 } 1909 1910 /* 82575 needs the queue index added */ 1911 if (adapter->hw.mac.type == e1000_82575) 1912 olinfo_status |= txr->me << 4; 1913 1914 i = txr->next_avail_desc; 1915 for (j = 0; j < nsegs; j++) { 1916 bus_size_t seglen; 1917 bus_addr_t segaddr; 1918 1919 txbuf = &txr->tx_buffers[i]; 1920 txd = &txr->tx_base[i]; 1921 seglen = segs[j].ds_len; 1922 segaddr = htole64(segs[j].ds_addr); 1923 1924 txd->read.buffer_addr = segaddr; 1925 txd->read.cmd_type_len = htole32(E1000_TXD_CMD_IFCS | 1926 cmd_type_len | seglen); 1927 txd->read.olinfo_status = htole32(olinfo_status); 1928 1929 if (++i == txr->num_desc) 1930 i = 0; 1931 } 1932 1933 txd->read.cmd_type_len |= 1934 htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS); 1935 txr->tx_avail -= nsegs; 1936 txr->next_avail_desc = i; 1937 1938 txbuf->m_head = m_head; 1939 /* 1940 ** Here we swap the map so the last descriptor, 1941 ** which gets the completion interrupt has the 1942 ** real map, and the first descriptor gets the 1943 ** unused map from this descriptor. 
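	** The clean routine unloads the map hanging off the EOP
	** buffer, so it must be the map the chain was loaded with.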
1944 */ 1945 txr->tx_buffers[first].map = txbuf->map; 1946 txbuf->map = map; 1947 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE); 1948 1949 /* Set the EOP descriptor that will be marked done */ 1950 txbuf = &txr->tx_buffers[first]; 1951 txbuf->eop = txd; 1952 1953 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 1954 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1955 /* 1956 * Advance the Transmit Descriptor Tail (Tdt), this tells the 1957 * hardware that this frame is available to transmit. 1958 */ 1959 ++txr->total_packets; 1960 E1000_WRITE_REG(&adapter->hw, E1000_TDT(txr->me), i); 1961 1962 return (0); 1963} 1964static void 1965igb_set_promisc(struct adapter *adapter) 1966{ 1967 struct ifnet *ifp = adapter->ifp; 1968 struct e1000_hw *hw = &adapter->hw; 1969 u32 reg; 1970 1971 if (adapter->vf_ifp) { 1972 e1000_promisc_set_vf(hw, e1000_promisc_enabled); 1973 return; 1974 } 1975 1976 reg = E1000_READ_REG(hw, E1000_RCTL); 1977 if (ifp->if_flags & IFF_PROMISC) { 1978 reg |= (E1000_RCTL_UPE | E1000_RCTL_MPE); 1979 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1980 } else if (ifp->if_flags & IFF_ALLMULTI) { 1981 reg |= E1000_RCTL_MPE; 1982 reg &= ~E1000_RCTL_UPE; 1983 E1000_WRITE_REG(hw, E1000_RCTL, reg); 1984 } 1985} 1986 1987static void 1988igb_disable_promisc(struct adapter *adapter) 1989{ 1990 struct e1000_hw *hw = &adapter->hw; 1991 struct ifnet *ifp = adapter->ifp; 1992 u32 reg; 1993 int mcnt = 0; 1994 1995 if (adapter->vf_ifp) { 1996 e1000_promisc_set_vf(hw, e1000_promisc_disabled); 1997 return; 1998 } 1999 reg = E1000_READ_REG(hw, E1000_RCTL); 2000 reg &= (~E1000_RCTL_UPE); 2001 if (ifp->if_flags & IFF_ALLMULTI) 2002 mcnt = MAX_NUM_MULTICAST_ADDRESSES; 2003 else { 2004 struct ifmultiaddr *ifma; 2005#if __FreeBSD_version < 800000 2006 IF_ADDR_LOCK(ifp); 2007#else 2008 if_maddr_rlock(ifp); 2009#endif 2010 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2011 if (ifma->ifma_addr->sa_family != AF_LINK) 2012 continue; 2013 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) 2014 break; 2015 mcnt++; 2016 } 2017#if __FreeBSD_version < 800000 2018 IF_ADDR_UNLOCK(ifp); 2019#else 2020 if_maddr_runlock(ifp); 2021#endif 2022 } 2023 /* Don't disable if in MAX groups */ 2024 if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) 2025 reg &= (~E1000_RCTL_MPE); 2026 E1000_WRITE_REG(hw, E1000_RCTL, reg); 2027} 2028 2029 2030/********************************************************************* 2031 * Multicast Update 2032 * 2033 * This routine is called whenever multicast address list is updated. 
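 *  If more than MAX_NUM_MULTICAST_ADDRESSES entries are present,
 *  the hardware filter is bypassed by enabling multicast
 *  promiscuous mode instead.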
2034 * 2035 **********************************************************************/ 2036 2037static void 2038igb_set_multi(struct adapter *adapter) 2039{ 2040 struct ifnet *ifp = adapter->ifp; 2041 struct ifmultiaddr *ifma; 2042 u32 reg_rctl = 0; 2043 u8 *mta; 2044 2045 int mcnt = 0; 2046 2047 IOCTL_DEBUGOUT("igb_set_multi: begin"); 2048 2049 mta = adapter->mta; 2050 bzero(mta, sizeof(uint8_t) * ETH_ADDR_LEN * 2051 MAX_NUM_MULTICAST_ADDRESSES); 2052 2053#if __FreeBSD_version < 800000 2054 IF_ADDR_LOCK(ifp); 2055#else 2056 if_maddr_rlock(ifp); 2057#endif 2058 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2059 if (ifma->ifma_addr->sa_family != AF_LINK) 2060 continue; 2061 2062 if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) 2063 break; 2064 2065 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 2066 &mta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); 2067 mcnt++; 2068 } 2069#if __FreeBSD_version < 800000 2070 IF_ADDR_UNLOCK(ifp); 2071#else 2072 if_maddr_runlock(ifp); 2073#endif 2074 2075 if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) { 2076 reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL); 2077 reg_rctl |= E1000_RCTL_MPE; 2078 E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl); 2079 } else 2080 e1000_update_mc_addr_list(&adapter->hw, mta, mcnt); 2081} 2082 2083 2084/********************************************************************* 2085 * Timer routine: 2086 * This routine checks for link status, 2087 * updates statistics, and does the watchdog. 2088 * 2089 **********************************************************************/ 2090 2091static void 2092igb_local_timer(void *arg) 2093{ 2094 struct adapter *adapter = arg; 2095 device_t dev = adapter->dev; 2096 struct ifnet *ifp = adapter->ifp; 2097 struct tx_ring *txr = adapter->tx_rings; 2098 struct igb_queue *que = adapter->queues; 2099 int hung = 0, busy = 0; 2100 2101 2102 IGB_CORE_LOCK_ASSERT(adapter); 2103 2104 igb_update_link_status(adapter); 2105 igb_update_stats_counters(adapter); 2106 2107 /* 2108 ** Check the TX queues status 2109 ** - central locked handling of OACTIVE 2110 ** - watchdog only if all queues show hung 2111 */ 2112 for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { 2113 if ((txr->queue_status & IGB_QUEUE_HUNG) && 2114 (adapter->pause_frames == 0)) 2115 ++hung; 2116 if (txr->queue_status & IGB_QUEUE_DEPLETED) 2117 ++busy; 2118 if ((txr->queue_status & IGB_QUEUE_IDLE) == 0) 2119 taskqueue_enqueue(que->tq, &que->que_task); 2120 } 2121 if (hung == adapter->num_queues) 2122 goto timeout; 2123 if (busy == adapter->num_queues) 2124 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2125 else if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) && 2126 (busy < adapter->num_queues)) 2127 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 2128 2129 adapter->pause_frames = 0; 2130 callout_reset(&adapter->timer, hz, igb_local_timer, adapter); 2131#ifndef DEVICE_POLLING 2132 /* Schedule all queue interrupts - deadlock protection */ 2133 E1000_WRITE_REG(&adapter->hw, E1000_EICS, adapter->que_mask); 2134#endif 2135 return; 2136 2137timeout: 2138 device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); 2139 device_printf(dev,"Queue(%d) tdh = %d, hw tdt = %d\n", txr->me, 2140 E1000_READ_REG(&adapter->hw, E1000_TDH(txr->me)), 2141 E1000_READ_REG(&adapter->hw, E1000_TDT(txr->me))); 2142 device_printf(dev,"TX(%d) desc avail = %d," 2143 "Next TX to Clean = %d\n", 2144 txr->me, txr->tx_avail, txr->next_to_clean); 2145 adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2146 adapter->watchdog_events++; 2147 igb_init_locked(adapter); 2148} 2149 2150static void 
2151igb_update_link_status(struct adapter *adapter) 2152{ 2153 struct e1000_hw *hw = &adapter->hw; 2154 struct e1000_fc_info *fc = &hw->fc; 2155 struct ifnet *ifp = adapter->ifp; 2156 device_t dev = adapter->dev; 2157 struct tx_ring *txr = adapter->tx_rings; 2158 u32 link_check, thstat, ctrl; 2159 char *flowctl = NULL; 2160 2161 link_check = thstat = ctrl = 0; 2162 2163 /* Get the cached link value or read for real */ 2164 switch (hw->phy.media_type) { 2165 case e1000_media_type_copper: 2166 if (hw->mac.get_link_status) { 2167 /* Do the work to read phy */ 2168 e1000_check_for_link(hw); 2169 link_check = !hw->mac.get_link_status; 2170 } else 2171 link_check = TRUE; 2172 break; 2173 case e1000_media_type_fiber: 2174 e1000_check_for_link(hw); 2175 link_check = (E1000_READ_REG(hw, E1000_STATUS) & 2176 E1000_STATUS_LU); 2177 break; 2178 case e1000_media_type_internal_serdes: 2179 e1000_check_for_link(hw); 2180 link_check = adapter->hw.mac.serdes_has_link; 2181 break; 2182 /* VF device is type_unknown */ 2183 case e1000_media_type_unknown: 2184 e1000_check_for_link(hw); 2185 link_check = !hw->mac.get_link_status; 2186 /* Fall thru */ 2187 default: 2188 break; 2189 } 2190 2191 /* Check for thermal downshift or shutdown */ 2192 if (hw->mac.type == e1000_i350) { 2193 thstat = E1000_READ_REG(hw, E1000_THSTAT); 2194 ctrl = E1000_READ_REG(hw, E1000_CTRL_EXT); 2195 } 2196 2197 /* Get the flow control for display */ 2198 switch (fc->current_mode) { 2199 case e1000_fc_rx_pause: 2200 flowctl = "RX"; 2201 break; 2202 case e1000_fc_tx_pause: 2203 flowctl = "TX"; 2204 break; 2205 case e1000_fc_full: 2206 flowctl = "Full"; 2207 break; 2208 case e1000_fc_none: 2209 default: 2210 flowctl = "None"; 2211 break; 2212 } 2213 2214 /* Now we check if a transition has happened */ 2215 if (link_check && (adapter->link_active == 0)) { 2216 e1000_get_speed_and_duplex(&adapter->hw, 2217 &adapter->link_speed, &adapter->link_duplex); 2218 if (bootverbose) 2219 device_printf(dev, "Link is up %d Mbps %s," 2220 " Flow Control: %s\n", 2221 adapter->link_speed, 2222 ((adapter->link_duplex == FULL_DUPLEX) ? 2223 "Full Duplex" : "Half Duplex"), flowctl); 2224 adapter->link_active = 1; 2225 ifp->if_baudrate = adapter->link_speed * 1000000; 2226 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && 2227 (thstat & E1000_THSTAT_LINK_THROTTLE)) 2228 device_printf(dev, "Link: thermal downshift\n"); 2229 /* Delay Link Up for Phy update */ 2230 if (((hw->mac.type == e1000_i210) || 2231 (hw->mac.type == e1000_i211)) && 2232 (hw->phy.id == I210_I_PHY_ID)) 2233 msec_delay(I210_LINK_DELAY); 2234 /* Reset if the media type changed. 
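		   Flexible-media adapters can autosense a different
		   media (copper vs. SerDes) behind the driver's back;
		   a full reset picks up the new configuration.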
*/ 2235 if (hw->dev_spec._82575.media_changed) { 2236 hw->dev_spec._82575.media_changed = false; 2237 adapter->flags |= IGB_MEDIA_RESET; 2238 igb_reset(adapter); 2239 } 2240 /* This can sleep */ 2241 if_link_state_change(ifp, LINK_STATE_UP); 2242 } else if (!link_check && (adapter->link_active == 1)) { 2243 ifp->if_baudrate = adapter->link_speed = 0; 2244 adapter->link_duplex = 0; 2245 if (bootverbose) 2246 device_printf(dev, "Link is Down\n"); 2247 if ((ctrl & E1000_CTRL_EXT_LINK_MODE_GMII) && 2248 (thstat & E1000_THSTAT_PWR_DOWN)) 2249 device_printf(dev, "Link: thermal shutdown\n"); 2250 adapter->link_active = 0; 2251 /* This can sleep */ 2252 if_link_state_change(ifp, LINK_STATE_DOWN); 2253 /* Reset queue state */ 2254 for (int i = 0; i < adapter->num_queues; i++, txr++) 2255 txr->queue_status = IGB_QUEUE_IDLE; 2256 } 2257} 2258 2259/********************************************************************* 2260 * 2261 * This routine disables all traffic on the adapter by issuing a 2262 * global reset on the MAC and deallocates TX/RX buffers. 2263 * 2264 **********************************************************************/ 2265 2266static void 2267igb_stop(void *arg) 2268{ 2269 struct adapter *adapter = arg; 2270 struct ifnet *ifp = adapter->ifp; 2271 struct tx_ring *txr = adapter->tx_rings; 2272 2273 IGB_CORE_LOCK_ASSERT(adapter); 2274 2275 INIT_DEBUGOUT("igb_stop: begin"); 2276 2277 igb_disable_intr(adapter); 2278 2279 callout_stop(&adapter->timer); 2280 2281 /* Tell the stack that the interface is no longer active */ 2282 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2283 ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2284 2285 /* Disarm watchdog timer. */ 2286 for (int i = 0; i < adapter->num_queues; i++, txr++) { 2287 IGB_TX_LOCK(txr); 2288 txr->queue_status = IGB_QUEUE_IDLE; 2289 IGB_TX_UNLOCK(txr); 2290 } 2291 2292 e1000_reset_hw(&adapter->hw); 2293 E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0); 2294 2295 e1000_led_off(&adapter->hw); 2296 e1000_cleanup_led(&adapter->hw); 2297} 2298 2299 2300/********************************************************************* 2301 * 2302 * Determine hardware revision. 2303 * 2304 **********************************************************************/ 2305static void 2306igb_identify_hardware(struct adapter *adapter) 2307{ 2308 device_t dev = adapter->dev; 2309 2310 /* Make sure our PCI config space has the necessary stuff set */ 2311 pci_enable_busmaster(dev); 2312 adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); 2313 2314 /* Save off the information about this board */ 2315 adapter->hw.vendor_id = pci_get_vendor(dev); 2316 adapter->hw.device_id = pci_get_device(dev); 2317 adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1); 2318 adapter->hw.subsystem_vendor_id = 2319 pci_read_config(dev, PCIR_SUBVEND_0, 2); 2320 adapter->hw.subsystem_device_id = 2321 pci_read_config(dev, PCIR_SUBDEV_0, 2); 2322 2323 /* Set MAC type early for PCI setup */ 2324 e1000_set_mac_type(&adapter->hw); 2325 2326 /* Are we a VF device? 
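	   VF (SR-IOV virtual function) interfaces use a reduced
	   register set; the vf_ifp flag set here gates the PF-only
	   code paths elsewhere in the driver.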
*/ 2327 if ((adapter->hw.mac.type == e1000_vfadapt) || 2328 (adapter->hw.mac.type == e1000_vfadapt_i350)) 2329 adapter->vf_ifp = 1; 2330 else 2331 adapter->vf_ifp = 0; 2332} 2333 2334static int 2335igb_allocate_pci_resources(struct adapter *adapter) 2336{ 2337 device_t dev = adapter->dev; 2338 int rid; 2339 2340 rid = PCIR_BAR(0); 2341 adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, 2342 &rid, RF_ACTIVE); 2343 if (adapter->pci_mem == NULL) { 2344 device_printf(dev, "Unable to allocate bus resource: memory\n"); 2345 return (ENXIO); 2346 } 2347 adapter->osdep.mem_bus_space_tag = 2348 rman_get_bustag(adapter->pci_mem); 2349 adapter->osdep.mem_bus_space_handle = 2350 rman_get_bushandle(adapter->pci_mem); 2351 adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; 2352 2353 adapter->num_queues = 1; /* Defaults for Legacy or MSI */ 2354 2355 /* This will setup either MSI/X or MSI */ 2356 adapter->msix = igb_setup_msix(adapter); 2357 adapter->hw.back = &adapter->osdep; 2358 2359 return (0); 2360} 2361 2362/********************************************************************* 2363 * 2364 * Setup the Legacy or MSI Interrupt handler 2365 * 2366 **********************************************************************/ 2367static int 2368igb_allocate_legacy(struct adapter *adapter) 2369{ 2370 device_t dev = adapter->dev; 2371 struct igb_queue *que = adapter->queues; 2372#ifndef IGB_LEGACY_TX 2373 struct tx_ring *txr = adapter->tx_rings; 2374#endif 2375 int error, rid = 0; 2376 2377 /* Turn off all interrupts */ 2378 E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff); 2379 2380 /* MSI RID is 1 */ 2381 if (adapter->msix == 1) 2382 rid = 1; 2383 2384 /* We allocate a single interrupt resource */ 2385 adapter->res = bus_alloc_resource_any(dev, 2386 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2387 if (adapter->res == NULL) { 2388 device_printf(dev, "Unable to allocate bus resource: " 2389 "interrupt\n"); 2390 return (ENXIO); 2391 } 2392 2393#ifndef IGB_LEGACY_TX 2394 TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); 2395#endif 2396 2397 /* 2398 * Try allocating a fast interrupt and the associated deferred 2399 * processing contexts. 
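	 * The filter routine (igb_irq_fast) runs in interrupt context
	 * and only acknowledges the cause; the deferred tasks created
	 * here do the actual RX/TX processing.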
2400 */ 2401 TASK_INIT(&que->que_task, 0, igb_handle_que, que); 2402 /* Make tasklet for deferred link handling */ 2403 TASK_INIT(&adapter->link_task, 0, igb_handle_link, adapter); 2404 que->tq = taskqueue_create_fast("igb_taskq", M_NOWAIT, 2405 taskqueue_thread_enqueue, &que->tq); 2406 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", 2407 device_get_nameunit(adapter->dev)); 2408 if ((error = bus_setup_intr(dev, adapter->res, 2409 INTR_TYPE_NET | INTR_MPSAFE, igb_irq_fast, NULL, 2410 adapter, &adapter->tag)) != 0) { 2411 device_printf(dev, "Failed to register fast interrupt " 2412 "handler: %d\n", error); 2413 taskqueue_free(que->tq); 2414 que->tq = NULL; 2415 return (error); 2416 } 2417 2418 return (0); 2419} 2420 2421 2422/********************************************************************* 2423 * 2424 * Setup the MSIX Queue Interrupt handlers: 2425 * 2426 **********************************************************************/ 2427static int 2428igb_allocate_msix(struct adapter *adapter) 2429{ 2430 device_t dev = adapter->dev; 2431 struct igb_queue *que = adapter->queues; 2432 int error, rid, vector = 0; 2433 int cpu_id = 0; 2434#ifdef RSS 2435 cpuset_t cpu_mask; 2436#endif 2437 2438 /* Be sure to start with all interrupts disabled */ 2439 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); 2440 E1000_WRITE_FLUSH(&adapter->hw); 2441 2442#ifdef RSS 2443 /* 2444 * If we're doing RSS, the number of queues needs to 2445 * match the number of RSS buckets that are configured. 2446 * 2447 * + If there's more queues than RSS buckets, we'll end 2448 * up with queues that get no traffic. 2449 * 2450 * + If there's more RSS buckets than queues, we'll end 2451 * up having multiple RSS buckets map to the same queue, 2452 * so there'll be some contention. 2453 */ 2454 if (adapter->num_queues != rss_getnumbuckets()) { 2455 device_printf(dev, 2456 "%s: number of queues (%d) != number of RSS buckets (%d)" 2457 "; performance will be impacted.\n", 2458 __func__, 2459 adapter->num_queues, 2460 rss_getnumbuckets()); 2461 } 2462#endif 2463 2464 for (int i = 0; i < adapter->num_queues; i++, vector++, que++) { 2465 rid = vector +1; 2466 que->res = bus_alloc_resource_any(dev, 2467 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2468 if (que->res == NULL) { 2469 device_printf(dev, 2470 "Unable to allocate bus resource: " 2471 "MSIX Queue Interrupt\n"); 2472 return (ENXIO); 2473 } 2474 error = bus_setup_intr(dev, que->res, 2475 INTR_TYPE_NET | INTR_MPSAFE, NULL, 2476 igb_msix_que, que, &que->tag); 2477 if (error) { 2478 que->res = NULL; 2479 device_printf(dev, "Failed to register Queue handler"); 2480 return (error); 2481 } 2482#if __FreeBSD_version >= 800504 2483 bus_describe_intr(dev, que->res, que->tag, "que %d", i); 2484#endif 2485 que->msix = vector; 2486 if (adapter->hw.mac.type == e1000_82575) 2487 que->eims = E1000_EICR_TX_QUEUE0 << i; 2488 else 2489 que->eims = 1 << vector; 2490 2491#ifdef RSS 2492 /* 2493 * The queue ID is used as the RSS layer bucket ID. 2494 * We look up the queue ID -> RSS CPU ID and select 2495 * that. 2496 */ 2497 cpu_id = rss_getcpu(i % rss_getnumbuckets()); 2498#else 2499 /* 2500 * Bind the msix vector, and thus the 2501 * rings to the corresponding cpu. 2502 * 2503 * This just happens to match the default RSS round-robin 2504 * bucket -> queue -> CPU allocation. 
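		 * Keeping a vector and its rings on one CPU keeps the
		 * queue state cache-local; CPUs are handed out with
		 * CPU_FIRST()/CPU_NEXT() below.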
2505 */ 2506 if (adapter->num_queues > 1) { 2507 if (igb_last_bind_cpu < 0) 2508 igb_last_bind_cpu = CPU_FIRST(); 2509 cpu_id = igb_last_bind_cpu; 2510 } 2511#endif 2512 2513 if (adapter->num_queues > 1) { 2514 bus_bind_intr(dev, que->res, cpu_id); 2515#ifdef RSS 2516 device_printf(dev, 2517 "Bound queue %d to RSS bucket %d\n", 2518 i, cpu_id); 2519#else 2520 device_printf(dev, 2521 "Bound queue %d to cpu %d\n", 2522 i, cpu_id); 2523#endif 2524 } 2525 2526#ifndef IGB_LEGACY_TX 2527 TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, 2528 que->txr); 2529#endif 2530 /* Make tasklet for deferred handling */ 2531 TASK_INIT(&que->que_task, 0, igb_handle_que, que); 2532 que->tq = taskqueue_create("igb_que", M_NOWAIT, 2533 taskqueue_thread_enqueue, &que->tq); 2534 if (adapter->num_queues > 1) { 2535 /* 2536 * Only pin the taskqueue thread to a CPU if 2537 * RSS is in use. 2538 * 2539 * This again just happens to match the default RSS 2540 * round-robin bucket -> queue -> CPU allocation. 2541 */ 2542#ifdef RSS 2543 CPU_SETOF(cpu_id, &cpu_mask); 2544 taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, 2545 &cpu_mask, 2546 "%s que (bucket %d)", 2547 device_get_nameunit(adapter->dev), 2548 cpu_id); 2549#else 2550 taskqueue_start_threads(&que->tq, 1, PI_NET, 2551 "%s que (qid %d)", 2552 device_get_nameunit(adapter->dev), 2553 cpu_id); 2554#endif 2555 } else { 2556 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", 2557 device_get_nameunit(adapter->dev)); 2558 } 2559 2560 /* Finally update the last bound CPU id */ 2561 if (adapter->num_queues > 1) 2562 igb_last_bind_cpu = CPU_NEXT(igb_last_bind_cpu); 2563 } 2564 2565 /* And Link */ 2566 rid = vector + 1; 2567 adapter->res = bus_alloc_resource_any(dev, 2568 SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); 2569 if (adapter->res == NULL) { 2570 device_printf(dev, 2571 "Unable to allocate bus resource: " 2572 "MSIX Link Interrupt\n"); 2573 return (ENXIO); 2574 } 2575 if ((error = bus_setup_intr(dev, adapter->res, 2576 INTR_TYPE_NET | INTR_MPSAFE, NULL, 2577 igb_msix_link, adapter, &adapter->tag)) != 0) { 2578 device_printf(dev, "Failed to register Link handler"); 2579 return (error); 2580 } 2581#if __FreeBSD_version >= 800504 2582 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); 2583#endif 2584 adapter->linkvec = vector; 2585 2586 return (0); 2587} 2588 2589 2590static void 2591igb_configure_queues(struct adapter *adapter) 2592{ 2593 struct e1000_hw *hw = &adapter->hw; 2594 struct igb_queue *que; 2595 u32 tmp, ivar = 0, newitr = 0; 2596 2597 /* First turn on RSS capability */ 2598 if (adapter->hw.mac.type != e1000_82575) 2599 E1000_WRITE_REG(hw, E1000_GPIE, 2600 E1000_GPIE_MSIX_MODE | E1000_GPIE_EIAME | 2601 E1000_GPIE_PBA | E1000_GPIE_NSICR); 2602 2603 /* Turn on MSIX */ 2604 switch (adapter->hw.mac.type) { 2605 case e1000_82580: 2606 case e1000_i350: 2607 case e1000_i354: 2608 case e1000_i210: 2609 case e1000_i211: 2610 case e1000_vfadapt: 2611 case e1000_vfadapt_i350: 2612 /* RX entries */ 2613 for (int i = 0; i < adapter->num_queues; i++) { 2614 u32 index = i >> 1; 2615 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 2616 que = &adapter->queues[i]; 2617 if (i & 1) { 2618 ivar &= 0xFF00FFFF; 2619 ivar |= (que->msix | E1000_IVAR_VALID) << 16; 2620 } else { 2621 ivar &= 0xFFFFFF00; 2622 ivar |= que->msix | E1000_IVAR_VALID; 2623 } 2624 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 2625 } 2626 /* TX entries */ 2627 for (int i = 0; i < adapter->num_queues; i++) { 2628 u32 index = i >> 1; 2629 ivar = E1000_READ_REG_ARRAY(hw, 
E1000_IVAR0, index); 2630 que = &adapter->queues[i]; 2631 if (i & 1) { 2632 ivar &= 0x00FFFFFF; 2633 ivar |= (que->msix | E1000_IVAR_VALID) << 24; 2634 } else { 2635 ivar &= 0xFFFF00FF; 2636 ivar |= (que->msix | E1000_IVAR_VALID) << 8; 2637 } 2638 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 2639 adapter->que_mask |= que->eims; 2640 } 2641 2642 /* And for the link interrupt */ 2643 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; 2644 adapter->link_mask = 1 << adapter->linkvec; 2645 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 2646 break; 2647 case e1000_82576: 2648 /* RX entries */ 2649 for (int i = 0; i < adapter->num_queues; i++) { 2650 u32 index = i & 0x7; /* Each IVAR has two entries */ 2651 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 2652 que = &adapter->queues[i]; 2653 if (i < 8) { 2654 ivar &= 0xFFFFFF00; 2655 ivar |= que->msix | E1000_IVAR_VALID; 2656 } else { 2657 ivar &= 0xFF00FFFF; 2658 ivar |= (que->msix | E1000_IVAR_VALID) << 16; 2659 } 2660 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 2661 adapter->que_mask |= que->eims; 2662 } 2663 /* TX entries */ 2664 for (int i = 0; i < adapter->num_queues; i++) { 2665 u32 index = i & 0x7; /* Each IVAR has two entries */ 2666 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); 2667 que = &adapter->queues[i]; 2668 if (i < 8) { 2669 ivar &= 0xFFFF00FF; 2670 ivar |= (que->msix | E1000_IVAR_VALID) << 8; 2671 } else { 2672 ivar &= 0x00FFFFFF; 2673 ivar |= (que->msix | E1000_IVAR_VALID) << 24; 2674 } 2675 E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); 2676 adapter->que_mask |= que->eims; 2677 } 2678 2679 /* And for the link interrupt */ 2680 ivar = (adapter->linkvec | E1000_IVAR_VALID) << 8; 2681 adapter->link_mask = 1 << adapter->linkvec; 2682 E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar); 2683 break; 2684 2685 case e1000_82575: 2686 /* enable MSI-X support*/ 2687 tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); 2688 tmp |= E1000_CTRL_EXT_PBA_CLR; 2689 /* Auto-Mask interrupts upon ICR read. 
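		   With EIAME and IRCA set, the hardware clears and
		   masks the serviced cause bits when the register is
		   read, so the handlers need not write EIMC themselves.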
*/ 2690 tmp |= E1000_CTRL_EXT_EIAME; 2691 tmp |= E1000_CTRL_EXT_IRCA; 2692 E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); 2693 2694 /* Queues */ 2695 for (int i = 0; i < adapter->num_queues; i++) { 2696 que = &adapter->queues[i]; 2697 tmp = E1000_EICR_RX_QUEUE0 << i; 2698 tmp |= E1000_EICR_TX_QUEUE0 << i; 2699 que->eims = tmp; 2700 E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), 2701 i, que->eims); 2702 adapter->que_mask |= que->eims; 2703 } 2704 2705 /* Link */ 2706 E1000_WRITE_REG(hw, E1000_MSIXBM(adapter->linkvec), 2707 E1000_EIMS_OTHER); 2708 adapter->link_mask |= E1000_EIMS_OTHER; 2709 default: 2710 break; 2711 } 2712 2713 /* Set the starting interrupt rate */ 2714 if (igb_max_interrupt_rate > 0) 2715 newitr = (4000000 / igb_max_interrupt_rate) & 0x7FFC; 2716 2717 if (hw->mac.type == e1000_82575) 2718 newitr |= newitr << 16; 2719 else 2720 newitr |= E1000_EITR_CNT_IGNR; 2721 2722 for (int i = 0; i < adapter->num_queues; i++) { 2723 que = &adapter->queues[i]; 2724 E1000_WRITE_REG(hw, E1000_EITR(que->msix), newitr); 2725 } 2726 2727 return; 2728} 2729 2730 2731static void 2732igb_free_pci_resources(struct adapter *adapter) 2733{ 2734 struct igb_queue *que = adapter->queues; 2735 device_t dev = adapter->dev; 2736 int rid; 2737 2738 /* 2739 ** There is a slight possibility of a failure mode 2740 ** in attach that will result in entering this function 2741 ** before interrupt resources have been initialized, and 2742 ** in that case we do not want to execute the loops below 2743 ** We can detect this reliably by the state of the adapter 2744 ** res pointer. 2745 */ 2746 if (adapter->res == NULL) 2747 goto mem; 2748 2749 /* 2750 * First release all the interrupt resources: 2751 */ 2752 for (int i = 0; i < adapter->num_queues; i++, que++) { 2753 rid = que->msix + 1; 2754 if (que->tag != NULL) { 2755 bus_teardown_intr(dev, que->res, que->tag); 2756 que->tag = NULL; 2757 } 2758 if (que->res != NULL) 2759 bus_release_resource(dev, 2760 SYS_RES_IRQ, rid, que->res); 2761 } 2762 2763 /* Clean the Legacy or Link interrupt last */ 2764 if (adapter->linkvec) /* we are doing MSIX */ 2765 rid = adapter->linkvec + 1; 2766 else 2767 (adapter->msix != 0) ? 
(rid = 1):(rid = 0); 2768 2769 que = adapter->queues; 2770 if (adapter->tag != NULL) { 2771 taskqueue_drain(que->tq, &adapter->link_task); 2772 bus_teardown_intr(dev, adapter->res, adapter->tag); 2773 adapter->tag = NULL; 2774 } 2775 if (adapter->res != NULL) 2776 bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); 2777 2778 for (int i = 0; i < adapter->num_queues; i++, que++) { 2779 if (que->tq != NULL) { 2780#ifndef IGB_LEGACY_TX 2781 taskqueue_drain(que->tq, &que->txr->txq_task); 2782#endif 2783 taskqueue_drain(que->tq, &que->que_task); 2784 taskqueue_free(que->tq); 2785 } 2786 } 2787mem: 2788 if (adapter->msix) 2789 pci_release_msi(dev); 2790 2791 if (adapter->msix_mem != NULL) 2792 bus_release_resource(dev, SYS_RES_MEMORY, 2793 adapter->memrid, adapter->msix_mem); 2794 2795 if (adapter->pci_mem != NULL) 2796 bus_release_resource(dev, SYS_RES_MEMORY, 2797 PCIR_BAR(0), adapter->pci_mem); 2798 2799} 2800 2801/* 2802 * Setup Either MSI/X or MSI 2803 */ 2804static int 2805igb_setup_msix(struct adapter *adapter) 2806{ 2807 device_t dev = adapter->dev; 2808 int bar, want, queues, msgs, maxqueues; 2809 2810 /* tuneable override */ 2811 if (igb_enable_msix == 0) 2812 goto msi; 2813 2814 /* First try MSI/X */ 2815 msgs = pci_msix_count(dev); 2816 if (msgs == 0) 2817 goto msi; 2818 /* 2819 ** Some new devices, as with ixgbe, now may 2820 ** use a different BAR, so we need to keep 2821 ** track of which is used. 2822 */ 2823 adapter->memrid = PCIR_BAR(IGB_MSIX_BAR); 2824 bar = pci_read_config(dev, adapter->memrid, 4); 2825 if (bar == 0) /* use next bar */ 2826 adapter->memrid += 4; 2827 adapter->msix_mem = bus_alloc_resource_any(dev, 2828 SYS_RES_MEMORY, &adapter->memrid, RF_ACTIVE); 2829 if (adapter->msix_mem == NULL) { 2830 /* May not be enabled */ 2831 device_printf(adapter->dev, 2832 "Unable to map MSIX table \n"); 2833 goto msi; 2834 } 2835 2836 queues = (mp_ncpus > (msgs-1)) ? 
(msgs-1) : mp_ncpus; 2837 2838 /* Override via tuneable */ 2839 if (igb_num_queues != 0) 2840 queues = igb_num_queues; 2841 2842#ifdef RSS 2843 /* If we're doing RSS, clamp at the number of RSS buckets */ 2844 if (queues > rss_getnumbuckets()) 2845 queues = rss_getnumbuckets(); 2846#endif 2847 2848 2849 /* Sanity check based on HW */ 2850 switch (adapter->hw.mac.type) { 2851 case e1000_82575: 2852 maxqueues = 4; 2853 break; 2854 case e1000_82576: 2855 case e1000_82580: 2856 case e1000_i350: 2857 case e1000_i354: 2858 maxqueues = 8; 2859 break; 2860 case e1000_i210: 2861 maxqueues = 4; 2862 break; 2863 case e1000_i211: 2864 maxqueues = 2; 2865 break; 2866 default: /* VF interfaces */ 2867 maxqueues = 1; 2868 break; 2869 } 2870 2871 /* Final clamp on the actual hardware capability */ 2872 if (queues > maxqueues) 2873 queues = maxqueues; 2874 2875 /* 2876 ** One vector (RX/TX pair) per queue 2877 ** plus an additional for Link interrupt 2878 */ 2879 want = queues + 1; 2880 if (msgs >= want) 2881 msgs = want; 2882 else { 2883 device_printf(adapter->dev, 2884 "MSIX Configuration Problem, " 2885 "%d vectors configured, but %d queues wanted!\n", 2886 msgs, want); 2887 goto msi; 2888 } 2889 if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { 2890 device_printf(adapter->dev, 2891 "Using MSIX interrupts with %d vectors\n", msgs); 2892 adapter->num_queues = queues; 2893 return (msgs); 2894 } 2895 /* 2896 ** If MSIX alloc failed or provided us with 2897 ** less than needed, free and fall through to MSI 2898 */ 2899 pci_release_msi(dev); 2900 2901msi: 2902 if (adapter->msix_mem != NULL) { 2903 bus_release_resource(dev, SYS_RES_MEMORY, 2904 PCIR_BAR(IGB_MSIX_BAR), adapter->msix_mem); 2905 adapter->msix_mem = NULL; 2906 } 2907 msgs = 1; 2908 if (pci_alloc_msi(dev, &msgs) == 0) { 2909 device_printf(adapter->dev," Using an MSI interrupt\n"); 2910 return (msgs); 2911 } 2912 device_printf(adapter->dev," Using a Legacy interrupt\n"); 2913 return (0); 2914} 2915 2916/********************************************************************* 2917 * 2918 * Initialize the DMA Coalescing feature 2919 * 2920 **********************************************************************/ 2921static void 2922igb_init_dmac(struct adapter *adapter, u32 pba) 2923{ 2924 device_t dev = adapter->dev; 2925 struct e1000_hw *hw = &adapter->hw; 2926 u32 dmac, reg = ~E1000_DMACR_DMAC_EN; 2927 u16 hwm; 2928 2929 if (hw->mac.type == e1000_i211) 2930 return; 2931 2932 if (hw->mac.type > e1000_82580) { 2933 2934 if (adapter->dmac == 0) { /* Disabling it */ 2935 E1000_WRITE_REG(hw, E1000_DMACR, reg); 2936 return; 2937 } else 2938 device_printf(dev, "DMA Coalescing enabled\n"); 2939 2940 /* Set starting threshold */ 2941 E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); 2942 2943 hwm = 64 * pba - adapter->max_frame_size / 16; 2944 if (hwm < 64 * (pba - 6)) 2945 hwm = 64 * (pba - 6); 2946 reg = E1000_READ_REG(hw, E1000_FCRTC); 2947 reg &= ~E1000_FCRTC_RTH_COAL_MASK; 2948 reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) 2949 & E1000_FCRTC_RTH_COAL_MASK); 2950 E1000_WRITE_REG(hw, E1000_FCRTC, reg); 2951 2952 2953 dmac = pba - adapter->max_frame_size / 512; 2954 if (dmac < pba - 10) 2955 dmac = pba - 10; 2956 reg = E1000_READ_REG(hw, E1000_DMACR); 2957 reg &= ~E1000_DMACR_DMACTHR_MASK; 2958 reg = ((dmac << E1000_DMACR_DMACTHR_SHIFT) 2959 & E1000_DMACR_DMACTHR_MASK); 2960 2961 /* transition to L0x or L1 if available..*/ 2962 reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); 2963 2964 /* Check if status is 2.5Gb backplane connection 2965 * before 
configuring the watchdog timer: the DMAC watchdog
		 * field counts in 12.8 usec units on a 2.5Gb backplane
		 * SKU and in 32 usec units otherwise, so the requested
		 * delay (adapter->dmac) is scaled differently below.
		 */
		if (hw->mac.type == e1000_i354) {
			int status = E1000_READ_REG(hw, E1000_STATUS);
			if ((status & E1000_STATUS_2P5_SKU) &&
			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
				reg |= ((adapter->dmac * 5) >> 6);
			else
				reg |= (adapter->dmac >> 5);
		} else {
			reg |= (adapter->dmac >> 5);
		}

		E1000_WRITE_REG(hw, E1000_DMACR, reg);

		E1000_WRITE_REG(hw, E1000_DMCRTRH, 0);

		/* Set the interval before transition */
		reg = E1000_READ_REG(hw, E1000_DMCTLX);
		if (hw->mac.type == e1000_i350)
			reg |= IGB_DMCTLX_DCFLUSH_DIS;
		/*
		** On a 2.5Gb connection the TTLX unit is 0.4 usec, so
		** the same 4 usec delay is 4 / 0.4 = 10 (0xA) units
		** rather than the 0x4 written for other speeds.
		*/
		if (hw->mac.type == e1000_i354) {
			int status = E1000_READ_REG(hw, E1000_STATUS);
			if ((status & E1000_STATUS_2P5_SKU) &&
			    (!(status & E1000_STATUS_2P5_SKU_OVER)))
				reg |= 0xA;
			else
				reg |= 0x4;
		} else {
			reg |= 0x4;
		}

		E1000_WRITE_REG(hw, E1000_DMCTLX, reg);

		/* free space in tx packet buffer to wake from DMA coal */
		E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_TXPBSIZE -
		    (2 * adapter->max_frame_size)) >> 6);

		/* make low power state decision controlled by DMA coal */
		reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		reg &= ~E1000_PCIEMISC_LX_DECISION;
		E1000_WRITE_REG(hw, E1000_PCIEMISC, reg);

	} else if (hw->mac.type == e1000_82580) {
		u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC);
		E1000_WRITE_REG(hw, E1000_PCIEMISC,
		    reg & ~E1000_PCIEMISC_LX_DECISION);
		E1000_WRITE_REG(hw, E1000_DMACR, 0);
	}
}


/*********************************************************************
 *
 *  Set up a fresh starting state
 *
 **********************************************************************/
static void
igb_reset(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_fc_info *fc = &hw->fc;
	struct ifnet	*ifp = adapter->ifp;
	u32		pba = 0;
	u16		hwm;

	INIT_DEBUGOUT("igb_reset: begin");

	/* Let the firmware know the OS is in control */
	igb_get_hw_control(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer;
	 * the remainder is used for the transmit buffer.
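	 * The value is in KB units; e.g. E1000_PBA_32K below reserves
	 * 32KB of the on-chip packet buffer for receive.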
3048 */ 3049 switch (hw->mac.type) { 3050 case e1000_82575: 3051 pba = E1000_PBA_32K; 3052 break; 3053 case e1000_82576: 3054 case e1000_vfadapt: 3055 pba = E1000_READ_REG(hw, E1000_RXPBS); 3056 pba &= E1000_RXPBS_SIZE_MASK_82576; 3057 break; 3058 case e1000_82580: 3059 case e1000_i350: 3060 case e1000_i354: 3061 case e1000_vfadapt_i350: 3062 pba = E1000_READ_REG(hw, E1000_RXPBS); 3063 pba = e1000_rxpbs_adjust_82580(pba); 3064 break; 3065 case e1000_i210: 3066 case e1000_i211: 3067 pba = E1000_PBA_34K; 3068 default: 3069 break; 3070 } 3071 3072 /* Special needs in case of Jumbo frames */ 3073 if ((hw->mac.type == e1000_82575) && (ifp->if_mtu > ETHERMTU)) { 3074 u32 tx_space, min_tx, min_rx; 3075 pba = E1000_READ_REG(hw, E1000_PBA); 3076 tx_space = pba >> 16; 3077 pba &= 0xffff; 3078 min_tx = (adapter->max_frame_size + 3079 sizeof(struct e1000_tx_desc) - ETHERNET_FCS_SIZE) * 2; 3080 min_tx = roundup2(min_tx, 1024); 3081 min_tx >>= 10; 3082 min_rx = adapter->max_frame_size; 3083 min_rx = roundup2(min_rx, 1024); 3084 min_rx >>= 10; 3085 if (tx_space < min_tx && 3086 ((min_tx - tx_space) < pba)) { 3087 pba = pba - (min_tx - tx_space); 3088 /* 3089 * if short on rx space, rx wins 3090 * and must trump tx adjustment 3091 */ 3092 if (pba < min_rx) 3093 pba = min_rx; 3094 } 3095 E1000_WRITE_REG(hw, E1000_PBA, pba); 3096 } 3097 3098 INIT_DEBUGOUT1("igb_init: pba=%dK",pba); 3099 3100 /* 3101 * These parameters control the automatic generation (Tx) and 3102 * response (Rx) to Ethernet PAUSE frames. 3103 * - High water mark should allow for at least two frames to be 3104 * received after sending an XOFF. 3105 * - Low water mark works best when it is very near the high water mark. 3106 * This allows the receiver to restart by sending XON when it has 3107 * drained a bit. 3108 */ 3109 hwm = min(((pba << 10) * 9 / 10), 3110 ((pba << 10) - 2 * adapter->max_frame_size)); 3111 3112 if (hw->mac.type < e1000_82576) { 3113 fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */ 3114 fc->low_water = fc->high_water - 8; 3115 } else { 3116 fc->high_water = hwm & 0xFFF0; /* 16-byte granularity */ 3117 fc->low_water = fc->high_water - 16; 3118 } 3119 3120 fc->pause_time = IGB_FC_PAUSE_TIME; 3121 fc->send_xon = TRUE; 3122 if (adapter->fc) 3123 fc->requested_mode = adapter->fc; 3124 else 3125 fc->requested_mode = e1000_fc_default; 3126 3127 /* Issue a global reset */ 3128 e1000_reset_hw(hw); 3129 E1000_WRITE_REG(hw, E1000_WUC, 0); 3130 3131 /* Reset for AutoMediaDetect */ 3132 if (adapter->flags & IGB_MEDIA_RESET) { 3133 e1000_setup_init_funcs(hw, TRUE); 3134 e1000_get_bus_info(hw); 3135 adapter->flags &= ~IGB_MEDIA_RESET; 3136 } 3137 3138 if (e1000_init_hw(hw) < 0) 3139 device_printf(dev, "Hardware Initialization Failed\n"); 3140 3141 /* Setup DMA Coalescing */ 3142 igb_init_dmac(adapter, pba); 3143 3144 E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN); 3145 e1000_get_phy_info(hw); 3146 e1000_check_for_link(hw); 3147 return; 3148} 3149 3150/********************************************************************* 3151 * 3152 * Setup networking device structure and register an interface. 
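 *  All ifnet methods, offload capabilities and the supported media
 *  list are registered here, once, at attach time.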
3153 * 3154 **********************************************************************/ 3155static int 3156igb_setup_interface(device_t dev, struct adapter *adapter) 3157{ 3158 struct ifnet *ifp; 3159 3160 INIT_DEBUGOUT("igb_setup_interface: begin"); 3161 3162 ifp = adapter->ifp = if_alloc(IFT_ETHER); 3163 if (ifp == NULL) { 3164 device_printf(dev, "can not allocate ifnet structure\n"); 3165 return (-1); 3166 } 3167 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 3168 ifp->if_init = igb_init; 3169 ifp->if_softc = adapter; 3170 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 3171 ifp->if_ioctl = igb_ioctl; 3172 ifp->if_get_counter = igb_get_counter; 3173 3174 /* TSO parameters */ 3175 ifp->if_hw_tsomax = IP_MAXPACKET; 3176 ifp->if_hw_tsomaxsegcount = IGB_MAX_SCATTER; 3177 ifp->if_hw_tsomaxsegsize = IGB_TSO_SEG_SIZE; 3178 3179#ifndef IGB_LEGACY_TX 3180 ifp->if_transmit = igb_mq_start; 3181 ifp->if_qflush = igb_qflush; 3182#else 3183 ifp->if_start = igb_start; 3184 IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); 3185 ifp->if_snd.ifq_drv_maxlen = 0; 3186 IFQ_SET_READY(&ifp->if_snd); 3187#endif 3188 3189 ether_ifattach(ifp, adapter->hw.mac.addr); 3190 3191 ifp->if_capabilities = ifp->if_capenable = 0; 3192 3193 ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; 3194#if __FreeBSD_version >= 1000000 3195 ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; 3196#endif 3197 ifp->if_capabilities |= IFCAP_TSO; 3198 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 3199 ifp->if_capenable = ifp->if_capabilities; 3200 3201 /* Don't enable LRO by default */ 3202 ifp->if_capabilities |= IFCAP_LRO; 3203 3204#ifdef DEVICE_POLLING 3205 ifp->if_capabilities |= IFCAP_POLLING; 3206#endif 3207 3208 /* 3209 * Tell the upper layer(s) we 3210 * support full VLAN capability. 3211 */ 3212 ifp->if_hdrlen = sizeof(struct ether_vlan_header); 3213 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING 3214 | IFCAP_VLAN_HWTSO 3215 | IFCAP_VLAN_MTU; 3216 ifp->if_capenable |= IFCAP_VLAN_HWTAGGING 3217 | IFCAP_VLAN_HWTSO 3218 | IFCAP_VLAN_MTU; 3219 3220 /* 3221 ** Don't turn this on by default, if vlans are 3222 ** created on another pseudo device (eg. lagg) 3223 ** then vlan events are not passed thru, breaking 3224 ** operation, but with HW FILTER off it works. If 3225 ** using vlans directly on the igb driver you can 3226 ** enable this and get full hardware tag filtering. 
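	** The capability is still advertised below, so it can be
	** toggled administratively with ifconfig at run time.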
3227 */ 3228 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; 3229 3230 /* 3231 * Specify the media types supported by this adapter and register 3232 * callbacks to update media and link information 3233 */ 3234 ifmedia_init(&adapter->media, IFM_IMASK, 3235 igb_media_change, igb_media_status); 3236 if ((adapter->hw.phy.media_type == e1000_media_type_fiber) || 3237 (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) { 3238 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 3239 0, NULL); 3240 ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); 3241 } else { 3242 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); 3243 ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX, 3244 0, NULL); 3245 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 3246 0, NULL); 3247 ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX, 3248 0, NULL); 3249 if (adapter->hw.phy.type != e1000_phy_ife) { 3250 ifmedia_add(&adapter->media, 3251 IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 3252 ifmedia_add(&adapter->media, 3253 IFM_ETHER | IFM_1000_T, 0, NULL); 3254 } 3255 } 3256 ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); 3257 ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); 3258 return (0); 3259} 3260 3261 3262/* 3263 * Manage DMA'able memory. 3264 */ 3265static void 3266igb_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 3267{ 3268 if (error) 3269 return; 3270 *(bus_addr_t *) arg = segs[0].ds_addr; 3271} 3272 3273static int 3274igb_dma_malloc(struct adapter *adapter, bus_size_t size, 3275 struct igb_dma_alloc *dma, int mapflags) 3276{ 3277 int error; 3278 3279 error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */ 3280 IGB_DBA_ALIGN, 0, /* alignment, bounds */ 3281 BUS_SPACE_MAXADDR, /* lowaddr */ 3282 BUS_SPACE_MAXADDR, /* highaddr */ 3283 NULL, NULL, /* filter, filterarg */ 3284 size, /* maxsize */ 3285 1, /* nsegments */ 3286 size, /* maxsegsize */ 3287 0, /* flags */ 3288 NULL, /* lockfunc */ 3289 NULL, /* lockarg */ 3290 &dma->dma_tag); 3291 if (error) { 3292 device_printf(adapter->dev, 3293 "%s: bus_dma_tag_create failed: %d\n", 3294 __func__, error); 3295 goto fail_0; 3296 } 3297 3298 error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr, 3299 BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map); 3300 if (error) { 3301 device_printf(adapter->dev, 3302 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 3303 __func__, (uintmax_t)size, error); 3304 goto fail_2; 3305 } 3306 3307 dma->dma_paddr = 0; 3308 error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, 3309 size, igb_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT); 3310 if (error || dma->dma_paddr == 0) { 3311 device_printf(adapter->dev, 3312 "%s: bus_dmamap_load failed: %d\n", 3313 __func__, error); 3314 goto fail_3; 3315 } 3316 3317 return (0); 3318 3319fail_3: 3320 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3321fail_2: 3322 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map); 3323 bus_dma_tag_destroy(dma->dma_tag); 3324fail_0: 3325 dma->dma_tag = NULL; 3326 3327 return (error); 3328} 3329 3330static void 3331igb_dma_free(struct adapter *adapter, struct igb_dma_alloc *dma) 3332{ 3333 if (dma->dma_tag == NULL) 3334 return; 3335 if (dma->dma_paddr != 0) { 3336 bus_dmamap_sync(dma->dma_tag, dma->dma_map, 3337 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3338 bus_dmamap_unload(dma->dma_tag, dma->dma_map); 3339 dma->dma_paddr = 0; 3340 } 3341 if (dma->dma_vaddr != NULL) { 3342 bus_dmamem_free(dma->dma_tag, 
dma->dma_vaddr, dma->dma_map); 3343 dma->dma_vaddr = NULL; 3344 } 3345 bus_dma_tag_destroy(dma->dma_tag); 3346 dma->dma_tag = NULL; 3347} 3348 3349 3350/********************************************************************* 3351 * 3352 * Allocate memory for the transmit and receive rings, and then 3353 * the descriptors associated with each, called only once at attach. 3354 * 3355 **********************************************************************/ 3356static int 3357igb_allocate_queues(struct adapter *adapter) 3358{ 3359 device_t dev = adapter->dev; 3360 struct igb_queue *que = NULL; 3361 struct tx_ring *txr = NULL; 3362 struct rx_ring *rxr = NULL; 3363 int rsize, tsize, error = E1000_SUCCESS; 3364 int txconf = 0, rxconf = 0; 3365 3366 /* First allocate the top level queue structs */ 3367 if (!(adapter->queues = 3368 (struct igb_queue *) malloc(sizeof(struct igb_queue) * 3369 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3370 device_printf(dev, "Unable to allocate queue memory\n"); 3371 error = ENOMEM; 3372 goto fail; 3373 } 3374 3375 /* Next allocate the TX ring struct memory */ 3376 if (!(adapter->tx_rings = 3377 (struct tx_ring *) malloc(sizeof(struct tx_ring) * 3378 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3379 device_printf(dev, "Unable to allocate TX ring memory\n"); 3380 error = ENOMEM; 3381 goto tx_fail; 3382 } 3383 3384 /* Now allocate the RX */ 3385 if (!(adapter->rx_rings = 3386 (struct rx_ring *) malloc(sizeof(struct rx_ring) * 3387 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3388 device_printf(dev, "Unable to allocate RX ring memory\n"); 3389 error = ENOMEM; 3390 goto rx_fail; 3391 } 3392 3393 tsize = roundup2(adapter->num_tx_desc * 3394 sizeof(union e1000_adv_tx_desc), IGB_DBA_ALIGN); 3395 /* 3396 * Now set up the TX queues, txconf is needed to handle the 3397 * possibility that things fail midcourse and we need to 3398 * undo memory gracefully 3399 */ 3400 for (int i = 0; i < adapter->num_queues; i++, txconf++) { 3401 /* Set up some basics */ 3402 txr = &adapter->tx_rings[i]; 3403 txr->adapter = adapter; 3404 txr->me = i; 3405 txr->num_desc = adapter->num_tx_desc; 3406 3407 /* Initialize the TX lock */ 3408 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", 3409 device_get_nameunit(dev), txr->me); 3410 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF); 3411 3412 if (igb_dma_malloc(adapter, tsize, 3413 &txr->txdma, BUS_DMA_NOWAIT)) { 3414 device_printf(dev, 3415 "Unable to allocate TX Descriptor memory\n"); 3416 error = ENOMEM; 3417 goto err_tx_desc; 3418 } 3419 txr->tx_base = (union e1000_adv_tx_desc *)txr->txdma.dma_vaddr; 3420 bzero((void *)txr->tx_base, tsize); 3421 3422 /* Now allocate transmit buffers for the ring */ 3423 if (igb_allocate_transmit_buffers(txr)) { 3424 device_printf(dev, 3425 "Critical Failure setting up transmit buffers\n"); 3426 error = ENOMEM; 3427 goto err_tx_desc; 3428 } 3429#ifndef IGB_LEGACY_TX 3430 /* Allocate a buf ring */ 3431 txr->br = buf_ring_alloc(igb_buf_ring_size, M_DEVBUF, 3432 M_WAITOK, &txr->tx_mtx); 3433#endif 3434 } 3435 3436 /* 3437 * Next the RX queues... 
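	 * Same pattern as TX: descriptor DMA memory first, then the
	 * per-ring lock and the receive buffers themselves.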
 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		rxr->adapter = adapter;
		rxr->me = i;

		/* Initialize the RX lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (igb_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RX Descriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union e1000_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (igb_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		igb_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		igb_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
#ifndef IGB_LEGACY_TX
	buf_ring_free(txr->br, M_DEVBUF);
#endif
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}

/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
static int
igb_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct igb_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
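	 * A single tag covers all TX buffers: mappings of up to
	 * IGB_TSO_SIZE bytes, in at most IGB_MAX_SCATTER segments of
	 * no more than PAGE_SIZE each.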
3517 */ 3518 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 3519 1, 0, /* alignment, bounds */ 3520 BUS_SPACE_MAXADDR, /* lowaddr */ 3521 BUS_SPACE_MAXADDR, /* highaddr */ 3522 NULL, NULL, /* filter, filterarg */ 3523 IGB_TSO_SIZE, /* maxsize */ 3524 IGB_MAX_SCATTER, /* nsegments */ 3525 PAGE_SIZE, /* maxsegsize */ 3526 0, /* flags */ 3527 NULL, /* lockfunc */ 3528 NULL, /* lockfuncarg */ 3529 &txr->txtag))) { 3530 device_printf(dev,"Unable to allocate TX DMA tag\n"); 3531 goto fail; 3532 } 3533 3534 if (!(txr->tx_buffers = 3535 (struct igb_tx_buf *) malloc(sizeof(struct igb_tx_buf) * 3536 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) { 3537 device_printf(dev, "Unable to allocate tx_buffer memory\n"); 3538 error = ENOMEM; 3539 goto fail; 3540 } 3541 3542 /* Create the descriptor buffer dma maps */ 3543 txbuf = txr->tx_buffers; 3544 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3545 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map); 3546 if (error != 0) { 3547 device_printf(dev, "Unable to create TX DMA map\n"); 3548 goto fail; 3549 } 3550 } 3551 3552 return 0; 3553fail: 3554 /* We free all, it handles case where we are in the middle */ 3555 igb_free_transmit_structures(adapter); 3556 return (error); 3557} 3558 3559/********************************************************************* 3560 * 3561 * Initialize a transmit ring. 3562 * 3563 **********************************************************************/ 3564static void 3565igb_setup_transmit_ring(struct tx_ring *txr) 3566{ 3567 struct adapter *adapter = txr->adapter; 3568 struct igb_tx_buf *txbuf; 3569 int i; 3570#ifdef DEV_NETMAP 3571 struct netmap_adapter *na = NA(adapter->ifp); 3572 struct netmap_slot *slot; 3573#endif /* DEV_NETMAP */ 3574 3575 /* Clear the old descriptor contents */ 3576 IGB_TX_LOCK(txr); 3577#ifdef DEV_NETMAP 3578 slot = netmap_reset(na, NR_TX, txr->me, 0); 3579#endif /* DEV_NETMAP */ 3580 bzero((void *)txr->tx_base, 3581 (sizeof(union e1000_adv_tx_desc)) * adapter->num_tx_desc); 3582 /* Reset indices */ 3583 txr->next_avail_desc = 0; 3584 txr->next_to_clean = 0; 3585 3586 /* Free any existing tx buffers. */ 3587 txbuf = txr->tx_buffers; 3588 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) { 3589 if (txbuf->m_head != NULL) { 3590 bus_dmamap_sync(txr->txtag, txbuf->map, 3591 BUS_DMASYNC_POSTWRITE); 3592 bus_dmamap_unload(txr->txtag, txbuf->map); 3593 m_freem(txbuf->m_head); 3594 txbuf->m_head = NULL; 3595 } 3596#ifdef DEV_NETMAP 3597 if (slot) { 3598 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i); 3599 /* no need to set the address */ 3600 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si)); 3601 } 3602#endif /* DEV_NETMAP */ 3603 /* clear the watch index */ 3604 txbuf->eop = NULL; 3605 } 3606 3607 /* Set number of descriptors available */ 3608 txr->tx_avail = adapter->num_tx_desc; 3609 3610 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 3611 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3612 IGB_TX_UNLOCK(txr); 3613} 3614 3615/********************************************************************* 3616 * 3617 * Initialize all transmit rings. 
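 *  This just walks the rings and runs igb_setup_transmit_ring()
 *  (above) on each one.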
3618 * 3619 **********************************************************************/ 3620static void 3621igb_setup_transmit_structures(struct adapter *adapter) 3622{ 3623 struct tx_ring *txr = adapter->tx_rings; 3624 3625 for (int i = 0; i < adapter->num_queues; i++, txr++) 3626 igb_setup_transmit_ring(txr); 3627 3628 return; 3629} 3630 3631/********************************************************************* 3632 * 3633 * Enable transmit unit. 3634 * 3635 **********************************************************************/ 3636static void 3637igb_initialize_transmit_units(struct adapter *adapter) 3638{ 3639 struct tx_ring *txr = adapter->tx_rings; 3640 struct e1000_hw *hw = &adapter->hw; 3641 u32 tctl, txdctl; 3642 3643 INIT_DEBUGOUT("igb_initialize_transmit_units: begin"); 3644 tctl = txdctl = 0; 3645 3646 /* Setup the Tx Descriptor Rings */ 3647 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3648 u64 bus_addr = txr->txdma.dma_paddr; 3649 3650 E1000_WRITE_REG(hw, E1000_TDLEN(i), 3651 adapter->num_tx_desc * sizeof(struct e1000_tx_desc)); 3652 E1000_WRITE_REG(hw, E1000_TDBAH(i), 3653 (uint32_t)(bus_addr >> 32)); 3654 E1000_WRITE_REG(hw, E1000_TDBAL(i), 3655 (uint32_t)bus_addr); 3656 3657 /* Setup the HW Tx Head and Tail descriptor pointers */ 3658 E1000_WRITE_REG(hw, E1000_TDT(i), 0); 3659 E1000_WRITE_REG(hw, E1000_TDH(i), 0); 3660 3661 HW_DEBUGOUT2("Base = %x, Length = %x\n", 3662 E1000_READ_REG(hw, E1000_TDBAL(i)), 3663 E1000_READ_REG(hw, E1000_TDLEN(i))); 3664 3665 txr->queue_status = IGB_QUEUE_IDLE; 3666 3667 txdctl |= IGB_TX_PTHRESH; 3668 txdctl |= IGB_TX_HTHRESH << 8; 3669 txdctl |= IGB_TX_WTHRESH << 16; 3670 txdctl |= E1000_TXDCTL_QUEUE_ENABLE; 3671 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl); 3672 } 3673 3674 if (adapter->vf_ifp) 3675 return; 3676 3677 e1000_config_collision_dist(hw); 3678 3679 /* Program the Transmit Control Register */ 3680 tctl = E1000_READ_REG(hw, E1000_TCTL); 3681 tctl &= ~E1000_TCTL_CT; 3682 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN | 3683 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT)); 3684 3685 /* This write will effectively turn on the transmit unit. */ 3686 E1000_WRITE_REG(hw, E1000_TCTL, tctl); 3687} 3688 3689/********************************************************************* 3690 * 3691 * Free all transmit rings. 3692 * 3693 **********************************************************************/ 3694static void 3695igb_free_transmit_structures(struct adapter *adapter) 3696{ 3697 struct tx_ring *txr = adapter->tx_rings; 3698 3699 for (int i = 0; i < adapter->num_queues; i++, txr++) { 3700 IGB_TX_LOCK(txr); 3701 igb_free_transmit_buffers(txr); 3702 igb_dma_free(adapter, &txr->txdma); 3703 IGB_TX_UNLOCK(txr); 3704 IGB_TX_LOCK_DESTROY(txr); 3705 } 3706 free(adapter->tx_rings, M_DEVBUF); 3707} 3708 3709/********************************************************************* 3710 * 3711 * Free transmit ring related data structures. 
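 *  Unloads and destroys the per-buffer DMA maps, frees any pending
 *  mbufs, the optional buf_ring, and finally the TX DMA tag.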
3712 * 3713 **********************************************************************/ 3714static void 3715igb_free_transmit_buffers(struct tx_ring *txr) 3716{ 3717 struct adapter *adapter = txr->adapter; 3718 struct igb_tx_buf *tx_buffer; 3719 int i; 3720 3721 INIT_DEBUGOUT("free_transmit_ring: begin"); 3722 3723 if (txr->tx_buffers == NULL) 3724 return; 3725 3726 tx_buffer = txr->tx_buffers; 3727 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) { 3728 if (tx_buffer->m_head != NULL) { 3729 bus_dmamap_sync(txr->txtag, tx_buffer->map, 3730 BUS_DMASYNC_POSTWRITE); 3731 bus_dmamap_unload(txr->txtag, 3732 tx_buffer->map); 3733 m_freem(tx_buffer->m_head); 3734 tx_buffer->m_head = NULL; 3735 if (tx_buffer->map != NULL) { 3736 bus_dmamap_destroy(txr->txtag, 3737 tx_buffer->map); 3738 tx_buffer->map = NULL; 3739 } 3740 } else if (tx_buffer->map != NULL) { 3741 bus_dmamap_unload(txr->txtag, 3742 tx_buffer->map); 3743 bus_dmamap_destroy(txr->txtag, 3744 tx_buffer->map); 3745 tx_buffer->map = NULL; 3746 } 3747 } 3748#ifndef IGB_LEGACY_TX 3749 if (txr->br != NULL) 3750 buf_ring_free(txr->br, M_DEVBUF); 3751#endif 3752 if (txr->tx_buffers != NULL) { 3753 free(txr->tx_buffers, M_DEVBUF); 3754 txr->tx_buffers = NULL; 3755 } 3756 if (txr->txtag != NULL) { 3757 bus_dma_tag_destroy(txr->txtag); 3758 txr->txtag = NULL; 3759 } 3760 return; 3761} 3762 3763/********************************************************************** 3764 * 3765 * Setup work for hardware segmentation offload (TSO) on 3766 * adapters using advanced tx descriptors 3767 * 3768 **********************************************************************/ 3769static int 3770igb_tso_setup(struct tx_ring *txr, struct mbuf *mp, 3771 u32 *cmd_type_len, u32 *olinfo_status) 3772{ 3773 struct adapter *adapter = txr->adapter; 3774 struct e1000_adv_tx_context_desc *TXD; 3775 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0; 3776 u32 mss_l4len_idx = 0, paylen; 3777 u16 vtag = 0, eh_type; 3778 int ctxd, ehdrlen, ip_hlen, tcp_hlen; 3779 struct ether_vlan_header *eh; 3780#ifdef INET6 3781 struct ip6_hdr *ip6; 3782#endif 3783#ifdef INET 3784 struct ip *ip; 3785#endif 3786 struct tcphdr *th; 3787 3788 3789 /* 3790 * Determine where frame payload starts. 3791 * Jump over vlan headers if already present 3792 */ 3793 eh = mtod(mp, struct ether_vlan_header *); 3794 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 3795 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3796 eh_type = eh->evl_proto; 3797 } else { 3798 ehdrlen = ETHER_HDR_LEN; 3799 eh_type = eh->evl_encap_proto; 3800 } 3801 3802 switch (ntohs(eh_type)) { 3803#ifdef INET6 3804 case ETHERTYPE_IPV6: 3805 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 3806 /* XXX-BZ For now we do not pretend to support ext. hdrs. */ 3807 if (ip6->ip6_nxt != IPPROTO_TCP) 3808 return (ENXIO); 3809 ip_hlen = sizeof(struct ip6_hdr); 3810 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 3811 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); 3812 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 3813 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; 3814 break; 3815#endif 3816#ifdef INET 3817 case ETHERTYPE_IP: 3818 ip = (struct ip *)(mp->m_data + ehdrlen); 3819 if (ip->ip_p != IPPROTO_TCP) 3820 return (ENXIO); 3821 ip->ip_sum = 0; 3822 ip_hlen = ip->ip_hl << 2; 3823 th = (struct tcphdr *)((caddr_t)ip + ip_hlen); 3824 th->th_sum = in_pseudo(ip->ip_src.s_addr, 3825 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 3826 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 3827 /* Tell transmit desc to also do IPv4 checksum. 
*/ 3828 *olinfo_status |= E1000_TXD_POPTS_IXSM << 8; 3829 break; 3830#endif 3831 default: 3832 panic("%s: CSUM_TSO but no supported IP version (0x%04x)", 3833 __func__, ntohs(eh_type)); 3834 break; 3835 } 3836 3837 ctxd = txr->next_avail_desc; 3838 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; 3839 3840 tcp_hlen = th->th_off << 2; 3841 3842 /* This is used in the transmit desc in encap */ 3843 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen; 3844 3845 /* VLAN MACLEN IPLEN */ 3846 if (mp->m_flags & M_VLANTAG) { 3847 vtag = htole16(mp->m_pkthdr.ether_vtag); 3848 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); 3849 } 3850 3851 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 3852 vlan_macip_lens |= ip_hlen; 3853 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 3854 3855 /* ADV DTYPE TUCMD */ 3856 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 3857 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 3858 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 3859 3860 /* MSS L4LEN IDX */ 3861 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << E1000_ADVTXD_MSS_SHIFT); 3862 mss_l4len_idx |= (tcp_hlen << E1000_ADVTXD_L4LEN_SHIFT); 3863 /* 82575 needs the queue index added */ 3864 if (adapter->hw.mac.type == e1000_82575) 3865 mss_l4len_idx |= txr->me << 4; 3866 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 3867 3868 TXD->seqnum_seed = htole32(0); 3869 3870 if (++ctxd == txr->num_desc) 3871 ctxd = 0; 3872 3873 txr->tx_avail--; 3874 txr->next_avail_desc = ctxd; 3875 *cmd_type_len |= E1000_ADVTXD_DCMD_TSE; 3876 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 3877 *olinfo_status |= paylen << E1000_ADVTXD_PAYLEN_SHIFT; 3878 ++txr->tso_tx; 3879 return (0); 3880} 3881 3882/********************************************************************* 3883 * 3884 * Advanced Context Descriptor setup for VLAN, CSUM or TSO 3885 * 3886 **********************************************************************/ 3887 3888static int 3889igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp, 3890 u32 *cmd_type_len, u32 *olinfo_status) 3891{ 3892 struct e1000_adv_tx_context_desc *TXD; 3893 struct adapter *adapter = txr->adapter; 3894 struct ether_vlan_header *eh; 3895 struct ip *ip; 3896 struct ip6_hdr *ip6; 3897 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0, mss_l4len_idx = 0; 3898 int ehdrlen, ip_hlen = 0; 3899 u16 etype; 3900 u8 ipproto = 0; 3901 int offload = TRUE; 3902 int ctxd = txr->next_avail_desc; 3903 u16 vtag = 0; 3904 3905 /* First check if TSO is to be used */ 3906 if (mp->m_pkthdr.csum_flags & CSUM_TSO) 3907 return (igb_tso_setup(txr, mp, cmd_type_len, olinfo_status)); 3908 3909 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0) 3910 offload = FALSE; 3911 3912 /* Indicate the whole packet as payload when not doing TSO */ 3913 *olinfo_status |= mp->m_pkthdr.len << E1000_ADVTXD_PAYLEN_SHIFT; 3914 3915 /* Now ready a context descriptor */ 3916 TXD = (struct e1000_adv_tx_context_desc *) &txr->tx_base[ctxd]; 3917 3918 /* 3919 ** In advanced descriptors the vlan tag must 3920 ** be placed into the context descriptor. Hence 3921 ** we need to make one even if not doing offloads. 3922 */ 3923 if (mp->m_flags & M_VLANTAG) { 3924 vtag = htole16(mp->m_pkthdr.ether_vtag); 3925 vlan_macip_lens |= (vtag << E1000_ADVTXD_VLAN_SHIFT); 3926 } else if (offload == FALSE) /* ... no offload to do */ 3927 return (0); 3928 3929 /* 3930 * Determine where frame payload starts. 3931 * Jump over vlan headers if already present, 3932 * helpful for QinQ too. 
3933 */ 3934 eh = mtod(mp, struct ether_vlan_header *); 3935 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 3936 etype = ntohs(eh->evl_proto); 3937 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3938 } else { 3939 etype = ntohs(eh->evl_encap_proto); 3940 ehdrlen = ETHER_HDR_LEN; 3941 } 3942 3943 /* Set the ether header length */ 3944 vlan_macip_lens |= ehdrlen << E1000_ADVTXD_MACLEN_SHIFT; 3945 3946 switch (etype) { 3947 case ETHERTYPE_IP: 3948 ip = (struct ip *)(mp->m_data + ehdrlen); 3949 ip_hlen = ip->ip_hl << 2; 3950 ipproto = ip->ip_p; 3951 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4; 3952 break; 3953 case ETHERTYPE_IPV6: 3954 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); 3955 ip_hlen = sizeof(struct ip6_hdr); 3956 /* XXX-BZ this will go badly in case of ext hdrs. */ 3957 ipproto = ip6->ip6_nxt; 3958 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; 3959 break; 3960 default: 3961 offload = FALSE; 3962 break; 3963 } 3964 3965 vlan_macip_lens |= ip_hlen; 3966 type_tucmd_mlhl |= E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT; 3967 3968 switch (ipproto) { 3969 case IPPROTO_TCP: 3970#if __FreeBSD_version >= 1000000 3971 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) 3972#else 3973 if (mp->m_pkthdr.csum_flags & CSUM_TCP) 3974#endif 3975 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP; 3976 break; 3977 case IPPROTO_UDP: 3978#if __FreeBSD_version >= 1000000 3979 if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP)) 3980#else 3981 if (mp->m_pkthdr.csum_flags & CSUM_UDP) 3982#endif 3983 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP; 3984 break; 3985 3986#if __FreeBSD_version >= 800000 3987 case IPPROTO_SCTP: 3988#if __FreeBSD_version >= 1000000 3989 if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP)) 3990#else 3991 if (mp->m_pkthdr.csum_flags & CSUM_SCTP) 3992#endif 3993 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP; 3994 break; 3995#endif 3996 default: 3997 offload = FALSE; 3998 break; 3999 } 4000 4001 if (offload) /* For the TX descriptor setup */ 4002 *olinfo_status |= E1000_TXD_POPTS_TXSM << 8; 4003 4004 /* 82575 needs the queue index added */ 4005 if (adapter->hw.mac.type == e1000_82575) 4006 mss_l4len_idx = txr->me << 4; 4007 4008 /* Now copy bits into descriptor */ 4009 TXD->vlan_macip_lens = htole32(vlan_macip_lens); 4010 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl); 4011 TXD->seqnum_seed = htole32(0); 4012 TXD->mss_l4len_idx = htole32(mss_l4len_idx); 4013 4014 /* We've consumed the first desc, adjust counters */ 4015 if (++ctxd == txr->num_desc) 4016 ctxd = 0; 4017 txr->next_avail_desc = ctxd; 4018 --txr->tx_avail; 4019 4020 return (0); 4021} 4022 4023/********************************************************************** 4024 * 4025 * Examine each tx_buffer in the used queue. If the hardware is done 4026 * processing the packet then free associated resources. The 4027 * tx_buffer is put back on the free queue. 4028 * 4029 * TRUE return means there's work in the ring to clean, FALSE its empty. 
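/*
** Aside (illustrative): the context descriptors built in
** igb_tso_setup() and igb_tx_ctx_setup() above pack three header
** lengths into one word. Hypothetical helper; assumes the VLAN
** and MACLEN shifts of 16 and 9 from this driver's headers.
*/
static inline u32
example_vlan_macip_lens(u16 vtag, int ehdrlen, int ip_hlen)
{
	/* e.g. (100, 14, 20) -> 0x00641c14 */
	return (((u32)vtag << E1000_ADVTXD_VLAN_SHIFT) |
	    ((u32)ehdrlen << E1000_ADVTXD_MACLEN_SHIFT) | ip_hlen);
}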
4030 **********************************************************************/ 4031static bool 4032igb_txeof(struct tx_ring *txr) 4033{ 4034 struct adapter *adapter = txr->adapter; 4035#ifdef DEV_NETMAP 4036 struct ifnet *ifp = adapter->ifp; 4037#endif /* DEV_NETMAP */ 4038 u32 work, processed = 0; 4039 int limit = adapter->tx_process_limit; 4040 struct igb_tx_buf *buf; 4041 union e1000_adv_tx_desc *txd; 4042 4043 mtx_assert(&txr->tx_mtx, MA_OWNED); 4044 4045#ifdef DEV_NETMAP 4046 if (netmap_tx_irq(ifp, txr->me)) 4047 return (FALSE); 4048#endif /* DEV_NETMAP */ 4049 4050 if (txr->tx_avail == txr->num_desc) { 4051 txr->queue_status = IGB_QUEUE_IDLE; 4052 return FALSE; 4053 } 4054 4055 /* Get work starting point */ 4056 work = txr->next_to_clean; 4057 buf = &txr->tx_buffers[work]; 4058 txd = &txr->tx_base[work]; 4059 work -= txr->num_desc; /* The distance to ring end */ 4060 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4061 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4062 do { 4063 union e1000_adv_tx_desc *eop = buf->eop; 4064 if (eop == NULL) /* No work */ 4065 break; 4066 4067 if ((eop->wb.status & E1000_TXD_STAT_DD) == 0) 4068 break; /* I/O not complete */ 4069 4070 if (buf->m_head) { 4071 txr->bytes += 4072 buf->m_head->m_pkthdr.len; 4073 bus_dmamap_sync(txr->txtag, 4074 buf->map, 4075 BUS_DMASYNC_POSTWRITE); 4076 bus_dmamap_unload(txr->txtag, 4077 buf->map); 4078 m_freem(buf->m_head); 4079 buf->m_head = NULL; 4080 } 4081 buf->eop = NULL; 4082 ++txr->tx_avail; 4083 4084 /* We clean the range if multi segment */ 4085 while (txd != eop) { 4086 ++txd; 4087 ++buf; 4088 ++work; 4089 /* wrap the ring? */ 4090 if (__predict_false(!work)) { 4091 work -= txr->num_desc; 4092 buf = txr->tx_buffers; 4093 txd = txr->tx_base; 4094 } 4095 if (buf->m_head) { 4096 txr->bytes += 4097 buf->m_head->m_pkthdr.len; 4098 bus_dmamap_sync(txr->txtag, 4099 buf->map, 4100 BUS_DMASYNC_POSTWRITE); 4101 bus_dmamap_unload(txr->txtag, 4102 buf->map); 4103 m_freem(buf->m_head); 4104 buf->m_head = NULL; 4105 } 4106 ++txr->tx_avail; 4107 buf->eop = NULL; 4108 4109 } 4110 ++txr->packets; 4111 ++processed; 4112 txr->watchdog_time = ticks; 4113 4114 /* Try the next packet */ 4115 ++txd; 4116 ++buf; 4117 ++work; 4118 /* reset with a wrap */ 4119 if (__predict_false(!work)) { 4120 work -= txr->num_desc; 4121 buf = txr->tx_buffers; 4122 txd = txr->tx_base; 4123 } 4124 prefetch(txd); 4125 } while (__predict_true(--limit)); 4126 4127 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map, 4128 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4129 4130 work += txr->num_desc; 4131 txr->next_to_clean = work; 4132 4133 /* 4134 ** Watchdog calculation, we know there's 4135 ** work outstanding or the first return 4136 ** would have been taken, so none processed 4137 ** for too long indicates a hang. 4138 */ 4139 if ((!processed) && ((ticks - txr->watchdog_time) > IGB_WATCHDOG)) 4140 txr->queue_status |= IGB_QUEUE_HUNG; 4141 4142 if (txr->tx_avail >= IGB_QUEUE_THRESHOLD) 4143 txr->queue_status &= ~IGB_QUEUE_DEPLETED; 4144 4145 if (txr->tx_avail == txr->num_desc) { 4146 txr->queue_status = IGB_QUEUE_IDLE; 4147 return (FALSE); 4148 } 4149 4150 return (TRUE); 4151} 4152 4153/********************************************************************* 4154 * 4155 * Refresh mbuf buffers for RX descriptor rings 4156 * - now keeps its own state so discards due to resource 4157 * exhaustion are unnecessary, if an mbuf cannot be obtained 4158 * it just returns, keeping its placeholder, thus it can simply 4159 * be recalled to try again. 
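/*
** Aside (illustrative): igb_txeof() above walks the ring with a
** biased index: "work" holds (index - num_desc), a negative
** value, so the wrap test after each increment is simply (!work)
** rather than (index == num_desc). A hypothetical stand-alone
** rendering of the same arithmetic:
*/
static inline int
example_biased_walk(int next_to_clean, int num_desc, int steps)
{
	int work = next_to_clean - num_desc;	/* e.g. 1020 - 1024 = -4 */

	while (steps-- > 0) {
		++work;
		if (work == 0)			/* stepped past the last slot */
			work -= num_desc;	/* restart at ring base */
	}
	return (work + num_desc);		/* back to a real index */
}
/* example_biased_walk(1020, 1024, 6) == 2 */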
4160 * 4161 **********************************************************************/ 4162static void 4163igb_refresh_mbufs(struct rx_ring *rxr, int limit) 4164{ 4165 struct adapter *adapter = rxr->adapter; 4166 bus_dma_segment_t hseg[1]; 4167 bus_dma_segment_t pseg[1]; 4168 struct igb_rx_buf *rxbuf; 4169 struct mbuf *mh, *mp; 4170 int i, j, nsegs, error; 4171 bool refreshed = FALSE; 4172 4173 i = j = rxr->next_to_refresh; 4174 /* 4175 ** Get one descriptor beyond 4176 ** our work mark to control 4177 ** the loop. 4178 */ 4179 if (++j == adapter->num_rx_desc) 4180 j = 0; 4181 4182 while (j != limit) { 4183 rxbuf = &rxr->rx_buffers[i]; 4184 /* No hdr mbuf used with header split off */ 4185 if (rxr->hdr_split == FALSE) 4186 goto no_split; 4187 if (rxbuf->m_head == NULL) { 4188 mh = m_gethdr(M_NOWAIT, MT_DATA); 4189 if (mh == NULL) 4190 goto update; 4191 } else 4192 mh = rxbuf->m_head; 4193 4194 mh->m_pkthdr.len = mh->m_len = MHLEN; 4195 mh->m_len = MHLEN; 4196 mh->m_flags |= M_PKTHDR; 4197 /* Get the memory mapping */ 4198 error = bus_dmamap_load_mbuf_sg(rxr->htag, 4199 rxbuf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT); 4200 if (error != 0) { 4201 printf("Refresh mbufs: hdr dmamap load" 4202 " failure - %d\n", error); 4203 m_free(mh); 4204 rxbuf->m_head = NULL; 4205 goto update; 4206 } 4207 rxbuf->m_head = mh; 4208 bus_dmamap_sync(rxr->htag, rxbuf->hmap, 4209 BUS_DMASYNC_PREREAD); 4210 rxr->rx_base[i].read.hdr_addr = 4211 htole64(hseg[0].ds_addr); 4212no_split: 4213 if (rxbuf->m_pack == NULL) { 4214 mp = m_getjcl(M_NOWAIT, MT_DATA, 4215 M_PKTHDR, adapter->rx_mbuf_sz); 4216 if (mp == NULL) 4217 goto update; 4218 } else 4219 mp = rxbuf->m_pack; 4220 4221 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; 4222 /* Get the memory mapping */ 4223 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 4224 rxbuf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT); 4225 if (error != 0) { 4226 printf("Refresh mbufs: payload dmamap load" 4227 " failure - %d\n", error); 4228 m_free(mp); 4229 rxbuf->m_pack = NULL; 4230 goto update; 4231 } 4232 rxbuf->m_pack = mp; 4233 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 4234 BUS_DMASYNC_PREREAD); 4235 rxr->rx_base[i].read.pkt_addr = 4236 htole64(pseg[0].ds_addr); 4237 refreshed = TRUE; /* I feel wefreshed :) */ 4238 4239 i = j; /* our next is precalculated */ 4240 rxr->next_to_refresh = i; 4241 if (++j == adapter->num_rx_desc) 4242 j = 0; 4243 } 4244update: 4245 if (refreshed) /* update tail */ 4246 E1000_WRITE_REG(&adapter->hw, 4247 E1000_RDT(rxr->me), rxr->next_to_refresh); 4248 return; 4249} 4250 4251 4252/********************************************************************* 4253 * 4254 * Allocate memory for rx_buffer structures. Since we use one 4255 * rx_buffer per received packet, the maximum number of rx_buffer's 4256 * that we'll need is equal to the number of receive descriptors 4257 * that we've allocated. 
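/*
** Aside (illustrative): igb_refresh_mbufs() above runs two
** cursors, i (the slot being refilled) and j (always one ahead),
** and stops when j reaches the caller's limit, so the slot at
** "limit" itself is never written and the tail can never catch
** up with the head. A hypothetical sketch of just the index
** arithmetic:
*/
static inline int
example_refresh_span(int next_to_refresh, int limit, int num_desc)
{
	int i = next_to_refresh, j = next_to_refresh, n = 0;

	if (++j == num_desc)
		j = 0;
	while (j != limit) {
		/* refill descriptor i here */
		n++;
		i = j;
		if (++j == num_desc)
			j = 0;
	}
	return (n);	/* slots refreshed; i is the new next_to_refresh */
}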
4258 * 4259 **********************************************************************/ 4260static int 4261igb_allocate_receive_buffers(struct rx_ring *rxr) 4262{ 4263 struct adapter *adapter = rxr->adapter; 4264 device_t dev = adapter->dev; 4265 struct igb_rx_buf *rxbuf; 4266 int i, bsize, error; 4267 4268 bsize = sizeof(struct igb_rx_buf) * adapter->num_rx_desc; 4269 if (!(rxr->rx_buffers = 4270 (struct igb_rx_buf *) malloc(bsize, 4271 M_DEVBUF, M_NOWAIT | M_ZERO))) { 4272 device_printf(dev, "Unable to allocate rx_buffer memory\n"); 4273 error = ENOMEM; 4274 goto fail; 4275 } 4276 4277 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 4278 1, 0, /* alignment, bounds */ 4279 BUS_SPACE_MAXADDR, /* lowaddr */ 4280 BUS_SPACE_MAXADDR, /* highaddr */ 4281 NULL, NULL, /* filter, filterarg */ 4282 MSIZE, /* maxsize */ 4283 1, /* nsegments */ 4284 MSIZE, /* maxsegsize */ 4285 0, /* flags */ 4286 NULL, /* lockfunc */ 4287 NULL, /* lockfuncarg */ 4288 &rxr->htag))) { 4289 device_printf(dev, "Unable to create RX DMA tag\n"); 4290 goto fail; 4291 } 4292 4293 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), 4294 1, 0, /* alignment, bounds */ 4295 BUS_SPACE_MAXADDR, /* lowaddr */ 4296 BUS_SPACE_MAXADDR, /* highaddr */ 4297 NULL, NULL, /* filter, filterarg */ 4298 MJUM9BYTES, /* maxsize */ 4299 1, /* nsegments */ 4300 MJUM9BYTES, /* maxsegsize */ 4301 0, /* flags */ 4302 NULL, /* lockfunc */ 4303 NULL, /* lockfuncarg */ 4304 &rxr->ptag))) { 4305 device_printf(dev, "Unable to create RX payload DMA tag\n"); 4306 goto fail; 4307 } 4308 4309 for (i = 0; i < adapter->num_rx_desc; i++) { 4310 rxbuf = &rxr->rx_buffers[i]; 4311 error = bus_dmamap_create(rxr->htag, 0, &rxbuf->hmap); 4312 if (error) { 4313 device_printf(dev, 4314 "Unable to create RX head DMA maps\n"); 4315 goto fail; 4316 } 4317 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap); 4318 if (error) { 4319 device_printf(dev, 4320 "Unable to create RX packet DMA maps\n"); 4321 goto fail; 4322 } 4323 } 4324 4325 return (0); 4326 4327fail: 4328 /* Frees all, but can handle partial completion */ 4329 igb_free_receive_structures(adapter); 4330 return (error); 4331} 4332 4333 4334static void 4335igb_free_receive_ring(struct rx_ring *rxr) 4336{ 4337 struct adapter *adapter = rxr->adapter; 4338 struct igb_rx_buf *rxbuf; 4339 4340 4341 for (int i = 0; i < adapter->num_rx_desc; i++) { 4342 rxbuf = &rxr->rx_buffers[i]; 4343 if (rxbuf->m_head != NULL) { 4344 bus_dmamap_sync(rxr->htag, rxbuf->hmap, 4345 BUS_DMASYNC_POSTREAD); 4346 bus_dmamap_unload(rxr->htag, rxbuf->hmap); 4347 rxbuf->m_head->m_flags |= M_PKTHDR; 4348 m_freem(rxbuf->m_head); 4349 } 4350 if (rxbuf->m_pack != NULL) { 4351 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 4352 BUS_DMASYNC_POSTREAD); 4353 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 4354 rxbuf->m_pack->m_flags |= M_PKTHDR; 4355 m_freem(rxbuf->m_pack); 4356 } 4357 rxbuf->m_head = NULL; 4358 rxbuf->m_pack = NULL; 4359 } 4360} 4361 4362 4363/********************************************************************* 4364 * 4365 * Initialize a receive ring and its buffers. 
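/*
** Aside (illustrative): the two tags above implement header
** split: htag bounds a single-segment header buffer of at most
** MSIZE bytes (one mbuf), ptag a single-segment payload cluster
** of up to MJUM9BYTES. With one segment, maxsize and maxsegsize
** are necessarily the same value, as a hypothetical helper makes
** explicit:
*/
static int
example_onebuf_tag(device_t dev, bus_size_t size, bus_dma_tag_t *tag)
{
	return (bus_dma_tag_create(bus_get_dma_tag(dev),
	    1, 0,			/* alignment, bounds */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    size,			/* maxsize */
	    1,				/* nsegments */
	    size,			/* maxsegsize */
	    0, NULL, NULL,		/* flags, lockfunc, lockfuncarg */
	    tag));
}
/* example_onebuf_tag(dev, MSIZE, &rxr->htag) mirrors the htag above */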
4366 * 4367 **********************************************************************/ 4368static int 4369igb_setup_receive_ring(struct rx_ring *rxr) 4370{ 4371 struct adapter *adapter; 4372 struct ifnet *ifp; 4373 device_t dev; 4374 struct igb_rx_buf *rxbuf; 4375 bus_dma_segment_t pseg[1], hseg[1]; 4376 struct lro_ctrl *lro = &rxr->lro; 4377 int rsize, nsegs, error = 0; 4378#ifdef DEV_NETMAP 4379 struct netmap_adapter *na = NA(rxr->adapter->ifp); 4380 struct netmap_slot *slot; 4381#endif /* DEV_NETMAP */ 4382 4383 adapter = rxr->adapter; 4384 dev = adapter->dev; 4385 ifp = adapter->ifp; 4386 4387 /* Clear the ring contents */ 4388 IGB_RX_LOCK(rxr); 4389#ifdef DEV_NETMAP 4390 slot = netmap_reset(na, NR_RX, rxr->me, 0); 4391#endif /* DEV_NETMAP */ 4392 rsize = roundup2(adapter->num_rx_desc * 4393 sizeof(union e1000_adv_rx_desc), IGB_DBA_ALIGN); 4394 bzero((void *)rxr->rx_base, rsize); 4395 4396 /* 4397 ** Free current RX buffer structures and their mbufs 4398 */ 4399 igb_free_receive_ring(rxr); 4400 4401 /* Configure for header split? */ 4402 if (igb_header_split) 4403 rxr->hdr_split = TRUE; 4404 4405 /* Now replenish the ring mbufs */ 4406 for (int j = 0; j < adapter->num_rx_desc; ++j) { 4407 struct mbuf *mh, *mp; 4408 4409 rxbuf = &rxr->rx_buffers[j]; 4410#ifdef DEV_NETMAP 4411 if (slot) { 4412 /* slot sj is mapped to the j-th NIC-ring entry */ 4413 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j); 4414 uint64_t paddr; 4415 void *addr; 4416 4417 addr = PNMB(na, slot + sj, &paddr); 4418 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr); 4419 /* Update descriptor */ 4420 rxr->rx_base[j].read.pkt_addr = htole64(paddr); 4421 continue; 4422 } 4423#endif /* DEV_NETMAP */ 4424 if (rxr->hdr_split == FALSE) 4425 goto skip_head; 4426 4427 /* First the header */ 4428 rxbuf->m_head = m_gethdr(M_NOWAIT, MT_DATA); 4429 if (rxbuf->m_head == NULL) { 4430 error = ENOBUFS; 4431 goto fail; 4432 } 4433 m_adj(rxbuf->m_head, ETHER_ALIGN); 4434 mh = rxbuf->m_head; 4435 mh->m_len = mh->m_pkthdr.len = MHLEN; 4436 mh->m_flags |= M_PKTHDR; 4437 /* Get the memory mapping */ 4438 error = bus_dmamap_load_mbuf_sg(rxr->htag, 4439 rxbuf->hmap, rxbuf->m_head, hseg, 4440 &nsegs, BUS_DMA_NOWAIT); 4441 if (error != 0) /* Nothing elegant to do here */ 4442 goto fail; 4443 bus_dmamap_sync(rxr->htag, 4444 rxbuf->hmap, BUS_DMASYNC_PREREAD); 4445 /* Update descriptor */ 4446 rxr->rx_base[j].read.hdr_addr = htole64(hseg[0].ds_addr); 4447 4448skip_head: 4449 /* Now the payload cluster */ 4450 rxbuf->m_pack = m_getjcl(M_NOWAIT, MT_DATA, 4451 M_PKTHDR, adapter->rx_mbuf_sz); 4452 if (rxbuf->m_pack == NULL) { 4453 error = ENOBUFS; 4454 goto fail; 4455 } 4456 mp = rxbuf->m_pack; 4457 mp->m_pkthdr.len = mp->m_len = adapter->rx_mbuf_sz; 4458 /* Get the memory mapping */ 4459 error = bus_dmamap_load_mbuf_sg(rxr->ptag, 4460 rxbuf->pmap, mp, pseg, 4461 &nsegs, BUS_DMA_NOWAIT); 4462 if (error != 0) 4463 goto fail; 4464 bus_dmamap_sync(rxr->ptag, 4465 rxbuf->pmap, BUS_DMASYNC_PREREAD); 4466 /* Update descriptor */ 4467 rxr->rx_base[j].read.pkt_addr = htole64(pseg[0].ds_addr); 4468 } 4469 4470 /* Setup our descriptor indices */ 4471 rxr->next_to_check = 0; 4472 rxr->next_to_refresh = adapter->num_rx_desc - 1; 4473 rxr->lro_enabled = FALSE; 4474 rxr->rx_split_packets = 0; 4475 rxr->rx_bytes = 0; 4476 4477 rxr->fmp = NULL; 4478 rxr->lmp = NULL; 4479 4480 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4481 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4482 4483 /* 4484 ** Now set up the LRO interface, we 4485 ** also only do head split when 
LRO 4486 ** is enabled, since so often they 4487 ** are undesirable in similar setups. 4488 */ 4489 if (ifp->if_capenable & IFCAP_LRO) { 4490 error = tcp_lro_init(lro); 4491 if (error) { 4492 device_printf(dev, "LRO Initialization failed!\n"); 4493 goto fail; 4494 } 4495 INIT_DEBUGOUT("RX LRO Initialized\n"); 4496 rxr->lro_enabled = TRUE; 4497 lro->ifp = adapter->ifp; 4498 } 4499 4500 IGB_RX_UNLOCK(rxr); 4501 return (0); 4502 4503fail: 4504 igb_free_receive_ring(rxr); 4505 IGB_RX_UNLOCK(rxr); 4506 return (error); 4507} 4508 4509 4510/********************************************************************* 4511 * 4512 * Initialize all receive rings. 4513 * 4514 **********************************************************************/ 4515static int 4516igb_setup_receive_structures(struct adapter *adapter) 4517{ 4518 struct rx_ring *rxr = adapter->rx_rings; 4519 int i; 4520 4521 for (i = 0; i < adapter->num_queues; i++, rxr++) 4522 if (igb_setup_receive_ring(rxr)) 4523 goto fail; 4524 4525 return (0); 4526fail: 4527 /* 4528 * Free RX buffers allocated so far, we will only handle 4529 * the rings that completed, the failing case will have 4530 * cleaned up for itself. 'i' is the endpoint. 4531 */ 4532 for (int j = 0; j < i; ++j) { 4533 rxr = &adapter->rx_rings[j]; 4534 IGB_RX_LOCK(rxr); 4535 igb_free_receive_ring(rxr); 4536 IGB_RX_UNLOCK(rxr); 4537 } 4538 4539 return (ENOBUFS); 4540} 4541 4542/* 4543 * Initialise the RSS mapping for NICs that support multiple transmit/ 4544 * receive rings. 4545 */ 4546static void 4547igb_initialise_rss_mapping(struct adapter *adapter) 4548{ 4549 struct e1000_hw *hw = &adapter->hw; 4550 int i; 4551 int queue_id; 4552 u32 reta; 4553 u32 rss_key[10], mrqc, shift = 0; 4554 4555 /* XXX? */ 4556 if (adapter->hw.mac.type == e1000_82575) 4557 shift = 6; 4558 4559 /* 4560 * The redirection table controls which destination 4561 * queue each bucket redirects traffic to. 4562 * Each DWORD represents four queues, with the LSB 4563 * being the first queue in the DWORD. 4564 * 4565 * This just allocates buckets to queues using round-robin 4566 * allocation. 4567 * 4568 * NOTE: It Just Happens to line up with the default 4569 * RSS allocation method. 4570 */ 4571 4572 /* Warning FM follows */ 4573 reta = 0; 4574 for (i = 0; i < 128; i++) { 4575#ifdef RSS 4576 queue_id = rss_get_indirection_to_bucket(i); 4577 /* 4578 * If we have more queues than buckets, we'll 4579 * end up mapping buckets to a subset of the 4580 * queues. 4581 * 4582 * If we have more buckets than queues, we'll 4583 * end up instead assigning multiple buckets 4584 * to queues. 4585 * 4586 * Both are suboptimal, but we need to handle 4587 * the case so we don't go out of bounds 4588 * indexing arrays and such. 4589 */ 4590 queue_id = queue_id % adapter->num_queues; 4591#else 4592 queue_id = (i % adapter->num_queues); 4593#endif 4594 /* Adjust if required */ 4595 queue_id = queue_id << shift; 4596 4597 /* 4598 * The low 8 bits are for hash value (n+0); 4599 * The next 8 bits are for hash value (n+1), etc. 4600 */ 4601 reta = reta >> 8; 4602 reta = reta | ( ((uint32_t) queue_id) << 24); 4603 if ((i & 3) == 3) { 4604 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta); 4605 reta = 0; 4606 } 4607 } 4608 4609 /* Now fill in hash table */ 4610 4611 /* 4612 * MRQC: Multiple Receive Queues Command 4613 * Set queuing to RSS control, number depends on the device. 
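/*
** Aside (illustrative): the shift/OR sequence above lands bucket
** i in byte (i & 3) of a RETA word, LSB first. With four queues
** and round-robin assignment every RETA register ends up as
** 0x03020100; on 82575 the same ids are pre-shifted left by 6.
** Hypothetical rendering of one packed word:
*/
static inline u32
example_reta_word(const u8 qid[4], u32 shift)
{
	/* e.g. qid = {0,1,2,3}, shift = 0 -> 0x03020100 */
	return (((u32)qid[0] << shift) |
	    (((u32)qid[1] << shift) << 8) |
	    (((u32)qid[2] << shift) << 16) |
	    (((u32)qid[3] << shift) << 24));
}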
4614 */ 4615 mrqc = E1000_MRQC_ENABLE_RSS_8Q; 4616 4617#ifdef RSS 4618 /* XXX ew typecasting */ 4619 rss_getkey((uint8_t *) &rss_key); 4620#else 4621 arc4rand(&rss_key, sizeof(rss_key), 0); 4622#endif 4623 for (i = 0; i < 10; i++) 4624 E1000_WRITE_REG_ARRAY(hw, 4625 E1000_RSSRK(0), i, rss_key[i]); 4626 4627 /* 4628 * Configure the RSS fields to hash upon. 4629 */ 4630 mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 | 4631 E1000_MRQC_RSS_FIELD_IPV4_TCP); 4632 mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 | 4633 E1000_MRQC_RSS_FIELD_IPV6_TCP); 4634 mrqc |=( E1000_MRQC_RSS_FIELD_IPV4_UDP | 4635 E1000_MRQC_RSS_FIELD_IPV6_UDP); 4636 mrqc |=( E1000_MRQC_RSS_FIELD_IPV6_UDP_EX | 4637 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX); 4638 4639 E1000_WRITE_REG(hw, E1000_MRQC, mrqc); 4640} 4641 4642/********************************************************************* 4643 * 4644 * Enable receive unit. 4645 * 4646 **********************************************************************/ 4647static void 4648igb_initialize_receive_units(struct adapter *adapter) 4649{ 4650 struct rx_ring *rxr = adapter->rx_rings; 4651 struct ifnet *ifp = adapter->ifp; 4652 struct e1000_hw *hw = &adapter->hw; 4653 u32 rctl, rxcsum, psize, srrctl = 0; 4654 4655 INIT_DEBUGOUT("igb_initialize_receive_unit: begin"); 4656 4657 /* 4658 * Make sure receives are disabled while setting 4659 * up the descriptor ring 4660 */ 4661 rctl = E1000_READ_REG(hw, E1000_RCTL); 4662 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); 4663 4664 /* 4665 ** Set up for header split 4666 */ 4667 if (igb_header_split) { 4668 /* Use a standard mbuf for the header */ 4669 srrctl |= IGB_HDR_BUF << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; 4670 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; 4671 } else 4672 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; 4673 4674 /* 4675 ** Set up for jumbo frames 4676 */ 4677 if (ifp->if_mtu > ETHERMTU) { 4678 rctl |= E1000_RCTL_LPE; 4679 if (adapter->rx_mbuf_sz == MJUMPAGESIZE) { 4680 srrctl |= 4096 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 4681 rctl |= E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX; 4682 } else if (adapter->rx_mbuf_sz > MJUMPAGESIZE) { 4683 srrctl |= 8192 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 4684 rctl |= E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX; 4685 } 4686 /* Set maximum packet len */ 4687 psize = adapter->max_frame_size; 4688 /* are we on a vlan? */ 4689 if (adapter->ifp->if_vlantrunk != NULL) 4690 psize += VLAN_TAG_SIZE; 4691 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, psize); 4692 } else { 4693 rctl &= ~E1000_RCTL_LPE; 4694 srrctl |= 2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT; 4695 rctl |= E1000_RCTL_SZ_2048; 4696 } 4697 4698 /* 4699 * If TX flow control is disabled and there's >1 queue defined, 4700 * enable DROP. 4701 * 4702 * This drops frames rather than hanging the RX MAC for all queues. 
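/*
** Aside (illustrative): SRRCTL expresses the packet buffer size
** in 1 KB units, hence the ">> E1000_SRRCTL_BSIZEPKT_SHIFT"
** above; assuming the shift is 10 as in this driver's headers,
** 2048 -> 2, 4096 -> 4 and 8192 -> 8. Hypothetical helper:
*/
static inline u32
example_srrctl_bsize(u32 buf_bytes)
{
	/* e.g. 8192 -> 8 (1 KB units) */
	return (buf_bytes >> E1000_SRRCTL_BSIZEPKT_SHIFT);
}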
4703 */ 4704 if ((adapter->num_queues > 1) && 4705 (adapter->fc == e1000_fc_none || 4706 adapter->fc == e1000_fc_rx_pause)) { 4707 srrctl |= E1000_SRRCTL_DROP_EN; 4708 } 4709 4710 /* Setup the Base and Length of the Rx Descriptor Rings */ 4711 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4712 u64 bus_addr = rxr->rxdma.dma_paddr; 4713 u32 rxdctl; 4714 4715 E1000_WRITE_REG(hw, E1000_RDLEN(i), 4716 adapter->num_rx_desc * sizeof(struct e1000_rx_desc)); 4717 E1000_WRITE_REG(hw, E1000_RDBAH(i), 4718 (uint32_t)(bus_addr >> 32)); 4719 E1000_WRITE_REG(hw, E1000_RDBAL(i), 4720 (uint32_t)bus_addr); 4721 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl); 4722 /* Enable this Queue */ 4723 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i)); 4724 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; 4725 rxdctl &= 0xFFF00000; 4726 rxdctl |= IGB_RX_PTHRESH; 4727 rxdctl |= IGB_RX_HTHRESH << 8; 4728 rxdctl |= IGB_RX_WTHRESH << 16; 4729 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl); 4730 } 4731 4732 /* 4733 ** Setup for RX MultiQueue 4734 */ 4735 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); 4736 if (adapter->num_queues >1) { 4737 4738 /* rss setup */ 4739 igb_initialise_rss_mapping(adapter); 4740 4741 /* 4742 ** NOTE: Receive Full-Packet Checksum Offload 4743 ** is mutually exclusive with Multiqueue. However 4744 ** this is not the same as TCP/IP checksums which 4745 ** still work. 4746 */ 4747 rxcsum |= E1000_RXCSUM_PCSD; 4748#if __FreeBSD_version >= 800000 4749 /* For SCTP Offload */ 4750 if ((hw->mac.type != e1000_82575) && 4751 (ifp->if_capenable & IFCAP_RXCSUM)) 4752 rxcsum |= E1000_RXCSUM_CRCOFL; 4753#endif 4754 } else { 4755 /* Non RSS setup */ 4756 if (ifp->if_capenable & IFCAP_RXCSUM) { 4757 rxcsum |= E1000_RXCSUM_IPPCSE; 4758#if __FreeBSD_version >= 800000 4759 if (adapter->hw.mac.type != e1000_82575) 4760 rxcsum |= E1000_RXCSUM_CRCOFL; 4761#endif 4762 } else 4763 rxcsum &= ~E1000_RXCSUM_TUOFL; 4764 } 4765 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); 4766 4767 /* Setup the Receive Control Register */ 4768 rctl &= ~(3 << E1000_RCTL_MO_SHIFT); 4769 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO | 4770 E1000_RCTL_RDMTS_HALF | 4771 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); 4772 /* Strip CRC bytes. */ 4773 rctl |= E1000_RCTL_SECRC; 4774 /* Make sure VLAN Filters are off */ 4775 rctl &= ~E1000_RCTL_VFE; 4776 /* Don't store bad packets */ 4777 rctl &= ~E1000_RCTL_SBP; 4778 4779 /* Enable Receives */ 4780 E1000_WRITE_REG(hw, E1000_RCTL, rctl); 4781 4782 /* 4783 * Setup the HW Rx Head and Tail Descriptor Pointers 4784 * - needs to be after enable 4785 */ 4786 for (int i = 0; i < adapter->num_queues; i++) { 4787 rxr = &adapter->rx_rings[i]; 4788 E1000_WRITE_REG(hw, E1000_RDH(i), rxr->next_to_check); 4789#ifdef DEV_NETMAP 4790 /* 4791 * an init() while a netmap client is active must 4792 * preserve the rx buffers passed to userspace. 4793 * In this driver it means we adjust RDT to 4794 * something different from next_to_refresh 4795 * (which is not used in netmap mode). 
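/*
** Aside (illustrative): the netmap adjustment described above is
** "RDT = next_to_refresh - rxspace, modulo the ring size"; the
** code just below open-codes the modulo because t can be at most
** one ring length off in either direction. Hypothetical
** equivalent:
*/
static inline int
example_ring_mod(int t, int num_desc)
{
	/* e.g. example_ring_mod(-3, 1024) == 1021 */
	if (t >= num_desc)
		t -= num_desc;
	else if (t < 0)
		t += num_desc;
	return (t);
}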
4796 */ 4797 if (ifp->if_capenable & IFCAP_NETMAP) { 4798 struct netmap_adapter *na = NA(adapter->ifp); 4799 struct netmap_kring *kring = &na->rx_rings[i]; 4800 int t = rxr->next_to_refresh - nm_kr_rxspace(kring); 4801 4802 if (t >= adapter->num_rx_desc) 4803 t -= adapter->num_rx_desc; 4804 else if (t < 0) 4805 t += adapter->num_rx_desc; 4806 E1000_WRITE_REG(hw, E1000_RDT(i), t); 4807 } else 4808#endif /* DEV_NETMAP */ 4809 E1000_WRITE_REG(hw, E1000_RDT(i), rxr->next_to_refresh); 4810 } 4811 return; 4812} 4813 4814/********************************************************************* 4815 * 4816 * Free receive rings. 4817 * 4818 **********************************************************************/ 4819static void 4820igb_free_receive_structures(struct adapter *adapter) 4821{ 4822 struct rx_ring *rxr = adapter->rx_rings; 4823 4824 for (int i = 0; i < adapter->num_queues; i++, rxr++) { 4825 struct lro_ctrl *lro = &rxr->lro; 4826 igb_free_receive_buffers(rxr); 4827 tcp_lro_free(lro); 4828 igb_dma_free(adapter, &rxr->rxdma); 4829 } 4830 4831 free(adapter->rx_rings, M_DEVBUF); 4832} 4833 4834/********************************************************************* 4835 * 4836 * Free receive ring data structures. 4837 * 4838 **********************************************************************/ 4839static void 4840igb_free_receive_buffers(struct rx_ring *rxr) 4841{ 4842 struct adapter *adapter = rxr->adapter; 4843 struct igb_rx_buf *rxbuf; 4844 int i; 4845 4846 INIT_DEBUGOUT("free_receive_structures: begin"); 4847 4848 /* Cleanup any existing buffers */ 4849 if (rxr->rx_buffers != NULL) { 4850 for (i = 0; i < adapter->num_rx_desc; i++) { 4851 rxbuf = &rxr->rx_buffers[i]; 4852 if (rxbuf->m_head != NULL) { 4853 bus_dmamap_sync(rxr->htag, rxbuf->hmap, 4854 BUS_DMASYNC_POSTREAD); 4855 bus_dmamap_unload(rxr->htag, rxbuf->hmap); 4856 rxbuf->m_head->m_flags |= M_PKTHDR; 4857 m_freem(rxbuf->m_head); 4858 } 4859 if (rxbuf->m_pack != NULL) { 4860 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, 4861 BUS_DMASYNC_POSTREAD); 4862 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 4863 rxbuf->m_pack->m_flags |= M_PKTHDR; 4864 m_freem(rxbuf->m_pack); 4865 } 4866 rxbuf->m_head = NULL; 4867 rxbuf->m_pack = NULL; 4868 if (rxbuf->hmap != NULL) { 4869 bus_dmamap_destroy(rxr->htag, rxbuf->hmap); 4870 rxbuf->hmap = NULL; 4871 } 4872 if (rxbuf->pmap != NULL) { 4873 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap); 4874 rxbuf->pmap = NULL; 4875 } 4876 } 4877 if (rxr->rx_buffers != NULL) { 4878 free(rxr->rx_buffers, M_DEVBUF); 4879 rxr->rx_buffers = NULL; 4880 } 4881 } 4882 4883 if (rxr->htag != NULL) { 4884 bus_dma_tag_destroy(rxr->htag); 4885 rxr->htag = NULL; 4886 } 4887 if (rxr->ptag != NULL) { 4888 bus_dma_tag_destroy(rxr->ptag); 4889 rxr->ptag = NULL; 4890 } 4891} 4892 4893static __inline void 4894igb_rx_discard(struct rx_ring *rxr, int i) 4895{ 4896 struct igb_rx_buf *rbuf; 4897 4898 rbuf = &rxr->rx_buffers[i]; 4899 4900 /* Partially received? Free the chain */ 4901 if (rxr->fmp != NULL) { 4902 rxr->fmp->m_flags |= M_PKTHDR; 4903 m_freem(rxr->fmp); 4904 rxr->fmp = NULL; 4905 rxr->lmp = NULL; 4906 } 4907 4908 /* 4909 ** With advanced descriptors the writeback 4910 ** clobbers the buffer addrs, so its easier 4911 ** to just free the existing mbufs and take 4912 ** the normal refresh path to get new buffers 4913 ** and mapping. 
4914 */ 4915 if (rbuf->m_head) { 4916 m_free(rbuf->m_head); 4917 rbuf->m_head = NULL; 4918 bus_dmamap_unload(rxr->htag, rbuf->hmap); 4919 } 4920 4921 if (rbuf->m_pack) { 4922 m_free(rbuf->m_pack); 4923 rbuf->m_pack = NULL; 4924 bus_dmamap_unload(rxr->ptag, rbuf->pmap); 4925 } 4926 4927 return; 4928} 4929 4930static __inline void 4931igb_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype) 4932{ 4933 4934 /* 4935 * ATM LRO is only for IPv4/TCP packets and TCP checksum of the packet 4936 * should be computed by hardware. Also it should not have VLAN tag in 4937 * ethernet header. 4938 */ 4939 if (rxr->lro_enabled && 4940 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 4941 (ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && 4942 (ptype & (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP)) == 4943 (E1000_RXDADV_PKTTYPE_IPV4 | E1000_RXDADV_PKTTYPE_TCP) && 4944 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) == 4945 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) { 4946 /* 4947 * Send to the stack if: 4948 ** - LRO not enabled, or 4949 ** - no LRO resources, or 4950 ** - lro enqueue fails 4951 */ 4952 if (rxr->lro.lro_cnt != 0) 4953 if (tcp_lro_rx(&rxr->lro, m, 0) == 0) 4954 return; 4955 } 4956 IGB_RX_UNLOCK(rxr); 4957 (*ifp->if_input)(ifp, m); 4958 IGB_RX_LOCK(rxr); 4959} 4960 4961/********************************************************************* 4962 * 4963 * This routine executes in interrupt context. It replenishes 4964 * the mbufs in the descriptor and sends data which has been 4965 * dma'ed into host memory to upper layer. 4966 * 4967 * We loop at most count times if count is > 0, or until done if 4968 * count < 0. 4969 * 4970 * Return TRUE if more to clean, FALSE otherwise 4971 *********************************************************************/ 4972static bool 4973igb_rxeof(struct igb_queue *que, int count, int *done) 4974{ 4975 struct adapter *adapter = que->adapter; 4976 struct rx_ring *rxr = que->rxr; 4977 struct ifnet *ifp = adapter->ifp; 4978 struct lro_ctrl *lro = &rxr->lro; 4979 int i, processed = 0, rxdone = 0; 4980 u32 ptype, staterr = 0; 4981 union e1000_adv_rx_desc *cur; 4982 4983 IGB_RX_LOCK(rxr); 4984 /* Sync the ring. 
*/ 4985 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 4986 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4987 4988#ifdef DEV_NETMAP 4989 if (netmap_rx_irq(ifp, rxr->me, &processed)) { 4990 IGB_RX_UNLOCK(rxr); 4991 return (FALSE); 4992 } 4993#endif /* DEV_NETMAP */ 4994 4995 /* Main clean loop */ 4996 for (i = rxr->next_to_check; count != 0;) { 4997 struct mbuf *sendmp, *mh, *mp; 4998 struct igb_rx_buf *rxbuf; 4999 u16 hlen, plen, hdr, vtag, pkt_info; 5000 bool eop = FALSE; 5001 5002 cur = &rxr->rx_base[i]; 5003 staterr = le32toh(cur->wb.upper.status_error); 5004 if ((staterr & E1000_RXD_STAT_DD) == 0) 5005 break; 5006 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 5007 break; 5008 count--; 5009 sendmp = mh = mp = NULL; 5010 cur->wb.upper.status_error = 0; 5011 rxbuf = &rxr->rx_buffers[i]; 5012 plen = le16toh(cur->wb.upper.length); 5013 ptype = le32toh(cur->wb.lower.lo_dword.data) & IGB_PKTTYPE_MASK; 5014 if (((adapter->hw.mac.type == e1000_i350) || 5015 (adapter->hw.mac.type == e1000_i354)) && 5016 (staterr & E1000_RXDEXT_STATERR_LB)) 5017 vtag = be16toh(cur->wb.upper.vlan); 5018 else 5019 vtag = le16toh(cur->wb.upper.vlan); 5020 hdr = le16toh(cur->wb.lower.lo_dword.hs_rss.hdr_info); 5021 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info); 5022 eop = ((staterr & E1000_RXD_STAT_EOP) == E1000_RXD_STAT_EOP); 5023 5024 /* 5025 * Free the frame (all segments) if we're at EOP and 5026 * it's an error. 5027 * 5028 * The datasheet states that EOP + status is only valid for 5029 * the final segment in a multi-segment frame. 5030 */ 5031 if (eop && ((staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) != 0)) { 5032 adapter->dropped_pkts++; 5033 ++rxr->rx_discarded; 5034 igb_rx_discard(rxr, i); 5035 goto next_desc; 5036 } 5037 5038 /* 5039 ** The way the hardware is configured to 5040 ** split, it will ONLY use the header buffer 5041 ** when header split is enabled, otherwise we 5042 ** get normal behavior, ie, both header and 5043 ** payload are DMA'd into the payload buffer. 5044 ** 5045 ** The fmp test is to catch the case where a 5046 ** packet spans multiple descriptors, in that 5047 ** case only the first header is valid. 5048 */ 5049 if (rxr->hdr_split && rxr->fmp == NULL) { 5050 bus_dmamap_unload(rxr->htag, rxbuf->hmap); 5051 hlen = (hdr & E1000_RXDADV_HDRBUFLEN_MASK) >> 5052 E1000_RXDADV_HDRBUFLEN_SHIFT; 5053 if (hlen > IGB_HDR_BUF) 5054 hlen = IGB_HDR_BUF; 5055 mh = rxr->rx_buffers[i].m_head; 5056 mh->m_len = hlen; 5057 /* clear buf pointer for refresh */ 5058 rxbuf->m_head = NULL; 5059 /* 5060 ** Get the payload length, this 5061 ** could be zero if its a small 5062 ** packet. 5063 */ 5064 if (plen > 0) { 5065 mp = rxr->rx_buffers[i].m_pack; 5066 mp->m_len = plen; 5067 mh->m_next = mp; 5068 /* clear buf pointer */ 5069 rxbuf->m_pack = NULL; 5070 rxr->rx_split_packets++; 5071 } 5072 } else { 5073 /* 5074 ** Either no header split, or a 5075 ** secondary piece of a fragmented 5076 ** split packet. 
5077 */ 5078 mh = rxr->rx_buffers[i].m_pack; 5079 mh->m_len = plen; 5080 /* clear buf info for refresh */ 5081 rxbuf->m_pack = NULL; 5082 } 5083 bus_dmamap_unload(rxr->ptag, rxbuf->pmap); 5084 5085 ++processed; /* So we know when to refresh */ 5086 5087 /* Initial frame - setup */ 5088 if (rxr->fmp == NULL) { 5089 mh->m_pkthdr.len = mh->m_len; 5090 /* Save the head of the chain */ 5091 rxr->fmp = mh; 5092 rxr->lmp = mh; 5093 if (mp != NULL) { 5094 /* Add payload if split */ 5095 mh->m_pkthdr.len += mp->m_len; 5096 rxr->lmp = mh->m_next; 5097 } 5098 } else { 5099 /* Chain mbuf's together */ 5100 rxr->lmp->m_next = mh; 5101 rxr->lmp = rxr->lmp->m_next; 5102 rxr->fmp->m_pkthdr.len += mh->m_len; 5103 } 5104 5105 if (eop) { 5106 rxr->fmp->m_pkthdr.rcvif = ifp; 5107 rxr->rx_packets++; 5108 /* capture data for AIM */ 5109 rxr->packets++; 5110 rxr->bytes += rxr->fmp->m_pkthdr.len; 5111 rxr->rx_bytes += rxr->fmp->m_pkthdr.len; 5112 5113 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) 5114 igb_rx_checksum(staterr, rxr->fmp, ptype); 5115 5116 if ((ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 && 5117 (staterr & E1000_RXD_STAT_VP) != 0) { 5118 rxr->fmp->m_pkthdr.ether_vtag = vtag; 5119 rxr->fmp->m_flags |= M_VLANTAG; 5120 } 5121 5122 /* 5123 * In case of multiqueue, we have RXCSUM.PCSD bit set 5124 * and never cleared. This means we have RSS hash 5125 * available to be used. 5126 */ 5127 if (adapter->num_queues > 1) { 5128 rxr->fmp->m_pkthdr.flowid = 5129 le32toh(cur->wb.lower.hi_dword.rss); 5130 switch (pkt_info & E1000_RXDADV_RSSTYPE_MASK) { 5131 case E1000_RXDADV_RSSTYPE_IPV4_TCP: 5132 M_HASHTYPE_SET(rxr->fmp, 5133 M_HASHTYPE_RSS_TCP_IPV4); 5134 break; 5135 case E1000_RXDADV_RSSTYPE_IPV4: 5136 M_HASHTYPE_SET(rxr->fmp, 5137 M_HASHTYPE_RSS_IPV4); 5138 break; 5139 case E1000_RXDADV_RSSTYPE_IPV6_TCP: 5140 M_HASHTYPE_SET(rxr->fmp, 5141 M_HASHTYPE_RSS_TCP_IPV6); 5142 break; 5143 case E1000_RXDADV_RSSTYPE_IPV6_EX: 5144 M_HASHTYPE_SET(rxr->fmp, 5145 M_HASHTYPE_RSS_IPV6_EX); 5146 break; 5147 case E1000_RXDADV_RSSTYPE_IPV6: 5148 M_HASHTYPE_SET(rxr->fmp, 5149 M_HASHTYPE_RSS_IPV6); 5150 break; 5151 case E1000_RXDADV_RSSTYPE_IPV6_TCP_EX: 5152 M_HASHTYPE_SET(rxr->fmp, 5153 M_HASHTYPE_RSS_TCP_IPV6_EX); 5154 break; 5155 default: 5156 /* XXX fallthrough */ 5157 M_HASHTYPE_SET(rxr->fmp, 5158 M_HASHTYPE_OPAQUE_HASH); 5159 } 5160 } else { 5161#ifndef IGB_LEGACY_TX 5162 rxr->fmp->m_pkthdr.flowid = que->msix; 5163 M_HASHTYPE_SET(rxr->fmp, M_HASHTYPE_OPAQUE); 5164#endif 5165 } 5166 sendmp = rxr->fmp; 5167 /* Make sure to set M_PKTHDR. */ 5168 sendmp->m_flags |= M_PKTHDR; 5169 rxr->fmp = NULL; 5170 rxr->lmp = NULL; 5171 } 5172 5173next_desc: 5174 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map, 5175 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 5176 5177 /* Advance our pointers to the next descriptor. 
*/ 5178 if (++i == adapter->num_rx_desc) 5179 i = 0; 5180 /* 5181 ** Send to the stack or LRO 5182 */ 5183 if (sendmp != NULL) { 5184 rxr->next_to_check = i; 5185 igb_rx_input(rxr, ifp, sendmp, ptype); 5186 i = rxr->next_to_check; 5187 rxdone++; 5188 } 5189 5190 /* Every 8 descriptors we go to refresh mbufs */ 5191 if (processed == 8) { 5192 igb_refresh_mbufs(rxr, i); 5193 processed = 0; 5194 } 5195 } 5196 5197 /* Catch any remainders */ 5198 if (igb_rx_unrefreshed(rxr)) 5199 igb_refresh_mbufs(rxr, i); 5200 5201 rxr->next_to_check = i; 5202 5203 /* 5204 * Flush any outstanding LRO work 5205 */ 5206 tcp_lro_flush_all(lro); 5207 5208 if (done != NULL) 5209 *done += rxdone; 5210 5211 IGB_RX_UNLOCK(rxr); 5212 return ((staterr & E1000_RXD_STAT_DD) ? TRUE : FALSE); 5213} 5214 5215/********************************************************************* 5216 * 5217 * Verify that the hardware indicated that the checksum is valid. 5218 * Inform the stack about the status of checksum so that stack 5219 * doesn't spend time verifying the checksum. 5220 * 5221 *********************************************************************/ 5222static void 5223igb_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype) 5224{ 5225 u16 status = (u16)staterr; 5226 u8 errors = (u8) (staterr >> 24); 5227 int sctp; 5228 5229 /* Ignore Checksum bit is set */ 5230 if (status & E1000_RXD_STAT_IXSM) { 5231 mp->m_pkthdr.csum_flags = 0; 5232 return; 5233 } 5234 5235 if ((ptype & E1000_RXDADV_PKTTYPE_ETQF) == 0 && 5236 (ptype & E1000_RXDADV_PKTTYPE_SCTP) != 0) 5237 sctp = 1; 5238 else 5239 sctp = 0; 5240 if (status & E1000_RXD_STAT_IPCS) { 5241 /* Did it pass? */ 5242 if (!(errors & E1000_RXD_ERR_IPE)) { 5243 /* IP Checksum Good */ 5244 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED; 5245 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; 5246 } else 5247 mp->m_pkthdr.csum_flags = 0; 5248 } 5249 5250 if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { 5251 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); 5252#if __FreeBSD_version >= 800000 5253 if (sctp) /* reassign */ 5254 type = CSUM_SCTP_VALID; 5255#endif 5256 /* Did it pass? 
*/ 5257 if (!(errors & E1000_RXD_ERR_TCPE)) { 5258 mp->m_pkthdr.csum_flags |= type; 5259 if (sctp == 0) 5260 mp->m_pkthdr.csum_data = htons(0xffff); 5261 } 5262 } 5263 return; 5264} 5265 5266/* 5267 * This routine is run via a vlan 5268 * config EVENT 5269 */ 5270static void 5271igb_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) 5272{ 5273 struct adapter *adapter = ifp->if_softc; 5274 u32 index, bit; 5275 5276 if (ifp->if_softc != arg) /* Not our event */ 5277 return; 5278 5279 if ((vtag == 0) || (vtag > 4095)) /* Invalid */ 5280 return; 5281 5282 IGB_CORE_LOCK(adapter); 5283 index = (vtag >> 5) & 0x7F; 5284 bit = vtag & 0x1F; 5285 adapter->shadow_vfta[index] |= (1 << bit); 5286 ++adapter->num_vlans; 5287 /* Change hw filter setting */ 5288 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) 5289 igb_setup_vlan_hw_support(adapter); 5290 IGB_CORE_UNLOCK(adapter); 5291} 5292 5293/* 5294 * This routine is run via a vlan 5295 * unconfig EVENT 5296 */ 5297static void 5298igb_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) 5299{ 5300 struct adapter *adapter = ifp->if_softc; 5301 u32 index, bit; 5302 5303 if (ifp->if_softc != arg) 5304 return; 5305 5306 if ((vtag == 0) || (vtag > 4095)) /* Invalid */ 5307 return; 5308 5309 IGB_CORE_LOCK(adapter); 5310 index = (vtag >> 5) & 0x7F; 5311 bit = vtag & 0x1F; 5312 adapter->shadow_vfta[index] &= ~(1 << bit); 5313 --adapter->num_vlans; 5314 /* Change hw filter setting */ 5315 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) 5316 igb_setup_vlan_hw_support(adapter); 5317 IGB_CORE_UNLOCK(adapter); 5318} 5319 5320static void 5321igb_setup_vlan_hw_support(struct adapter *adapter) 5322{ 5323 struct e1000_hw *hw = &adapter->hw; 5324 struct ifnet *ifp = adapter->ifp; 5325 u32 reg; 5326 5327 if (adapter->vf_ifp) { 5328 e1000_rlpml_set_vf(hw, 5329 adapter->max_frame_size + VLAN_TAG_SIZE); 5330 return; 5331 } 5332 5333 reg = E1000_READ_REG(hw, E1000_CTRL); 5334 reg |= E1000_CTRL_VME; 5335 E1000_WRITE_REG(hw, E1000_CTRL, reg); 5336 5337 /* Enable the Filter Table */ 5338 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { 5339 reg = E1000_READ_REG(hw, E1000_RCTL); 5340 reg &= ~E1000_RCTL_CFIEN; 5341 reg |= E1000_RCTL_VFE; 5342 E1000_WRITE_REG(hw, E1000_RCTL, reg); 5343 } 5344 5345 /* Update the frame size */ 5346 E1000_WRITE_REG(&adapter->hw, E1000_RLPML, 5347 adapter->max_frame_size + VLAN_TAG_SIZE); 5348 5349 /* Don't bother with table if no vlans */ 5350 if ((adapter->num_vlans == 0) || 5351 ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)) 5352 return; 5353 /* 5354 ** A soft reset zeroes out the VFTA, so 5355 ** we need to repopulate it now.
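/*
** Aside (illustrative): shadow_vfta is a 4096-bit bitmap kept as
** 128 32-bit words, so a VLAN id selects word (vtag >> 5) and bit
** (vtag & 0x1F), exactly as igb_register_vlan() above computes.
** Hypothetical helper with a worked value:
*/
static inline int
example_vfta_is_set(const u32 *shadow_vfta, u16 vtag)
{
	/* e.g. vtag 100 -> word 3, bit 4 */
	return ((shadow_vfta[(vtag >> 5) & 0x7F] >> (vtag & 0x1F)) & 1);
}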
5356 */ 5357 for (int i = 0; i < IGB_VFTA_SIZE; i++) 5358 if (adapter->shadow_vfta[i] != 0) { 5359 if (adapter->vf_ifp) 5360 e1000_vfta_set_vf(hw, 5361 adapter->shadow_vfta[i], TRUE); 5362 else 5363 e1000_write_vfta(hw, 5364 i, adapter->shadow_vfta[i]); 5365 } 5366} 5367 5368static void 5369igb_enable_intr(struct adapter *adapter) 5370{ 5371 /* With RSS set up what to auto clear */ 5372 if (adapter->msix_mem) { 5373 u32 mask = (adapter->que_mask | adapter->link_mask); 5374 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, mask); 5375 E1000_WRITE_REG(&adapter->hw, E1000_EIAM, mask); 5376 E1000_WRITE_REG(&adapter->hw, E1000_EIMS, mask); 5377 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 5378 E1000_IMS_LSC); 5379 } else { 5380 E1000_WRITE_REG(&adapter->hw, E1000_IMS, 5381 IMS_ENABLE_MASK); 5382 } 5383 E1000_WRITE_FLUSH(&adapter->hw); 5384 5385 return; 5386} 5387 5388static void 5389igb_disable_intr(struct adapter *adapter) 5390{ 5391 if (adapter->msix_mem) { 5392 E1000_WRITE_REG(&adapter->hw, E1000_EIMC, ~0); 5393 E1000_WRITE_REG(&adapter->hw, E1000_EIAC, 0); 5394 } 5395 E1000_WRITE_REG(&adapter->hw, E1000_IMC, ~0); 5396 E1000_WRITE_FLUSH(&adapter->hw); 5397 return; 5398} 5399 5400/* 5401 * Bit of a misnomer, what this really means is 5402 * to enable OS management of the system... aka 5403 * to disable special hardware management features 5404 */ 5405static void 5406igb_init_manageability(struct adapter *adapter) 5407{ 5408 if (adapter->has_manage) { 5409 int manc2h = E1000_READ_REG(&adapter->hw, E1000_MANC2H); 5410 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5411 5412 /* disable hardware interception of ARP */ 5413 manc &= ~(E1000_MANC_ARP_EN); 5414 5415 /* enable receiving management packets to the host */ 5416 manc |= E1000_MANC_EN_MNG2HOST; 5417 manc2h |= 1 << 5; /* Mng Port 623 */ 5418 manc2h |= 1 << 6; /* Mng Port 664 */ 5419 E1000_WRITE_REG(&adapter->hw, E1000_MANC2H, manc2h); 5420 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5421 } 5422} 5423 5424/* 5425 * Give control back to hardware management 5426 * controller if there is one. 5427 */ 5428static void 5429igb_release_manageability(struct adapter *adapter) 5430{ 5431 if (adapter->has_manage) { 5432 int manc = E1000_READ_REG(&adapter->hw, E1000_MANC); 5433 5434 /* re-enable hardware interception of ARP */ 5435 manc |= E1000_MANC_ARP_EN; 5436 manc &= ~E1000_MANC_EN_MNG2HOST; 5437 5438 E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc); 5439 } 5440} 5441 5442/* 5443 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. 5444 * For ASF and Pass Through versions of f/w this means that 5445 * the driver is loaded. 5446 * 5447 */ 5448static void 5449igb_get_hw_control(struct adapter *adapter) 5450{ 5451 u32 ctrl_ext; 5452 5453 if (adapter->vf_ifp) 5454 return; 5455 5456 /* Let firmware know the driver has taken over */ 5457 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5458 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5459 ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); 5460} 5461 5462/* 5463 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. 5464 * For ASF and Pass Through versions of f/w this means that the 5465 * driver is no longer loaded. 
5466 * 5467 */ 5468static void 5469igb_release_hw_control(struct adapter *adapter) 5470{ 5471 u32 ctrl_ext; 5472 5473 if (adapter->vf_ifp) 5474 return; 5475 5476 /* Let the firmware take over control of h/w */ 5477 ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT); 5478 E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, 5479 ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); 5480} 5481 5482static int 5483igb_is_valid_ether_addr(uint8_t *addr) 5484{ 5485 char zero_addr[6] = { 0, 0, 0, 0, 0, 0 }; 5486 5487 if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) { 5488 return (FALSE); 5489 } 5490 5491 return (TRUE); 5492} 5493 5494 5495/* 5496 * Enable PCI Wake On LAN capability 5497 */ 5498static void 5499igb_enable_wakeup(device_t dev) 5500{ 5501 u16 cap, status; 5502 u8 id; 5503 5504 /* First find the capabilities pointer */ 5505 cap = pci_read_config(dev, PCIR_CAP_PTR, 2); 5506 /* Read the PM Capabilities */ 5507 id = pci_read_config(dev, cap, 1); 5508 if (id != PCIY_PMG) /* Something wrong */ 5509 return; 5510 /* OK, we have the power capabilities, so 5511 now get the status register */ 5512 cap += PCIR_POWER_STATUS; 5513 status = pci_read_config(dev, cap, 2); 5514 status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE; 5515 pci_write_config(dev, cap, status, 2); 5516 return; 5517} 5518 5519static void 5520igb_led_func(void *arg, int onoff) 5521{ 5522 struct adapter *adapter = arg; 5523 5524 IGB_CORE_LOCK(adapter); 5525 if (onoff) { 5526 e1000_setup_led(&adapter->hw); 5527 e1000_led_on(&adapter->hw); 5528 } else { 5529 e1000_led_off(&adapter->hw); 5530 e1000_cleanup_led(&adapter->hw); 5531 } 5532 IGB_CORE_UNLOCK(adapter); 5533} 5534 5535static uint64_t 5536igb_get_vf_counter(if_t ifp, ift_counter cnt) 5537{ 5538 struct adapter *adapter; 5539 struct e1000_vf_stats *stats; 5540#ifndef IGB_LEGACY_TX 5541 struct tx_ring *txr; 5542 uint64_t rv; 5543#endif 5544 5545 adapter = if_getsoftc(ifp); 5546 stats = (struct e1000_vf_stats *)adapter->stats; 5547 5548 switch (cnt) { 5549 case IFCOUNTER_IPACKETS: 5550 return (stats->gprc); 5551 case IFCOUNTER_OPACKETS: 5552 return (stats->gptc); 5553 case IFCOUNTER_IBYTES: 5554 return (stats->gorc); 5555 case IFCOUNTER_OBYTES: 5556 return (stats->gotc); 5557 case IFCOUNTER_IMCASTS: 5558 return (stats->mprc); 5559 case IFCOUNTER_IERRORS: 5560 return (adapter->dropped_pkts); 5561 case IFCOUNTER_OERRORS: 5562 return (adapter->watchdog_events); 5563#ifndef IGB_LEGACY_TX 5564 case IFCOUNTER_OQDROPS: 5565 rv = 0; 5566 txr = adapter->tx_rings; 5567 for (int i = 0; i < adapter->num_queues; i++, txr++) 5568 rv += txr->br->br_drops; 5569 return (rv); 5570#endif 5571 default: 5572 return (if_get_counter_default(ifp, cnt)); 5573 } 5574} 5575 5576static uint64_t 5577igb_get_counter(if_t ifp, ift_counter cnt) 5578{ 5579 struct adapter *adapter; 5580 struct e1000_hw_stats *stats; 5581#ifndef IGB_LEGACY_TX 5582 struct tx_ring *txr; 5583 uint64_t rv; 5584#endif 5585 5586 adapter = if_getsoftc(ifp); 5587 if (adapter->vf_ifp) 5588 return (igb_get_vf_counter(ifp, cnt)); 5589 5590 stats = (struct e1000_hw_stats *)adapter->stats; 5591 5592 switch (cnt) { 5593 case IFCOUNTER_IPACKETS: 5594 return (stats->gprc); 5595 case IFCOUNTER_OPACKETS: 5596 return (stats->gptc); 5597 case IFCOUNTER_IBYTES: 5598 return (stats->gorc); 5599 case IFCOUNTER_OBYTES: 5600 return (stats->gotc); 5601 case IFCOUNTER_IMCASTS: 5602 return (stats->mprc); 5603 case IFCOUNTER_OMCASTS: 5604 return (stats->mptc); 5605 case IFCOUNTER_IERRORS: 5606 return (adapter->dropped_pkts + stats->rxerrc + 5607 stats->crcerrs +
stats->algnerrc + 5608 stats->ruc + stats->roc + stats->cexterr); 5609 case IFCOUNTER_OERRORS: 5610 return (stats->ecol + stats->latecol + 5611 adapter->watchdog_events); 5612 case IFCOUNTER_COLLISIONS: 5613 return (stats->colc); 5614 case IFCOUNTER_IQDROPS: 5615 return (stats->mpc); 5616#ifndef IGB_LEGACY_TX 5617 case IFCOUNTER_OQDROPS: 5618 rv = 0; 5619 txr = adapter->tx_rings; 5620 for (int i = 0; i < adapter->num_queues; i++, txr++) 5621 rv += txr->br->br_drops; 5622 return (rv); 5623#endif 5624 default: 5625 return (if_get_counter_default(ifp, cnt)); 5626 } 5627} 5628 5629/********************************************************************** 5630 * 5631 * Update the board statistics counters. 5632 * 5633 **********************************************************************/ 5634static void 5635igb_update_stats_counters(struct adapter *adapter) 5636{ 5637 struct e1000_hw *hw = &adapter->hw; 5638 struct e1000_hw_stats *stats; 5639 5640 /* 5641 ** The virtual function adapter has only a 5642 ** small controlled set of stats, do only 5643 ** those and return. 5644 */ 5645 if (adapter->vf_ifp) { 5646 igb_update_vf_stats_counters(adapter); 5647 return; 5648 } 5649 5650 stats = (struct e1000_hw_stats *)adapter->stats; 5651 5652 if (adapter->hw.phy.media_type == e1000_media_type_copper || 5653 (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { 5654 stats->symerrs += 5655 E1000_READ_REG(hw,E1000_SYMERRS); 5656 stats->sec += E1000_READ_REG(hw, E1000_SEC); 5657 } 5658 5659 stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); 5660 stats->mpc += E1000_READ_REG(hw, E1000_MPC); 5661 stats->scc += E1000_READ_REG(hw, E1000_SCC); 5662 stats->ecol += E1000_READ_REG(hw, E1000_ECOL); 5663 5664 stats->mcc += E1000_READ_REG(hw, E1000_MCC); 5665 stats->latecol += E1000_READ_REG(hw, E1000_LATECOL); 5666 stats->colc += E1000_READ_REG(hw, E1000_COLC); 5667 stats->dc += E1000_READ_REG(hw, E1000_DC); 5668 stats->rlec += E1000_READ_REG(hw, E1000_RLEC); 5669 stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC); 5670 stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC); 5671 /* 5672 ** For watchdog management we need to know if we have been 5673 ** paused during the last interval, so capture that here. 5674 */ 5675 adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC); 5676 stats->xoffrxc += adapter->pause_frames; 5677 stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); 5678 stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC); 5679 stats->prc64 += E1000_READ_REG(hw, E1000_PRC64); 5680 stats->prc127 += E1000_READ_REG(hw, E1000_PRC127); 5681 stats->prc255 += E1000_READ_REG(hw, E1000_PRC255); 5682 stats->prc511 += E1000_READ_REG(hw, E1000_PRC511); 5683 stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023); 5684 stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522); 5685 stats->gprc += E1000_READ_REG(hw, E1000_GPRC); 5686 stats->bprc += E1000_READ_REG(hw, E1000_BPRC); 5687 stats->mprc += E1000_READ_REG(hw, E1000_MPRC); 5688 stats->gptc += E1000_READ_REG(hw, E1000_GPTC); 5689 5690 /* For the 64-bit byte counters the low dword must be read first. 
static void
igb_led_func(void *arg, int onoff)
{
	struct adapter *adapter = arg;

	IGB_CORE_LOCK(adapter);
	if (onoff) {
		e1000_setup_led(&adapter->hw);
		e1000_led_on(&adapter->hw);
	} else {
		e1000_led_off(&adapter->hw);
		e1000_cleanup_led(&adapter->hw);
	}
	IGB_CORE_UNLOCK(adapter);
}

static uint64_t
igb_get_vf_counter(if_t ifp, ift_counter cnt)
{
	struct adapter *adapter;
	struct e1000_vf_stats *stats;
#ifndef IGB_LEGACY_TX
	struct tx_ring *txr;
	uint64_t rv;
#endif

	adapter = if_getsoftc(ifp);
	stats = (struct e1000_vf_stats *)adapter->stats;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (stats->gprc);
	case IFCOUNTER_OPACKETS:
		return (stats->gptc);
	case IFCOUNTER_IBYTES:
		return (stats->gorc);
	case IFCOUNTER_OBYTES:
		return (stats->gotc);
	case IFCOUNTER_IMCASTS:
		return (stats->mprc);
	case IFCOUNTER_IERRORS:
		return (adapter->dropped_pkts);
	case IFCOUNTER_OERRORS:
		return (adapter->watchdog_events);
#ifndef IGB_LEGACY_TX
	case IFCOUNTER_OQDROPS:
		rv = 0;
		txr = adapter->tx_rings;
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			rv += txr->br->br_drops;
		return (rv);
#endif
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}

static uint64_t
igb_get_counter(if_t ifp, ift_counter cnt)
{
	struct adapter *adapter;
	struct e1000_hw_stats *stats;
#ifndef IGB_LEGACY_TX
	struct tx_ring *txr;
	uint64_t rv;
#endif

	adapter = if_getsoftc(ifp);
	if (adapter->vf_ifp)
		return (igb_get_vf_counter(ifp, cnt));

	stats = (struct e1000_hw_stats *)adapter->stats;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (stats->gprc);
	case IFCOUNTER_OPACKETS:
		return (stats->gptc);
	case IFCOUNTER_IBYTES:
		return (stats->gorc);
	case IFCOUNTER_OBYTES:
		return (stats->gotc);
	case IFCOUNTER_IMCASTS:
		return (stats->mprc);
	case IFCOUNTER_OMCASTS:
		return (stats->mptc);
	case IFCOUNTER_IERRORS:
		return (adapter->dropped_pkts + stats->rxerrc +
		    stats->crcerrs + stats->algnerrc +
		    stats->ruc + stats->roc + stats->cexterr);
	case IFCOUNTER_OERRORS:
		return (stats->ecol + stats->latecol +
		    adapter->watchdog_events);
	case IFCOUNTER_COLLISIONS:
		return (stats->colc);
	case IFCOUNTER_IQDROPS:
		return (stats->mpc);
#ifndef IGB_LEGACY_TX
	case IFCOUNTER_OQDROPS:
		rv = 0;
		txr = adapter->tx_rings;
		for (int i = 0; i < adapter->num_queues; i++, txr++)
			rv += txr->br->br_drops;
		return (rv);
#endif
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
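/*
 * Both counter methods are consumed through the generic if_get_counter(9)
 * path; the attach code elsewhere in this file registers the PF version,
 * which dispatches to the VF version above when vf_ifp is set.  A sketch
 * of that hook-up, for reference:
 */
#if 0
	if_setgetcounterfn(ifp, igb_get_counter);	/* done during attach */
#endif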
/**********************************************************************
 *
 *  Update the board statistics counters.
 *
 **********************************************************************/
static void
igb_update_stats_counters(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_hw_stats *stats;

	/*
	** The virtual function adapter has only a
	** small controlled set of stats, do only
	** those and return.
	*/
	if (adapter->vf_ifp) {
		igb_update_vf_stats_counters(adapter);
		return;
	}

	stats = (struct e1000_hw_stats *)adapter->stats;

	if (adapter->hw.phy.media_type == e1000_media_type_copper ||
	    (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) {
		stats->symerrs += E1000_READ_REG(hw, E1000_SYMERRS);
		stats->sec += E1000_READ_REG(hw, E1000_SEC);
	}

	stats->crcerrs += E1000_READ_REG(hw, E1000_CRCERRS);
	stats->mpc += E1000_READ_REG(hw, E1000_MPC);
	stats->scc += E1000_READ_REG(hw, E1000_SCC);
	stats->ecol += E1000_READ_REG(hw, E1000_ECOL);

	stats->mcc += E1000_READ_REG(hw, E1000_MCC);
	stats->latecol += E1000_READ_REG(hw, E1000_LATECOL);
	stats->colc += E1000_READ_REG(hw, E1000_COLC);
	stats->dc += E1000_READ_REG(hw, E1000_DC);
	stats->rlec += E1000_READ_REG(hw, E1000_RLEC);
	stats->xonrxc += E1000_READ_REG(hw, E1000_XONRXC);
	stats->xontxc += E1000_READ_REG(hw, E1000_XONTXC);
	/*
	** For watchdog management we need to know if we have been
	** paused during the last interval, so capture that here.
	*/
	adapter->pause_frames = E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	stats->xoffrxc += adapter->pause_frames;
	stats->xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC);
	stats->fcruc += E1000_READ_REG(hw, E1000_FCRUC);
	stats->prc64 += E1000_READ_REG(hw, E1000_PRC64);
	stats->prc127 += E1000_READ_REG(hw, E1000_PRC127);
	stats->prc255 += E1000_READ_REG(hw, E1000_PRC255);
	stats->prc511 += E1000_READ_REG(hw, E1000_PRC511);
	stats->prc1023 += E1000_READ_REG(hw, E1000_PRC1023);
	stats->prc1522 += E1000_READ_REG(hw, E1000_PRC1522);
	stats->gprc += E1000_READ_REG(hw, E1000_GPRC);
	stats->bprc += E1000_READ_REG(hw, E1000_BPRC);
	stats->mprc += E1000_READ_REG(hw, E1000_MPRC);
	stats->gptc += E1000_READ_REG(hw, E1000_GPTC);

	/*
	 * For the 64-bit byte counters the low dword must be read first;
	 * both registers clear on the read of the high dword.
	 */
	stats->gorc += E1000_READ_REG(hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(hw, E1000_GORCH) << 32);
	stats->gotc += E1000_READ_REG(hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(hw, E1000_GOTCH) << 32);

	stats->rnbc += E1000_READ_REG(hw, E1000_RNBC);
	stats->ruc += E1000_READ_REG(hw, E1000_RUC);
	stats->rfc += E1000_READ_REG(hw, E1000_RFC);
	stats->roc += E1000_READ_REG(hw, E1000_ROC);
	stats->rjc += E1000_READ_REG(hw, E1000_RJC);

	stats->mgprc += E1000_READ_REG(hw, E1000_MGTPRC);
	stats->mgpdc += E1000_READ_REG(hw, E1000_MGTPDC);
	stats->mgptc += E1000_READ_REG(hw, E1000_MGTPTC);

	stats->tor += E1000_READ_REG(hw, E1000_TORL) +
	    ((u64)E1000_READ_REG(hw, E1000_TORH) << 32);
	stats->tot += E1000_READ_REG(hw, E1000_TOTL) +
	    ((u64)E1000_READ_REG(hw, E1000_TOTH) << 32);

	stats->tpr += E1000_READ_REG(hw, E1000_TPR);
	stats->tpt += E1000_READ_REG(hw, E1000_TPT);
	stats->ptc64 += E1000_READ_REG(hw, E1000_PTC64);
	stats->ptc127 += E1000_READ_REG(hw, E1000_PTC127);
	stats->ptc255 += E1000_READ_REG(hw, E1000_PTC255);
	stats->ptc511 += E1000_READ_REG(hw, E1000_PTC511);
	stats->ptc1023 += E1000_READ_REG(hw, E1000_PTC1023);
	stats->ptc1522 += E1000_READ_REG(hw, E1000_PTC1522);
	stats->mptc += E1000_READ_REG(hw, E1000_MPTC);
	stats->bptc += E1000_READ_REG(hw, E1000_BPTC);

	/* Interrupt Counts */

	stats->iac += E1000_READ_REG(hw, E1000_IAC);
	stats->icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC);
	stats->icrxatc += E1000_READ_REG(hw, E1000_ICRXATC);
	stats->ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC);
	stats->ictxatc += E1000_READ_REG(hw, E1000_ICTXATC);
	stats->ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC);
	stats->ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC);
	stats->icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC);
	stats->icrxoc += E1000_READ_REG(hw, E1000_ICRXOC);

	/* Host to Card Statistics */

	stats->cbtmpc += E1000_READ_REG(hw, E1000_CBTMPC);
	stats->htdpmc += E1000_READ_REG(hw, E1000_HTDPMC);
	stats->cbrdpc += E1000_READ_REG(hw, E1000_CBRDPC);
	stats->cbrmpc += E1000_READ_REG(hw, E1000_CBRMPC);
	stats->rpthc += E1000_READ_REG(hw, E1000_RPTHC);
	stats->hgptc += E1000_READ_REG(hw, E1000_HGPTC);
	stats->htcbdpc += E1000_READ_REG(hw, E1000_HTCBDPC);
	stats->hgorc += (E1000_READ_REG(hw, E1000_HGORCL) +
	    ((u64)E1000_READ_REG(hw, E1000_HGORCH) << 32));
	stats->hgotc += (E1000_READ_REG(hw, E1000_HGOTCL) +
	    ((u64)E1000_READ_REG(hw, E1000_HGOTCH) << 32));
	stats->lenerrs += E1000_READ_REG(hw, E1000_LENERRS);
	stats->scvpc += E1000_READ_REG(hw, E1000_SCVPC);
	stats->hrmpc += E1000_READ_REG(hw, E1000_HRMPC);

	stats->algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC);
	stats->rxerrc += E1000_READ_REG(hw, E1000_RXERRC);
	stats->tncrs += E1000_READ_REG(hw, E1000_TNCRS);
	stats->cexterr += E1000_READ_REG(hw, E1000_CEXTERR);
	stats->tsctc += E1000_READ_REG(hw, E1000_TSCTC);
	stats->tsctfc += E1000_READ_REG(hw, E1000_TSCTFC);

	/* Driver specific counters */
	adapter->device_control = E1000_READ_REG(hw, E1000_CTRL);
	adapter->rx_control = E1000_READ_REG(hw, E1000_RCTL);
	adapter->int_mask = E1000_READ_REG(hw, E1000_IMS);
	adapter->eint_mask = E1000_READ_REG(hw, E1000_EIMS);
	adapter->packet_buf_alloc_tx =
	    ((E1000_READ_REG(hw, E1000_PBA) & 0xffff0000) >> 16);
	adapter->packet_buf_alloc_rx =
	    (E1000_READ_REG(hw, E1000_PBA) & 0xffff);
}
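/*
 * The low-then-high read ordering above matters: reading the high dword
 * latches and clears the pair.  A hypothetical helper (a sketch, not part
 * of the driver) makes the pattern explicit:
 */
#if 0
#define	IGB_READ_STAT64(hw, lo, hi)					\
	(E1000_READ_REG((hw), (lo)) +					\
	((u64)E1000_READ_REG((hw), (hi)) << 32))

	/* e.g.: stats->gorc += IGB_READ_STAT64(hw, E1000_GORCL, E1000_GORCH); */
#endif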
/**********************************************************************
 *
 *  Initialize the VF board statistics counters.
 *
 **********************************************************************/
static void
igb_vf_init_stats(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_vf_stats *stats;

	stats = (struct e1000_vf_stats *)adapter->stats;
	if (stats == NULL)
		return;
	stats->last_gprc = E1000_READ_REG(hw, E1000_VFGPRC);
	stats->last_gorc = E1000_READ_REG(hw, E1000_VFGORC);
	stats->last_gptc = E1000_READ_REG(hw, E1000_VFGPTC);
	stats->last_gotc = E1000_READ_REG(hw, E1000_VFGOTC);
	stats->last_mprc = E1000_READ_REG(hw, E1000_VFMPRC);
}

/**********************************************************************
 *
 *  Update the VF board statistics counters.
 *
 **********************************************************************/
static void
igb_update_vf_stats_counters(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_vf_stats *stats;

	if (adapter->link_speed == 0)
		return;

	stats = (struct e1000_vf_stats *)adapter->stats;

	UPDATE_VF_REG(E1000_VFGPRC, stats->last_gprc, stats->gprc);
	UPDATE_VF_REG(E1000_VFGORC, stats->last_gorc, stats->gorc);
	UPDATE_VF_REG(E1000_VFGPTC, stats->last_gptc, stats->gptc);
	UPDATE_VF_REG(E1000_VFGOTC, stats->last_gotc, stats->gotc);
	UPDATE_VF_REG(E1000_VFMPRC, stats->last_mprc, stats->mprc);
}
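/*
 * UPDATE_VF_REG is defined in if_igb.h.  Conceptually it folds a 32-bit
 * rolling hardware counter into a 64-bit software counter while handling
 * wraparound; a sketch of that idea (not the literal macro) is:
 */
#if 0
#define	UPDATE_VF_REG_SKETCH(reg, last, cur)				\
do {									\
	u32 new = E1000_READ_REG(hw, reg);				\
	if (new < (last))	/* 32-bit counter wrapped */		\
		(cur) += 0x100000000LL;					\
	(cur) += new - (last);						\
	(last) = new;							\
} while (0)
#endif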
/* Export a single 32-bit register via a read-only sysctl. */
static int
igb_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	u_int val;

	adapter = oidp->oid_arg1;
	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
	return (sysctl_handle_int(oidp, &val, 0, req));
}

/*
** Tunable interrupt rate handler
*/
static int
igb_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS)
{
	struct igb_queue *que = ((struct igb_queue *)oidp->oid_arg1);
	int error;
	u32 reg, usec, rate;

	reg = E1000_READ_REG(&que->adapter->hw, E1000_EITR(que->msix));
	usec = ((reg & 0x7FFC) >> 2);
	if (usec > 0)
		rate = 1000000 / usec;
	else
		rate = 0;
	error = sysctl_handle_int(oidp, &rate, 0, req);
	if (error || !req->newptr)
		return (error);
	return (0);
}
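/*
 * Worked example of the EITR decoding above: the interval lives in bits
 * 14:2 of the register and the driver treats it as microseconds.  With
 * an interval of 125, usec = (reg & 0x7FFC) >> 2 = 125, so the reported
 * rate is 1000000 / 125 = 8000 interrupts/sec.  (Values illustrative.)
 */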
/*
 * Add sysctl variables, one per statistic, to the system.
 */
static void
igb_add_hw_stats(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	struct tx_ring *txr = adapter->tx_rings;
	struct rx_ring *rxr = adapter->rx_rings;

	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
	struct e1000_hw_stats *stats = adapter->stats;

	struct sysctl_oid *stat_node, *queue_node, *int_node, *host_node;
	struct sysctl_oid_list *stat_list, *queue_list, *int_list, *host_list;

#define QUEUE_NAME_LEN 32
	char namebuf[QUEUE_NAME_LEN];

	/* Driver Statistics */
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
	    CTLFLAG_RD, &adapter->dropped_pkts,
	    "Driver dropped packets");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq",
	    CTLFLAG_RD, &adapter->link_irq,
	    "Link MSIX IRQ Handled");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
	    CTLFLAG_RD, &adapter->mbuf_defrag_failed,
	    "Defragmenting mbuf chain failed");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
	    CTLFLAG_RD, &adapter->no_tx_dma_setup,
	    "Driver tx dma failure in xmit");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
	    CTLFLAG_RD, &adapter->rx_overruns,
	    "RX overruns");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
	    CTLFLAG_RD, &adapter->watchdog_events,
	    "Watchdog timeouts");

	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "device_control",
	    CTLFLAG_RD, &adapter->device_control,
	    "Device Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_control",
	    CTLFLAG_RD, &adapter->rx_control,
	    "Receiver Control Register");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "interrupt_mask",
	    CTLFLAG_RD, &adapter->int_mask,
	    "Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "extended_int_mask",
	    CTLFLAG_RD, &adapter->eint_mask,
	    "Extended Interrupt Mask");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_buf_alloc",
	    CTLFLAG_RD, &adapter->packet_buf_alloc_tx,
	    "Transmit Buffer Packet Allocation");
	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_buf_alloc",
	    CTLFLAG_RD, &adapter->packet_buf_alloc_rx,
	    "Receive Buffer Packet Allocation");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
	    CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
	    "Flow Control High Watermark");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
	    CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
	    "Flow Control Low Watermark");

	for (int i = 0; i < adapter->num_queues; i++, rxr++, txr++) {
		struct lro_ctrl *lro = &rxr->lro;

		snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i);
		queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
		    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate",
		    CTLTYPE_UINT | CTLFLAG_RD, &adapter->queues[i],
		    sizeof(&adapter->queues[i]),
		    igb_sysctl_interrupt_rate_handler,
		    "IU", "Interrupt Rate");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(txr->me),
		    igb_sysctl_reg_handler, "IU",
		    "Transmit Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(txr->me),
		    igb_sysctl_reg_handler, "IU",
		    "Transmit Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "no_desc_avail",
		    CTLFLAG_RD, &txr->no_desc_avail,
		    "Queue Descriptors Unavailable");
		SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets",
		    CTLFLAG_RD, &txr->total_packets,
		    "Queue Packets Transmitted");

		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(rxr->me),
		    igb_sysctl_reg_handler, "IU",
		    "Receive Descriptor Head");
		SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail",
		    CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(rxr->me),
		    igb_sysctl_reg_handler, "IU",
		    "Receive Descriptor Tail");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_packets",
		    CTLFLAG_RD, &rxr->rx_packets,
		    "Queue Packets Received");
		SYSCTL_ADD_QUAD(ctx, queue_list, OID_AUTO, "rx_bytes",
		    CTLFLAG_RD, &rxr->rx_bytes,
		    "Queue Bytes Received");
		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued",
		    CTLFLAG_RD, &lro->lro_queued, 0,
		    "LRO Queued");
		SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed",
		    CTLFLAG_RD, &lro->lro_flushed, 0,
		    "LRO Flushed");
	}
	/* MAC stats get their own sub node */

	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
	    CTLFLAG_RD, NULL, "MAC Statistics");
	stat_list = SYSCTL_CHILDREN(stat_node);

	/*
	** VF adapter has a very limited set of stats
	** since it's not managing the metal, so to speak.
	*/
	if (adapter->vf_ifp) {
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
		    CTLFLAG_RD, &stats->gprc,
		    "Good Packets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
		    CTLFLAG_RD, &stats->gptc,
		    "Good Packets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
		    CTLFLAG_RD, &stats->gorc,
		    "Good Octets Received");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
		    CTLFLAG_RD, &stats->gotc,
		    "Good Octets Transmitted");
		SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
		    CTLFLAG_RD, &stats->mprc,
		    "Multicast Packets Received");
		return;
	}

	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "excess_coll",
	    CTLFLAG_RD, &stats->ecol,
	    "Excessive collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "single_coll",
	    CTLFLAG_RD, &stats->scc,
	    "Single collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
	    CTLFLAG_RD, &stats->mcc,
	    "Multiple collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "late_coll",
	    CTLFLAG_RD, &stats->latecol,
	    "Late collisions");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "collision_count",
	    CTLFLAG_RD, &stats->colc,
	    "Collision Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
	    CTLFLAG_RD, &stats->symerrs,
	    "Symbol Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
	    CTLFLAG_RD, &stats->sec,
	    "Sequence Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "defer_count",
	    CTLFLAG_RD, &stats->dc,
	    "Defer Count");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "missed_packets",
	    CTLFLAG_RD, &stats->mpc,
	    "Missed Packets");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_length_errors",
	    CTLFLAG_RD, &stats->rlec,
	    "Receive Length Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
	    CTLFLAG_RD, &stats->rnbc,
	    "Receive No Buffers");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
	    CTLFLAG_RD, &stats->ruc,
	    "Receive Undersize");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
	    CTLFLAG_RD, &stats->rfc,
	    "Fragmented Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
	    CTLFLAG_RD, &stats->roc,
	    "Oversized Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
	    CTLFLAG_RD, &stats->rjc,
	    "Received Jabber");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "recv_errs",
	    CTLFLAG_RD, &stats->rxerrc,
	    "Receive Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "crc_errs",
	    CTLFLAG_RD, &stats->crcerrs,
	    "CRC errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
	    CTLFLAG_RD, &stats->algnerrc,
	    "Alignment Errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_no_crs",
	    CTLFLAG_RD, &stats->tncrs,
	    "Transmit with No CRS");
	/* On 82575 these are collision counts */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
	    CTLFLAG_RD, &stats->cexterr,
	    "Collision/Carrier extension errors");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
	    CTLFLAG_RD, &stats->xonrxc,
	    "XON Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xon_txd",
	    CTLFLAG_RD, &stats->xontxc,
	    "XON Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
	    CTLFLAG_RD, &stats->xoffrxc,
	    "XOFF Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
	    CTLFLAG_RD, &stats->xofftxc,
	    "XOFF Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "unsupported_fc_recvd",
	    CTLFLAG_RD, &stats->fcruc,
	    "Unsupported Flow Control Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_recvd",
	    CTLFLAG_RD, &stats->mgprc,
	    "Management Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_drop",
	    CTLFLAG_RD, &stats->mgpdc,
	    "Management Packets Dropped");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mgmt_pkts_txd",
	    CTLFLAG_RD, &stats->mgptc,
	    "Management Packets Transmitted");
	/* Packet Reception Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
	    CTLFLAG_RD, &stats->tpr,
	    "Total Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
	    CTLFLAG_RD, &stats->gprc,
	    "Good Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
	    CTLFLAG_RD, &stats->bprc,
	    "Broadcast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
	    CTLFLAG_RD, &stats->mprc,
	    "Multicast Packets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
	    CTLFLAG_RD, &stats->prc64,
	    "64 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
	    CTLFLAG_RD, &stats->prc127,
	    "65-127 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
	    CTLFLAG_RD, &stats->prc255,
	    "128-255 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
	    CTLFLAG_RD, &stats->prc511,
	    "256-511 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
	    CTLFLAG_RD, &stats->prc1023,
	    "512-1023 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
	    CTLFLAG_RD, &stats->prc1522,
	    "1024-1522 byte frames received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
	    CTLFLAG_RD, &stats->gorc,
	    "Good Octets Received");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_recvd",
	    CTLFLAG_RD, &stats->tor,
	    "Total Octets Received");

	/* Packet Transmission Stats */
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
	    CTLFLAG_RD, &stats->gotc,
	    "Good Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_octets_txd",
	    CTLFLAG_RD, &stats->tot,
	    "Total Octets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
	    CTLFLAG_RD, &stats->tpt,
	    "Total Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
	    CTLFLAG_RD, &stats->gptc,
	    "Good Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
	    CTLFLAG_RD, &stats->bptc,
	    "Broadcast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
	    CTLFLAG_RD, &stats->mptc,
	    "Multicast Packets Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
	    CTLFLAG_RD, &stats->ptc64,
	    "64 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
	    CTLFLAG_RD, &stats->ptc127,
	    "65-127 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
	    CTLFLAG_RD, &stats->ptc255,
	    "128-255 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
	    CTLFLAG_RD, &stats->ptc511,
	    "256-511 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
	    CTLFLAG_RD, &stats->ptc1023,
	    "512-1023 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
	    CTLFLAG_RD, &stats->ptc1522,
	    "1024-1522 byte frames transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_txd",
	    CTLFLAG_RD, &stats->tsctc,
	    "TSO Contexts Transmitted");
	SYSCTL_ADD_QUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
	    CTLFLAG_RD, &stats->tsctfc,
	    "TSO Contexts Failed");
	/* Interrupt Stats */

	int_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "interrupts",
	    CTLFLAG_RD, NULL, "Interrupt Statistics");
	int_list = SYSCTL_CHILDREN(int_node);

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "asserts",
	    CTLFLAG_RD, &stats->iac,
	    "Interrupt Assertion Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_pkt_timer",
	    CTLFLAG_RD, &stats->icrxptc,
	    "Interrupt Cause Rx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_abs_timer",
	    CTLFLAG_RD, &stats->icrxatc,
	    "Interrupt Cause Rx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_pkt_timer",
	    CTLFLAG_RD, &stats->ictxptc,
	    "Interrupt Cause Tx Pkt Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_abs_timer",
	    CTLFLAG_RD, &stats->ictxatc,
	    "Interrupt Cause Tx Abs Timer Expire Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_empty",
	    CTLFLAG_RD, &stats->ictxqec,
	    "Interrupt Cause Tx Queue Empty Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "tx_queue_min_thresh",
	    CTLFLAG_RD, &stats->ictxqmtc,
	    "Interrupt Cause Tx Queue Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_desc_min_thresh",
	    CTLFLAG_RD, &stats->icrxdmtc,
	    "Interrupt Cause Rx Desc Min Thresh Count");

	SYSCTL_ADD_QUAD(ctx, int_list, OID_AUTO, "rx_overrun",
	    CTLFLAG_RD, &stats->icrxoc,
	    "Interrupt Cause Receiver Overrun Count");

	/* Host to Card Stats */

	host_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "host",
	    CTLFLAG_RD, NULL,
	    "Host to Card Statistics");

	host_list = SYSCTL_CHILDREN(host_node);

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt",
	    CTLFLAG_RD, &stats->cbtmpc,
	    "Circuit Breaker Tx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "host_tx_pkt_discard",
	    CTLFLAG_RD, &stats->htdpmc,
	    "Host Transmit Discarded Packets");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_pkt",
	    CTLFLAG_RD, &stats->rpthc,
	    "Rx Packets To Host");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkts",
	    CTLFLAG_RD, &stats->cbrmpc,
	    "Circuit Breaker Rx Packet Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_rx_pkt_drop",
	    CTLFLAG_RD, &stats->cbrdpc,
	    "Circuit Breaker Rx Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_pkt",
	    CTLFLAG_RD, &stats->hgptc,
	    "Host Good Packets Tx Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "breaker_tx_pkt_drop",
	    CTLFLAG_RD, &stats->htcbdpc,
	    "Host Tx Circuit Breaker Dropped Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "rx_good_bytes",
	    CTLFLAG_RD, &stats->hgorc,
	    "Host Good Octets Received Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "tx_good_bytes",
	    CTLFLAG_RD, &stats->hgotc,
	    "Host Good Octets Transmit Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "length_errors",
	    CTLFLAG_RD, &stats->lenerrs,
	    "Length Errors");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "serdes_violation_pkt",
	    CTLFLAG_RD, &stats->scvpc,
	    "SerDes/SGMII Code Violation Pkt Count");

	SYSCTL_ADD_QUAD(ctx, host_list, OID_AUTO, "header_redir_missed",
	    CTLFLAG_RD, &stats->hrmpc,
	    "Header Redirection Missed Packet Count");
}
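/*
 * The nodes built above hang off the device's sysctl tree, so for unit 0
 * the statistics surface as, e.g. (paths illustrative):
 *
 *	dev.igb.0.dropped
 *	dev.igb.0.queue0.interrupt_rate
 *	dev.igb.0.mac_stats.good_pkts_recvd
 *	dev.igb.0.interrupts.asserts
 *	dev.igb.0.host.rx_pkt
 */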
/**********************************************************************
 *
 *  This routine provides a way to dump out the adapter eeprom,
 *  often a useful debug/service tool.  It only dumps the first
 *  32 words; the data that matters lives within that extent.
 *
 **********************************************************************/
static int
igb_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter;
	int error;
	int result;

	result = -1;
	error = sysctl_handle_int(oidp, &result, 0, req);

	if (error || !req->newptr)
		return (error);

	/*
	 * This value will cause a hex dump of the
	 * first 32 16-bit words of the EEPROM to
	 * the screen.
	 */
	if (result == 1) {
		adapter = (struct adapter *)arg1;
		igb_print_nvm_info(adapter);
	}

	return (error);
}

static void
igb_print_nvm_info(struct adapter *adapter)
{
	u16 eeprom_data;
	int i, j, row = 0;

	/* It's a bit crude, but it gets the job done */
	printf("\nInterface EEPROM Dump:\n");
	printf("Offset\n0x0000 ");
	for (i = 0, j = 0; i < 32; i++, j++) {
		if (j == 8) { /* Make the offset block */
			j = 0; ++row;
			printf("\n0x00%x0 ", row);
		}
		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
		printf("%04x ", eeprom_data);
	}
	printf("\n");
}

static void
igb_set_sysctl_value(struct adapter *adapter, const char *name,
    const char *description, int *limit, int value)
{
	*limit = value;
	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
}

/*
** Set flow control using sysctl:
** Flow control values:
**	0 - off
**	1 - rx pause
**	2 - tx pause
**	3 - full
*/
static int
igb_set_flowcntl(SYSCTL_HANDLER_ARGS)
{
	int error;
	static int input = 3; /* default is full */
	struct adapter *adapter = (struct adapter *) arg1;

	error = sysctl_handle_int(oidp, &input, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (input) {
	case e1000_fc_rx_pause:
	case e1000_fc_tx_pause:
	case e1000_fc_full:
	case e1000_fc_none:
		adapter->hw.fc.requested_mode = input;
		adapter->fc = input;
		break;
	default:
		/* Do nothing */
		return (error);
	}

	adapter->hw.fc.current_mode = adapter->hw.fc.requested_mode;
	e1000_force_mac_fc(&adapter->hw);
	/* XXX TODO: update DROP_EN on each RX queue if appropriate */
	return (error);
}

/*
** Manage DMA Coalesce:
** Control values:
**	0/1 - off/on
** Legal timer values are 250, 500, and
** 1000 through 10000 in steps of 1000.
*/
static int
igb_sysctl_dmac(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int error;

	error = sysctl_handle_int(oidp, &adapter->dmac, 0, req);

	if ((error) || (req->newptr == NULL))
		return (error);

	switch (adapter->dmac) {
	case 0:
		/* Disabling */
		break;
	case 1: /* Just enable and use default */
		adapter->dmac = 1000;
		break;
	case 250:
	case 500:
	case 1000:
	case 2000:
	case 3000:
	case 4000:
	case 5000:
	case 6000:
	case 7000:
	case 8000:
	case 9000:
	case 10000:
		/* Legal values - allow */
		break;
	default:
		/* Do nothing, illegal value */
		adapter->dmac = 0;
		return (EINVAL);
	}
	/* Reinit the interface */
	igb_init(adapter);
	return (error);
}
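/*
 * Usage sketch for the handlers above.  The OID names are those the
 * attach code elsewhere in this file registers (assumed here; shell
 * lines are illustrative only):
 *
 *	sysctl dev.igb.0.nvm=1		# hex dump first 32 EEPROM words
 *	sysctl dev.igb.0.fc=3		# request full flow control
 *	sysctl dev.igb.0.dmac=1		# enable DMA coalesce, default timer
 */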
/*
** Manage Energy Efficient Ethernet:
** Control values:
**	0/1 - enabled/disabled
*/
static int
igb_sysctl_eee(SYSCTL_HANDLER_ARGS)
{
	struct adapter *adapter = (struct adapter *) arg1;
	int error, value;

	value = adapter->hw.dev_spec._82575.eee_disable;
	error = sysctl_handle_int(oidp, &value, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	IGB_CORE_LOCK(adapter);
	adapter->hw.dev_spec._82575.eee_disable = (value != 0);
	igb_init_locked(adapter);
	IGB_CORE_UNLOCK(adapter);
	return (0);
}
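/*
 * As with the handlers above, the EEE knob is reachable through the
 * device sysctl tree once registered at attach time.  Note the value
 * written is eee_disable, so 1 turns EEE off and triggers a reinit;
 * e.g. (illustrative, assuming the registered OID name):
 *
 *	sysctl dev.igb.0.eee_disabled=1
 */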