1/**************************************************************************** 2 * Driver for Solarflare Solarstorm network controllers and boards 3 * Copyright 2005-2006 Fen Systems Ltd. 4 * Copyright 2005-2009 Solarflare Communications Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11#include <linux/module.h> 12#include <linux/pci.h> 13#include <linux/netdevice.h> 14#include <linux/etherdevice.h> 15#include <linux/delay.h> 16#include <linux/notifier.h> 17#include <linux/ip.h> 18#include <linux/tcp.h> 19#include <linux/in.h> 20#include <linux/crc32.h> 21#include <linux/ethtool.h> 22#include <linux/topology.h> 23#include <linux/gfp.h> 24#include "net_driver.h" 25#include "efx.h" 26#include "mdio_10g.h" 27#include "nic.h" 28 29#include "mcdi.h" 30#include "workarounds.h" 31 32/************************************************************************** 33 * 34 * Type name strings 35 * 36 ************************************************************************** 37 */ 38 39/* Loopback mode names (see LOOPBACK_MODE()) */ 40const unsigned int efx_loopback_mode_max = LOOPBACK_MAX; 41const char *efx_loopback_mode_names[] = { 42 [LOOPBACK_NONE] = "NONE", 43 [LOOPBACK_DATA] = "DATAPATH", 44 [LOOPBACK_GMAC] = "GMAC", 45 [LOOPBACK_XGMII] = "XGMII", 46 [LOOPBACK_XGXS] = "XGXS", 47 [LOOPBACK_XAUI] = "XAUI", 48 [LOOPBACK_GMII] = "GMII", 49 [LOOPBACK_SGMII] = "SGMII", 50 [LOOPBACK_XGBR] = "XGBR", 51 [LOOPBACK_XFI] = "XFI", 52 [LOOPBACK_XAUI_FAR] = "XAUI_FAR", 53 [LOOPBACK_GMII_FAR] = "GMII_FAR", 54 [LOOPBACK_SGMII_FAR] = "SGMII_FAR", 55 [LOOPBACK_XFI_FAR] = "XFI_FAR", 56 [LOOPBACK_GPHY] = "GPHY", 57 [LOOPBACK_PHYXS] = "PHYXS", 58 [LOOPBACK_PCS] = "PCS", 59 [LOOPBACK_PMAPMD] = "PMA/PMD", 60 [LOOPBACK_XPORT] = "XPORT", 61 [LOOPBACK_XGMII_WS] = "XGMII_WS", 62 [LOOPBACK_XAUI_WS] = "XAUI_WS", 63 [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR", 64 [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR", 65 [LOOPBACK_GMII_WS] = "GMII_WS", 66 [LOOPBACK_XFI_WS] = "XFI_WS", 67 [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR", 68 [LOOPBACK_PHYXS_WS] = "PHYXS_WS", 69}; 70 71/* Interrupt mode names (see INT_MODE())) */ 72const unsigned int efx_interrupt_mode_max = EFX_INT_MODE_MAX; 73const char *efx_interrupt_mode_names[] = { 74 [EFX_INT_MODE_MSIX] = "MSI-X", 75 [EFX_INT_MODE_MSI] = "MSI", 76 [EFX_INT_MODE_LEGACY] = "legacy", 77}; 78 79const unsigned int efx_reset_type_max = RESET_TYPE_MAX; 80const char *efx_reset_type_names[] = { 81 [RESET_TYPE_INVISIBLE] = "INVISIBLE", 82 [RESET_TYPE_ALL] = "ALL", 83 [RESET_TYPE_WORLD] = "WORLD", 84 [RESET_TYPE_DISABLE] = "DISABLE", 85 [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG", 86 [RESET_TYPE_INT_ERROR] = "INT_ERROR", 87 [RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY", 88 [RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH", 89 [RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH", 90 [RESET_TYPE_TX_SKIP] = "TX_SKIP", 91 [RESET_TYPE_MC_FAILURE] = "MC_FAILURE", 92}; 93 94#define EFX_MAX_MTU (9 * 1024) 95 96/* Reset workqueue. If any NIC has a hardware failure then a reset will be 97 * queued onto this work queue. This is not a per-nic work queue, because 98 * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised. 99 */ 100static struct workqueue_struct *reset_workqueue; 101 102/************************************************************************** 103 * 104 * Configurable values 105 * 106 *************************************************************************/ 107 108/* 109 * Use separate channels for TX and RX events 110 * 111 * Set this to 1 to use separate channels for TX and RX. It allows us 112 * to control interrupt affinity separately for TX and RX. 113 * 114 * This is only used in MSI-X interrupt mode 115 */ 116static unsigned int separate_tx_channels; 117module_param(separate_tx_channels, uint, 0644); 118MODULE_PARM_DESC(separate_tx_channels, 119 "Use separate channels for TX and RX"); 120 121/* This is the weight assigned to each of the (per-channel) virtual 122 * NAPI devices. 123 */ 124static int napi_weight = 64; 125 126/* This is the time (in jiffies) between invocations of the hardware 127 * monitor, which checks for known hardware bugs and resets the 128 * hardware and driver as necessary. 129 */ 130unsigned int efx_monitor_interval = 1 * HZ; 131 132/* This controls whether or not the driver will initialise devices 133 * with invalid MAC addresses stored in the EEPROM or flash. If true, 134 * such devices will be initialised with a random locally-generated 135 * MAC address. This allows for loading the sfc_mtd driver to 136 * reprogram the flash, even if the flash contents (including the MAC 137 * address) have previously been erased. 138 */ 139static unsigned int allow_bad_hwaddr; 140 141/* Initial interrupt moderation settings. They can be modified after 142 * module load with ethtool. 143 * 144 * The default for RX should strike a balance between increasing the 145 * round-trip latency and reducing overhead. 146 */ 147static unsigned int rx_irq_mod_usec = 60; 148 149/* Initial interrupt moderation settings. They can be modified after 150 * module load with ethtool. 151 * 152 * This default is chosen to ensure that a 10G link does not go idle 153 * while a TX queue is stopped after it has become full. A queue is 154 * restarted when it drops below half full. The time this takes (assuming 155 * worst case 3 descriptors per packet and 1024 descriptors) is 156 * 512 / 3 * 1.2 = 205 usec. 157 */ 158static unsigned int tx_irq_mod_usec = 150; 159 160/* This is the first interrupt mode to try out of: 161 * 0 => MSI-X 162 * 1 => MSI 163 * 2 => legacy 164 */ 165static unsigned int interrupt_mode; 166 167/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), 168 * i.e. the number of CPUs among which we may distribute simultaneous 169 * interrupt handling. 170 * 171 * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. 172 * The default (0) means to assign an interrupt to each package (level II cache) 173 */ 174static unsigned int rss_cpus; 175module_param(rss_cpus, uint, 0444); 176MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling"); 177 178static int phy_flash_cfg; 179module_param(phy_flash_cfg, int, 0644); 180MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially"); 181 182static unsigned irq_adapt_low_thresh = 10000; 183module_param(irq_adapt_low_thresh, uint, 0644); 184MODULE_PARM_DESC(irq_adapt_low_thresh, 185 "Threshold score for reducing IRQ moderation"); 186 187static unsigned irq_adapt_high_thresh = 20000; 188module_param(irq_adapt_high_thresh, uint, 0644); 189MODULE_PARM_DESC(irq_adapt_high_thresh, 190 "Threshold score for increasing IRQ moderation"); 191 192static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE | 193 NETIF_MSG_LINK | NETIF_MSG_IFDOWN | 194 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR | 195 NETIF_MSG_TX_ERR | NETIF_MSG_HW); 196module_param(debug, uint, 0); 197MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value"); 198 199/************************************************************************** 200 * 201 * Utility functions and prototypes 202 * 203 *************************************************************************/ 204static void efx_remove_channel(struct efx_channel *channel); 205static void efx_remove_port(struct efx_nic *efx); 206static void efx_fini_napi(struct efx_nic *efx); 207static void efx_fini_channels(struct efx_nic *efx); 208 209#define EFX_ASSERT_RESET_SERIALISED(efx) \ 210 do { \ 211 if ((efx->state == STATE_RUNNING) || \ 212 (efx->state == STATE_DISABLED)) \ 213 ASSERT_RTNL(); \ 214 } while (0) 215 216/************************************************************************** 217 * 218 * Event queue processing 219 * 220 *************************************************************************/ 221 222/* Process channel's event queue 223 * 224 * This function is responsible for processing the event queue of a 225 * single channel. The caller must guarantee that this function will 226 * never be concurrently called more than once on the same channel, 227 * though different channels may be being processed concurrently. 228 */ 229static int efx_process_channel(struct efx_channel *channel, int budget) 230{ 231 struct efx_nic *efx = channel->efx; 232 int spent; 233 234 if (unlikely(efx->reset_pending != RESET_TYPE_NONE || 235 !channel->enabled)) 236 return 0; 237 238 spent = efx_nic_process_eventq(channel, budget); 239 if (spent == 0) 240 return 0; 241 242 /* Deliver last RX packet. */ 243 if (channel->rx_pkt) { 244 __efx_rx_packet(channel, channel->rx_pkt, 245 channel->rx_pkt_csummed); 246 channel->rx_pkt = NULL; 247 } 248 249 efx_rx_strategy(channel); 250 251 efx_fast_push_rx_descriptors(&efx->rx_queue[channel->channel]); 252 253 return spent; 254} 255 256/* Mark channel as finished processing 257 * 258 * Note that since we will not receive further interrupts for this 259 * channel before we finish processing and call the eventq_read_ack() 260 * method, there is no need to use the interrupt hold-off timers. 261 */ 262static inline void efx_channel_processed(struct efx_channel *channel) 263{ 264 /* The interrupt handler for this channel may set work_pending 265 * as soon as we acknowledge the events we've seen. Make sure 266 * it's cleared before then. */ 267 channel->work_pending = false; 268 smp_wmb(); 269 270 efx_nic_eventq_read_ack(channel); 271} 272 273/* NAPI poll handler 274 * 275 * NAPI guarantees serialisation of polls of the same device, which 276 * provides the guarantee required by efx_process_channel(). 277 */ 278static int efx_poll(struct napi_struct *napi, int budget) 279{ 280 struct efx_channel *channel = 281 container_of(napi, struct efx_channel, napi_str); 282 struct efx_nic *efx = channel->efx; 283 int spent; 284 285 netif_vdbg(efx, intr, efx->net_dev, 286 "channel %d NAPI poll executing on CPU %d\n", 287 channel->channel, raw_smp_processor_id()); 288 289 spent = efx_process_channel(channel, budget); 290 291 if (spent < budget) { 292 if (channel->channel < efx->n_rx_channels && 293 efx->irq_rx_adaptive && 294 unlikely(++channel->irq_count == 1000)) { 295 if (unlikely(channel->irq_mod_score < 296 irq_adapt_low_thresh)) { 297 if (channel->irq_moderation > 1) { 298 channel->irq_moderation -= 1; 299 efx->type->push_irq_moderation(channel); 300 } 301 } else if (unlikely(channel->irq_mod_score > 302 irq_adapt_high_thresh)) { 303 if (channel->irq_moderation < 304 efx->irq_rx_moderation) { 305 channel->irq_moderation += 1; 306 efx->type->push_irq_moderation(channel); 307 } 308 } 309 channel->irq_count = 0; 310 channel->irq_mod_score = 0; 311 } 312 313 /* There is no race here; although napi_disable() will 314 * only wait for napi_complete(), this isn't a problem 315 * since efx_channel_processed() will have no effect if 316 * interrupts have already been disabled. 317 */ 318 napi_complete(napi); 319 efx_channel_processed(channel); 320 } 321 322 return spent; 323} 324 325/* Process the eventq of the specified channel immediately on this CPU 326 * 327 * Disable hardware generated interrupts, wait for any existing 328 * processing to finish, then directly poll (and ack ) the eventq. 329 * Finally reenable NAPI and interrupts. 330 * 331 * Since we are touching interrupts the caller should hold the suspend lock 332 */ 333void efx_process_channel_now(struct efx_channel *channel) 334{ 335 struct efx_nic *efx = channel->efx; 336 337 BUG_ON(!channel->enabled); 338 339 /* Disable interrupts and wait for ISRs to complete */ 340 efx_nic_disable_interrupts(efx); 341 if (efx->legacy_irq) 342 synchronize_irq(efx->legacy_irq); 343 if (channel->irq) 344 synchronize_irq(channel->irq); 345 346 /* Wait for any NAPI processing to complete */ 347 napi_disable(&channel->napi_str); 348 349 /* Poll the channel */ 350 efx_process_channel(channel, EFX_EVQ_SIZE); 351 352 /* Ack the eventq. This may cause an interrupt to be generated 353 * when they are reenabled */ 354 efx_channel_processed(channel); 355 356 napi_enable(&channel->napi_str); 357 efx_nic_enable_interrupts(efx); 358} 359 360/* Create event queue 361 * Event queue memory allocations are done only once. If the channel 362 * is reset, the memory buffer will be reused; this guards against 363 * errors during channel reset and also simplifies interrupt handling. 364 */ 365static int efx_probe_eventq(struct efx_channel *channel) 366{ 367 netif_dbg(channel->efx, probe, channel->efx->net_dev, 368 "chan %d create event queue\n", channel->channel); 369 370 return efx_nic_probe_eventq(channel); 371} 372 373/* Prepare channel's event queue */ 374static void efx_init_eventq(struct efx_channel *channel) 375{ 376 netif_dbg(channel->efx, drv, channel->efx->net_dev, 377 "chan %d init event queue\n", channel->channel); 378 379 channel->eventq_read_ptr = 0; 380 381 efx_nic_init_eventq(channel); 382} 383 384static void efx_fini_eventq(struct efx_channel *channel) 385{ 386 netif_dbg(channel->efx, drv, channel->efx->net_dev, 387 "chan %d fini event queue\n", channel->channel); 388 389 efx_nic_fini_eventq(channel); 390} 391 392static void efx_remove_eventq(struct efx_channel *channel) 393{ 394 netif_dbg(channel->efx, drv, channel->efx->net_dev, 395 "chan %d remove event queue\n", channel->channel); 396 397 efx_nic_remove_eventq(channel); 398} 399 400/************************************************************************** 401 * 402 * Channel handling 403 * 404 *************************************************************************/ 405 406static int efx_probe_channel(struct efx_channel *channel) 407{ 408 struct efx_tx_queue *tx_queue; 409 struct efx_rx_queue *rx_queue; 410 int rc; 411 412 netif_dbg(channel->efx, probe, channel->efx->net_dev, 413 "creating channel %d\n", channel->channel); 414 415 rc = efx_probe_eventq(channel); 416 if (rc) 417 goto fail1; 418 419 efx_for_each_channel_tx_queue(tx_queue, channel) { 420 rc = efx_probe_tx_queue(tx_queue); 421 if (rc) 422 goto fail2; 423 } 424 425 efx_for_each_channel_rx_queue(rx_queue, channel) { 426 rc = efx_probe_rx_queue(rx_queue); 427 if (rc) 428 goto fail3; 429 } 430 431 channel->n_rx_frm_trunc = 0; 432 433 return 0; 434 435 fail3: 436 efx_for_each_channel_rx_queue(rx_queue, channel) 437 efx_remove_rx_queue(rx_queue); 438 fail2: 439 efx_for_each_channel_tx_queue(tx_queue, channel) 440 efx_remove_tx_queue(tx_queue); 441 fail1: 442 return rc; 443} 444 445 446static void efx_set_channel_names(struct efx_nic *efx) 447{ 448 struct efx_channel *channel; 449 const char *type = ""; 450 int number; 451 452 efx_for_each_channel(channel, efx) { 453 number = channel->channel; 454 if (efx->n_channels > efx->n_rx_channels) { 455 if (channel->channel < efx->n_rx_channels) { 456 type = "-rx"; 457 } else { 458 type = "-tx"; 459 number -= efx->n_rx_channels; 460 } 461 } 462 snprintf(channel->name, sizeof(channel->name), 463 "%s%s-%d", efx->name, type, number); 464 } 465} 466 467/* Channels are shutdown and reinitialised whilst the NIC is running 468 * to propagate configuration changes (mtu, checksum offload), or 469 * to clear hardware error conditions 470 */ 471static void efx_init_channels(struct efx_nic *efx) 472{ 473 struct efx_tx_queue *tx_queue; 474 struct efx_rx_queue *rx_queue; 475 struct efx_channel *channel; 476 477 /* Calculate the rx buffer allocation parameters required to 478 * support the current MTU, including padding for header 479 * alignment and overruns. 480 */ 481 efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + 482 EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + 483 efx->type->rx_buffer_hash_size + 484 efx->type->rx_buffer_padding); 485 efx->rx_buffer_order = get_order(efx->rx_buffer_len + 486 sizeof(struct efx_rx_page_state)); 487 488 /* Initialise the channels */ 489 efx_for_each_channel(channel, efx) { 490 netif_dbg(channel->efx, drv, channel->efx->net_dev, 491 "init chan %d\n", channel->channel); 492 493 efx_init_eventq(channel); 494 495 efx_for_each_channel_tx_queue(tx_queue, channel) 496 efx_init_tx_queue(tx_queue); 497 498 /* The rx buffer allocation strategy is MTU dependent */ 499 efx_rx_strategy(channel); 500 501 efx_for_each_channel_rx_queue(rx_queue, channel) 502 efx_init_rx_queue(rx_queue); 503 504 WARN_ON(channel->rx_pkt != NULL); 505 efx_rx_strategy(channel); 506 } 507} 508 509/* This enables event queue processing and packet transmission. 510 * 511 * Note that this function is not allowed to fail, since that would 512 * introduce too much complexity into the suspend/resume path. 513 */ 514static void efx_start_channel(struct efx_channel *channel) 515{ 516 struct efx_rx_queue *rx_queue; 517 518 netif_dbg(channel->efx, ifup, channel->efx->net_dev, 519 "starting chan %d\n", channel->channel); 520 521 /* The interrupt handler for this channel may set work_pending 522 * as soon as we enable it. Make sure it's cleared before 523 * then. Similarly, make sure it sees the enabled flag set. */ 524 channel->work_pending = false; 525 channel->enabled = true; 526 smp_wmb(); 527 528 /* Fill the queues before enabling NAPI */ 529 efx_for_each_channel_rx_queue(rx_queue, channel) 530 efx_fast_push_rx_descriptors(rx_queue); 531 532 napi_enable(&channel->napi_str); 533} 534 535/* This disables event queue processing and packet transmission. 536 * This function does not guarantee that all queue processing 537 * (e.g. RX refill) is complete. 538 */ 539static void efx_stop_channel(struct efx_channel *channel) 540{ 541 if (!channel->enabled) 542 return; 543 544 netif_dbg(channel->efx, ifdown, channel->efx->net_dev, 545 "stop chan %d\n", channel->channel); 546 547 channel->enabled = false; 548 napi_disable(&channel->napi_str); 549} 550 551static void efx_fini_channels(struct efx_nic *efx) 552{ 553 struct efx_channel *channel; 554 struct efx_tx_queue *tx_queue; 555 struct efx_rx_queue *rx_queue; 556 int rc; 557 558 EFX_ASSERT_RESET_SERIALISED(efx); 559 BUG_ON(efx->port_enabled); 560 561 rc = efx_nic_flush_queues(efx); 562 if (rc && EFX_WORKAROUND_7803(efx)) { 563 /* Schedule a reset to recover from the flush failure. The 564 * descriptor caches reference memory we're about to free, 565 * but falcon_reconfigure_mac_wrapper() won't reconnect 566 * the MACs because of the pending reset. */ 567 netif_err(efx, drv, efx->net_dev, 568 "Resetting to recover from flush failure\n"); 569 efx_schedule_reset(efx, RESET_TYPE_ALL); 570 } else if (rc) { 571 netif_err(efx, drv, efx->net_dev, "failed to flush queues\n"); 572 } else { 573 netif_dbg(efx, drv, efx->net_dev, 574 "successfully flushed all queues\n"); 575 } 576 577 efx_for_each_channel(channel, efx) { 578 netif_dbg(channel->efx, drv, channel->efx->net_dev, 579 "shut down chan %d\n", channel->channel); 580 581 efx_for_each_channel_rx_queue(rx_queue, channel) 582 efx_fini_rx_queue(rx_queue); 583 efx_for_each_channel_tx_queue(tx_queue, channel) 584 efx_fini_tx_queue(tx_queue); 585 efx_fini_eventq(channel); 586 } 587} 588 589static void efx_remove_channel(struct efx_channel *channel) 590{ 591 struct efx_tx_queue *tx_queue; 592 struct efx_rx_queue *rx_queue; 593 594 netif_dbg(channel->efx, drv, channel->efx->net_dev, 595 "destroy chan %d\n", channel->channel); 596 597 efx_for_each_channel_rx_queue(rx_queue, channel) 598 efx_remove_rx_queue(rx_queue); 599 efx_for_each_channel_tx_queue(tx_queue, channel) 600 efx_remove_tx_queue(tx_queue); 601 efx_remove_eventq(channel); 602} 603 604void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue) 605{ 606 mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100)); 607} 608 609/************************************************************************** 610 * 611 * Port handling 612 * 613 **************************************************************************/ 614 615/* This ensures that the kernel is kept informed (via 616 * netif_carrier_on/off) of the link status, and also maintains the 617 * link status's stop on the port's TX queue. 618 */ 619void efx_link_status_changed(struct efx_nic *efx) 620{ 621 struct efx_link_state *link_state = &efx->link_state; 622 623 /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure 624 * that no events are triggered between unregister_netdev() and the 625 * driver unloading. A more general condition is that NETDEV_CHANGE 626 * can only be generated between NETDEV_UP and NETDEV_DOWN */ 627 if (!netif_running(efx->net_dev)) 628 return; 629 630 if (efx->port_inhibited) { 631 netif_carrier_off(efx->net_dev); 632 return; 633 } 634 635 if (link_state->up != netif_carrier_ok(efx->net_dev)) { 636 efx->n_link_state_changes++; 637 638 if (link_state->up) 639 netif_carrier_on(efx->net_dev); 640 else 641 netif_carrier_off(efx->net_dev); 642 } 643 644 /* Status message for kernel log */ 645 if (link_state->up) { 646 netif_info(efx, link, efx->net_dev, 647 "link up at %uMbps %s-duplex (MTU %d)%s\n", 648 link_state->speed, link_state->fd ? "full" : "half", 649 efx->net_dev->mtu, 650 (efx->promiscuous ? " [PROMISC]" : "")); 651 } else { 652 netif_info(efx, link, efx->net_dev, "link down\n"); 653 } 654 655} 656 657void efx_link_set_advertising(struct efx_nic *efx, u32 advertising) 658{ 659 efx->link_advertising = advertising; 660 if (advertising) { 661 if (advertising & ADVERTISED_Pause) 662 efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX); 663 else 664 efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX); 665 if (advertising & ADVERTISED_Asym_Pause) 666 efx->wanted_fc ^= EFX_FC_TX; 667 } 668} 669 670void efx_link_set_wanted_fc(struct efx_nic *efx, enum efx_fc_type wanted_fc) 671{ 672 efx->wanted_fc = wanted_fc; 673 if (efx->link_advertising) { 674 if (wanted_fc & EFX_FC_RX) 675 efx->link_advertising |= (ADVERTISED_Pause | 676 ADVERTISED_Asym_Pause); 677 else 678 efx->link_advertising &= ~(ADVERTISED_Pause | 679 ADVERTISED_Asym_Pause); 680 if (wanted_fc & EFX_FC_TX) 681 efx->link_advertising ^= ADVERTISED_Asym_Pause; 682 } 683} 684 685static void efx_fini_port(struct efx_nic *efx); 686 687/* Push loopback/power/transmit disable settings to the PHY, and reconfigure 688 * the MAC appropriately. All other PHY configuration changes are pushed 689 * through phy_op->set_settings(), and pushed asynchronously to the MAC 690 * through efx_monitor(). 691 * 692 * Callers must hold the mac_lock 693 */ 694int __efx_reconfigure_port(struct efx_nic *efx) 695{ 696 enum efx_phy_mode phy_mode; 697 int rc; 698 699 WARN_ON(!mutex_is_locked(&efx->mac_lock)); 700 701 /* Serialise the promiscuous flag with efx_set_multicast_list. */ 702 if (efx_dev_registered(efx)) { 703 netif_addr_lock_bh(efx->net_dev); 704 netif_addr_unlock_bh(efx->net_dev); 705 } 706 707 /* Disable PHY transmit in mac level loopbacks */ 708 phy_mode = efx->phy_mode; 709 if (LOOPBACK_INTERNAL(efx)) 710 efx->phy_mode |= PHY_MODE_TX_DISABLED; 711 else 712 efx->phy_mode &= ~PHY_MODE_TX_DISABLED; 713 714 rc = efx->type->reconfigure_port(efx); 715 716 if (rc) 717 efx->phy_mode = phy_mode; 718 719 return rc; 720} 721 722/* Reinitialise the MAC to pick up new PHY settings, even if the port is 723 * disabled. */ 724int efx_reconfigure_port(struct efx_nic *efx) 725{ 726 int rc; 727 728 EFX_ASSERT_RESET_SERIALISED(efx); 729 730 mutex_lock(&efx->mac_lock); 731 rc = __efx_reconfigure_port(efx); 732 mutex_unlock(&efx->mac_lock); 733 734 return rc; 735} 736 737/* Asynchronous work item for changing MAC promiscuity and multicast 738 * hash. Avoid a drain/rx_ingress enable by reconfiguring the current 739 * MAC directly. */ 740static void efx_mac_work(struct work_struct *data) 741{ 742 struct efx_nic *efx = container_of(data, struct efx_nic, mac_work); 743 744 mutex_lock(&efx->mac_lock); 745 if (efx->port_enabled) { 746 efx->type->push_multicast_hash(efx); 747 efx->mac_op->reconfigure(efx); 748 } 749 mutex_unlock(&efx->mac_lock); 750} 751 752static int efx_probe_port(struct efx_nic *efx) 753{ 754 int rc; 755 756 netif_dbg(efx, probe, efx->net_dev, "create port\n"); 757 758 if (phy_flash_cfg) 759 efx->phy_mode = PHY_MODE_SPECIAL; 760 761 /* Connect up MAC/PHY operations table */ 762 rc = efx->type->probe_port(efx); 763 if (rc) 764 goto err; 765 766 /* Sanity check MAC address */ 767 if (is_valid_ether_addr(efx->mac_address)) { 768 memcpy(efx->net_dev->dev_addr, efx->mac_address, ETH_ALEN); 769 } else { 770 netif_err(efx, probe, efx->net_dev, "invalid MAC address %pM\n", 771 efx->mac_address); 772 if (!allow_bad_hwaddr) { 773 rc = -EINVAL; 774 goto err; 775 } 776 random_ether_addr(efx->net_dev->dev_addr); 777 netif_info(efx, probe, efx->net_dev, 778 "using locally-generated MAC %pM\n", 779 efx->net_dev->dev_addr); 780 } 781 782 return 0; 783 784 err: 785 efx_remove_port(efx); 786 return rc; 787} 788 789static int efx_init_port(struct efx_nic *efx) 790{ 791 int rc; 792 793 netif_dbg(efx, drv, efx->net_dev, "init port\n"); 794 795 mutex_lock(&efx->mac_lock); 796 797 rc = efx->phy_op->init(efx); 798 if (rc) 799 goto fail1; 800 801 efx->port_initialized = true; 802 803 /* Reconfigure the MAC before creating dma queues (required for 804 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */ 805 efx->mac_op->reconfigure(efx); 806 807 /* Ensure the PHY advertises the correct flow control settings */ 808 rc = efx->phy_op->reconfigure(efx); 809 if (rc) 810 goto fail2; 811 812 mutex_unlock(&efx->mac_lock); 813 return 0; 814 815fail2: 816 efx->phy_op->fini(efx); 817fail1: 818 mutex_unlock(&efx->mac_lock); 819 return rc; 820} 821 822static void efx_start_port(struct efx_nic *efx) 823{ 824 netif_dbg(efx, ifup, efx->net_dev, "start port\n"); 825 BUG_ON(efx->port_enabled); 826 827 mutex_lock(&efx->mac_lock); 828 efx->port_enabled = true; 829 830 /* efx_mac_work() might have been scheduled after efx_stop_port(), 831 * and then cancelled by efx_flush_all() */ 832 efx->type->push_multicast_hash(efx); 833 efx->mac_op->reconfigure(efx); 834 835 mutex_unlock(&efx->mac_lock); 836} 837 838/* Prevent efx_mac_work() and efx_monitor() from working */ 839static void efx_stop_port(struct efx_nic *efx) 840{ 841 netif_dbg(efx, ifdown, efx->net_dev, "stop port\n"); 842 843 mutex_lock(&efx->mac_lock); 844 efx->port_enabled = false; 845 mutex_unlock(&efx->mac_lock); 846 847 /* Serialise against efx_set_multicast_list() */ 848 if (efx_dev_registered(efx)) { 849 netif_addr_lock_bh(efx->net_dev); 850 netif_addr_unlock_bh(efx->net_dev); 851 } 852} 853 854static void efx_fini_port(struct efx_nic *efx) 855{ 856 netif_dbg(efx, drv, efx->net_dev, "shut down port\n"); 857 858 if (!efx->port_initialized) 859 return; 860 861 efx->phy_op->fini(efx); 862 efx->port_initialized = false; 863 864 efx->link_state.up = false; 865 efx_link_status_changed(efx); 866} 867 868static void efx_remove_port(struct efx_nic *efx) 869{ 870 netif_dbg(efx, drv, efx->net_dev, "destroying port\n"); 871 872 efx->type->remove_port(efx); 873} 874 875/************************************************************************** 876 * 877 * NIC handling 878 * 879 **************************************************************************/ 880 881/* This configures the PCI device to enable I/O and DMA. */ 882static int efx_init_io(struct efx_nic *efx) 883{ 884 struct pci_dev *pci_dev = efx->pci_dev; 885 dma_addr_t dma_mask = efx->type->max_dma_mask; 886 int rc; 887 888 netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n"); 889 890 rc = pci_enable_device(pci_dev); 891 if (rc) { 892 netif_err(efx, probe, efx->net_dev, 893 "failed to enable PCI device\n"); 894 goto fail1; 895 } 896 897 pci_set_master(pci_dev); 898 899 /* Set the PCI DMA mask. Try all possibilities from our 900 * genuine mask down to 32 bits, because some architectures 901 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit 902 * masks event though they reject 46 bit masks. 903 */ 904 while (dma_mask > 0x7fffffffUL) { 905 if (pci_dma_supported(pci_dev, dma_mask) && 906 ((rc = pci_set_dma_mask(pci_dev, dma_mask)) == 0)) 907 break; 908 dma_mask >>= 1; 909 } 910 if (rc) { 911 netif_err(efx, probe, efx->net_dev, 912 "could not find a suitable DMA mask\n"); 913 goto fail2; 914 } 915 netif_dbg(efx, probe, efx->net_dev, 916 "using DMA mask %llx\n", (unsigned long long) dma_mask); 917 rc = pci_set_consistent_dma_mask(pci_dev, dma_mask); 918 if (rc) { 919 /* pci_set_consistent_dma_mask() is not *allowed* to 920 * fail with a mask that pci_set_dma_mask() accepted, 921 * but just in case... 922 */ 923 netif_err(efx, probe, efx->net_dev, 924 "failed to set consistent DMA mask\n"); 925 goto fail2; 926 } 927 928 efx->membase_phys = pci_resource_start(efx->pci_dev, EFX_MEM_BAR); 929 rc = pci_request_region(pci_dev, EFX_MEM_BAR, "sfc"); 930 if (rc) { 931 netif_err(efx, probe, efx->net_dev, 932 "request for memory BAR failed\n"); 933 rc = -EIO; 934 goto fail3; 935 } 936 efx->membase = ioremap_nocache(efx->membase_phys, 937 efx->type->mem_map_size); 938 if (!efx->membase) { 939 netif_err(efx, probe, efx->net_dev, 940 "could not map memory BAR at %llx+%x\n", 941 (unsigned long long)efx->membase_phys, 942 efx->type->mem_map_size); 943 rc = -ENOMEM; 944 goto fail4; 945 } 946 netif_dbg(efx, probe, efx->net_dev, 947 "memory BAR at %llx+%x (virtual %p)\n", 948 (unsigned long long)efx->membase_phys, 949 efx->type->mem_map_size, efx->membase); 950 951 return 0; 952 953 fail4: 954 pci_release_region(efx->pci_dev, EFX_MEM_BAR); 955 fail3: 956 efx->membase_phys = 0; 957 fail2: 958 pci_disable_device(efx->pci_dev); 959 fail1: 960 return rc; 961} 962 963static void efx_fini_io(struct efx_nic *efx) 964{ 965 netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n"); 966 967 if (efx->membase) { 968 iounmap(efx->membase); 969 efx->membase = NULL; 970 } 971 972 if (efx->membase_phys) { 973 pci_release_region(efx->pci_dev, EFX_MEM_BAR); 974 efx->membase_phys = 0; 975 } 976 977 pci_disable_device(efx->pci_dev); 978} 979 980/* Get number of channels wanted. Each channel will have its own IRQ, 981 * 1 RX queue and/or 2 TX queues. */ 982static int efx_wanted_channels(void) 983{ 984 cpumask_var_t core_mask; 985 int count; 986 int cpu; 987 988 if (unlikely(!zalloc_cpumask_var(&core_mask, GFP_KERNEL))) { 989 printk(KERN_WARNING 990 "sfc: RSS disabled due to allocation failure\n"); 991 return 1; 992 } 993 994 count = 0; 995 for_each_online_cpu(cpu) { 996 if (!cpumask_test_cpu(cpu, core_mask)) { 997 ++count; 998 cpumask_or(core_mask, core_mask, 999 topology_core_cpumask(cpu)); 1000 } 1001 } 1002 1003 free_cpumask_var(core_mask); 1004 return count; 1005} 1006 1007/* Probe the number and type of interrupts we are able to obtain, and 1008 * the resulting numbers of channels and RX queues. 1009 */ 1010static void efx_probe_interrupts(struct efx_nic *efx) 1011{ 1012 int max_channels = 1013 min_t(int, efx->type->phys_addr_channels, EFX_MAX_CHANNELS); 1014 int rc, i; 1015 1016 if (efx->interrupt_mode == EFX_INT_MODE_MSIX) { 1017 struct msix_entry xentries[EFX_MAX_CHANNELS]; 1018 int n_channels; 1019 1020 n_channels = efx_wanted_channels(); 1021 if (separate_tx_channels) 1022 n_channels *= 2; 1023 n_channels = min(n_channels, max_channels); 1024 1025 for (i = 0; i < n_channels; i++) 1026 xentries[i].entry = i; 1027 rc = pci_enable_msix(efx->pci_dev, xentries, n_channels); 1028 if (rc > 0) { 1029 netif_err(efx, drv, efx->net_dev, 1030 "WARNING: Insufficient MSI-X vectors" 1031 " available (%d < %d).\n", rc, n_channels); 1032 netif_err(efx, drv, efx->net_dev, 1033 "WARNING: Performance may be reduced.\n"); 1034 EFX_BUG_ON_PARANOID(rc >= n_channels); 1035 n_channels = rc; 1036 rc = pci_enable_msix(efx->pci_dev, xentries, 1037 n_channels); 1038 } 1039 1040 if (rc == 0) { 1041 efx->n_channels = n_channels; 1042 if (separate_tx_channels) { 1043 efx->n_tx_channels = 1044 max(efx->n_channels / 2, 1U); 1045 efx->n_rx_channels = 1046 max(efx->n_channels - 1047 efx->n_tx_channels, 1U); 1048 } else { 1049 efx->n_tx_channels = efx->n_channels; 1050 efx->n_rx_channels = efx->n_channels; 1051 } 1052 for (i = 0; i < n_channels; i++) 1053 efx->channel[i].irq = xentries[i].vector; 1054 } else { 1055 /* Fall back to single channel MSI */ 1056 efx->interrupt_mode = EFX_INT_MODE_MSI; 1057 netif_err(efx, drv, efx->net_dev, 1058 "could not enable MSI-X\n"); 1059 } 1060 } 1061 1062 /* Try single interrupt MSI */ 1063 if (efx->interrupt_mode == EFX_INT_MODE_MSI) { 1064 efx->n_channels = 1; 1065 efx->n_rx_channels = 1; 1066 efx->n_tx_channels = 1; 1067 rc = pci_enable_msi(efx->pci_dev); 1068 if (rc == 0) { 1069 efx->channel[0].irq = efx->pci_dev->irq; 1070 } else { 1071 netif_err(efx, drv, efx->net_dev, 1072 "could not enable MSI\n"); 1073 efx->interrupt_mode = EFX_INT_MODE_LEGACY; 1074 } 1075 } 1076 1077 /* Assume legacy interrupts */ 1078 if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) { 1079 efx->n_channels = 1 + (separate_tx_channels ? 1 : 0); 1080 efx->n_rx_channels = 1; 1081 efx->n_tx_channels = 1; 1082 efx->legacy_irq = efx->pci_dev->irq; 1083 } 1084} 1085 1086static void efx_remove_interrupts(struct efx_nic *efx) 1087{ 1088 struct efx_channel *channel; 1089 1090 /* Remove MSI/MSI-X interrupts */ 1091 efx_for_each_channel(channel, efx) 1092 channel->irq = 0; 1093 pci_disable_msi(efx->pci_dev); 1094 pci_disable_msix(efx->pci_dev); 1095 1096 /* Remove legacy interrupt */ 1097 efx->legacy_irq = 0; 1098} 1099 1100static void efx_set_channels(struct efx_nic *efx) 1101{ 1102 struct efx_channel *channel; 1103 struct efx_tx_queue *tx_queue; 1104 struct efx_rx_queue *rx_queue; 1105 unsigned tx_channel_offset = 1106 separate_tx_channels ? efx->n_channels - efx->n_tx_channels : 0; 1107 1108 efx_for_each_channel(channel, efx) { 1109 if (channel->channel - tx_channel_offset < efx->n_tx_channels) { 1110 channel->tx_queue = &efx->tx_queue[ 1111 (channel->channel - tx_channel_offset) * 1112 EFX_TXQ_TYPES]; 1113 efx_for_each_channel_tx_queue(tx_queue, channel) 1114 tx_queue->channel = channel; 1115 } 1116 } 1117 1118 efx_for_each_rx_queue(rx_queue, efx) 1119 rx_queue->channel = &efx->channel[rx_queue->queue]; 1120} 1121 1122static int efx_probe_nic(struct efx_nic *efx) 1123{ 1124 size_t i; 1125 int rc; 1126 1127 netif_dbg(efx, probe, efx->net_dev, "creating NIC\n"); 1128 1129 /* Carry out hardware-type specific initialisation */ 1130 rc = efx->type->probe(efx); 1131 if (rc) 1132 return rc; 1133 1134 /* Determine the number of channels and queues by trying to hook 1135 * in MSI-X interrupts. */ 1136 efx_probe_interrupts(efx); 1137 1138 if (efx->n_channels > 1) 1139 get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key)); 1140 for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++) 1141 efx->rx_indir_table[i] = i % efx->n_rx_channels; 1142 1143 efx_set_channels(efx); 1144 efx->net_dev->real_num_tx_queues = efx->n_tx_channels; 1145 1146 /* Initialise the interrupt moderation settings */ 1147 efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true); 1148 1149 return 0; 1150} 1151 1152static void efx_remove_nic(struct efx_nic *efx) 1153{ 1154 netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n"); 1155 1156 efx_remove_interrupts(efx); 1157 efx->type->remove(efx); 1158} 1159 1160/************************************************************************** 1161 * 1162 * NIC startup/shutdown 1163 * 1164 *************************************************************************/ 1165 1166static int efx_probe_all(struct efx_nic *efx) 1167{ 1168 struct efx_channel *channel; 1169 int rc; 1170 1171 /* Create NIC */ 1172 rc = efx_probe_nic(efx); 1173 if (rc) { 1174 netif_err(efx, probe, efx->net_dev, "failed to create NIC\n"); 1175 goto fail1; 1176 } 1177 1178 /* Create port */ 1179 rc = efx_probe_port(efx); 1180 if (rc) { 1181 netif_err(efx, probe, efx->net_dev, "failed to create port\n"); 1182 goto fail2; 1183 } 1184 1185 /* Create channels */ 1186 efx_for_each_channel(channel, efx) { 1187 rc = efx_probe_channel(channel); 1188 if (rc) { 1189 netif_err(efx, probe, efx->net_dev, 1190 "failed to create channel %d\n", 1191 channel->channel); 1192 goto fail3; 1193 } 1194 } 1195 efx_set_channel_names(efx); 1196 1197 return 0; 1198 1199 fail3: 1200 efx_for_each_channel(channel, efx) 1201 efx_remove_channel(channel); 1202 efx_remove_port(efx); 1203 fail2: 1204 efx_remove_nic(efx); 1205 fail1: 1206 return rc; 1207} 1208 1209/* Called after previous invocation(s) of efx_stop_all, restarts the 1210 * port, kernel transmit queue, NAPI processing and hardware interrupts, 1211 * and ensures that the port is scheduled to be reconfigured. 1212 * This function is safe to call multiple times when the NIC is in any 1213 * state. */ 1214static void efx_start_all(struct efx_nic *efx) 1215{ 1216 struct efx_channel *channel; 1217 1218 EFX_ASSERT_RESET_SERIALISED(efx); 1219 1220 /* Check that it is appropriate to restart the interface. All 1221 * of these flags are safe to read under just the rtnl lock */ 1222 if (efx->port_enabled) 1223 return; 1224 if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) 1225 return; 1226 if (efx_dev_registered(efx) && !netif_running(efx->net_dev)) 1227 return; 1228 1229 /* Mark the port as enabled so port reconfigurations can start, then 1230 * restart the transmit interface early so the watchdog timer stops */ 1231 efx_start_port(efx); 1232 1233 efx_for_each_channel(channel, efx) { 1234 if (efx_dev_registered(efx)) 1235 efx_wake_queue(channel); 1236 efx_start_channel(channel); 1237 } 1238 1239 efx_nic_enable_interrupts(efx); 1240 1241 /* Switch to event based MCDI completions after enabling interrupts. 1242 * If a reset has been scheduled, then we need to stay in polled mode. 1243 * Rather than serialising efx_mcdi_mode_event() [which sleeps] and 1244 * reset_pending [modified from an atomic context], we instead guarantee 1245 * that efx_mcdi_mode_poll() isn't reverted erroneously */ 1246 efx_mcdi_mode_event(efx); 1247 if (efx->reset_pending != RESET_TYPE_NONE) 1248 efx_mcdi_mode_poll(efx); 1249 1250 /* Start the hardware monitor if there is one. Otherwise (we're link 1251 * event driven), we have to poll the PHY because after an event queue 1252 * flush, we could have a missed a link state change */ 1253 if (efx->type->monitor != NULL) { 1254 queue_delayed_work(efx->workqueue, &efx->monitor_work, 1255 efx_monitor_interval); 1256 } else { 1257 mutex_lock(&efx->mac_lock); 1258 if (efx->phy_op->poll(efx)) 1259 efx_link_status_changed(efx); 1260 mutex_unlock(&efx->mac_lock); 1261 } 1262 1263 efx->type->start_stats(efx); 1264} 1265 1266/* Flush all delayed work. Should only be called when no more delayed work 1267 * will be scheduled. This doesn't flush pending online resets (efx_reset), 1268 * since we're holding the rtnl_lock at this point. */ 1269static void efx_flush_all(struct efx_nic *efx) 1270{ 1271 /* Make sure the hardware monitor is stopped */ 1272 cancel_delayed_work_sync(&efx->monitor_work); 1273 /* Stop scheduled port reconfigurations */ 1274 cancel_work_sync(&efx->mac_work); 1275} 1276 1277/* Quiesce hardware and software without bringing the link down. 1278 * Safe to call multiple times, when the nic and interface is in any 1279 * state. The caller is guaranteed to subsequently be in a position 1280 * to modify any hardware and software state they see fit without 1281 * taking locks. */ 1282static void efx_stop_all(struct efx_nic *efx) 1283{ 1284 struct efx_channel *channel; 1285 1286 EFX_ASSERT_RESET_SERIALISED(efx); 1287 1288 /* port_enabled can be read safely under the rtnl lock */ 1289 if (!efx->port_enabled) 1290 return; 1291 1292 efx->type->stop_stats(efx); 1293 1294 /* Switch to MCDI polling on Siena before disabling interrupts */ 1295 efx_mcdi_mode_poll(efx); 1296 1297 /* Disable interrupts and wait for ISR to complete */ 1298 efx_nic_disable_interrupts(efx); 1299 if (efx->legacy_irq) 1300 synchronize_irq(efx->legacy_irq); 1301 efx_for_each_channel(channel, efx) { 1302 if (channel->irq) 1303 synchronize_irq(channel->irq); 1304 } 1305 1306 /* Stop all NAPI processing and synchronous rx refills */ 1307 efx_for_each_channel(channel, efx) 1308 efx_stop_channel(channel); 1309 1310 /* Stop all asynchronous port reconfigurations. Since all 1311 * event processing has already been stopped, there is no 1312 * window to loose phy events */ 1313 efx_stop_port(efx); 1314 1315 /* Flush efx_mac_work(), refill_workqueue, monitor_work */ 1316 efx_flush_all(efx); 1317 1318 /* Stop the kernel transmit interface late, so the watchdog 1319 * timer isn't ticking over the flush */ 1320 if (efx_dev_registered(efx)) { 1321 struct efx_channel *channel; 1322 efx_for_each_channel(channel, efx) 1323 efx_stop_queue(channel); 1324 netif_tx_lock_bh(efx->net_dev); 1325 netif_tx_unlock_bh(efx->net_dev); 1326 } 1327} 1328 1329static void efx_remove_all(struct efx_nic *efx) 1330{ 1331 struct efx_channel *channel; 1332 1333 efx_for_each_channel(channel, efx) 1334 efx_remove_channel(channel); 1335 efx_remove_port(efx); 1336 efx_remove_nic(efx); 1337} 1338 1339/************************************************************************** 1340 * 1341 * Interrupt moderation 1342 * 1343 **************************************************************************/ 1344 1345static unsigned irq_mod_ticks(int usecs, int resolution) 1346{ 1347 if (usecs <= 0) 1348 return 0; /* cannot receive interrupts ahead of time :-) */ 1349 if (usecs < resolution) 1350 return 1; /* never round down to 0 */ 1351 return usecs / resolution; 1352} 1353 1354/* Set interrupt moderation parameters */ 1355void efx_init_irq_moderation(struct efx_nic *efx, int tx_usecs, int rx_usecs, 1356 bool rx_adaptive) 1357{ 1358 struct efx_tx_queue *tx_queue; 1359 struct efx_rx_queue *rx_queue; 1360 unsigned tx_ticks = irq_mod_ticks(tx_usecs, EFX_IRQ_MOD_RESOLUTION); 1361 unsigned rx_ticks = irq_mod_ticks(rx_usecs, EFX_IRQ_MOD_RESOLUTION); 1362 1363 EFX_ASSERT_RESET_SERIALISED(efx); 1364 1365 efx_for_each_tx_queue(tx_queue, efx) 1366 tx_queue->channel->irq_moderation = tx_ticks; 1367 1368 efx->irq_rx_adaptive = rx_adaptive; 1369 efx->irq_rx_moderation = rx_ticks; 1370 efx_for_each_rx_queue(rx_queue, efx) 1371 rx_queue->channel->irq_moderation = rx_ticks; 1372} 1373 1374/************************************************************************** 1375 * 1376 * Hardware monitor 1377 * 1378 **************************************************************************/ 1379 1380/* Run periodically off the general workqueue. Serialised against 1381 * efx_reconfigure_port via the mac_lock */ 1382static void efx_monitor(struct work_struct *data) 1383{ 1384 struct efx_nic *efx = container_of(data, struct efx_nic, 1385 monitor_work.work); 1386 1387 netif_vdbg(efx, timer, efx->net_dev, 1388 "hardware monitor executing on CPU %d\n", 1389 raw_smp_processor_id()); 1390 BUG_ON(efx->type->monitor == NULL); 1391 1392 /* If the mac_lock is already held then it is likely a port 1393 * reconfiguration is already in place, which will likely do 1394 * most of the work of check_hw() anyway. */ 1395 if (!mutex_trylock(&efx->mac_lock)) 1396 goto out_requeue; 1397 if (!efx->port_enabled) 1398 goto out_unlock; 1399 efx->type->monitor(efx); 1400 1401out_unlock: 1402 mutex_unlock(&efx->mac_lock); 1403out_requeue: 1404 queue_delayed_work(efx->workqueue, &efx->monitor_work, 1405 efx_monitor_interval); 1406} 1407 1408/************************************************************************** 1409 * 1410 * ioctls 1411 * 1412 *************************************************************************/ 1413 1414/* Net device ioctl 1415 * Context: process, rtnl_lock() held. 1416 */ 1417static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd) 1418{ 1419 struct efx_nic *efx = netdev_priv(net_dev); 1420 struct mii_ioctl_data *data = if_mii(ifr); 1421 1422 EFX_ASSERT_RESET_SERIALISED(efx); 1423 1424 /* Convert phy_id from older PRTAD/DEVAD format */ 1425 if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) && 1426 (data->phy_id & 0xfc00) == 0x0400) 1427 data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400; 1428 1429 return mdio_mii_ioctl(&efx->mdio, data, cmd); 1430} 1431 1432/************************************************************************** 1433 * 1434 * NAPI interface 1435 * 1436 **************************************************************************/ 1437 1438static int efx_init_napi(struct efx_nic *efx) 1439{ 1440 struct efx_channel *channel; 1441 1442 efx_for_each_channel(channel, efx) { 1443 channel->napi_dev = efx->net_dev; 1444 netif_napi_add(channel->napi_dev, &channel->napi_str, 1445 efx_poll, napi_weight); 1446 } 1447 return 0; 1448} 1449 1450static void efx_fini_napi(struct efx_nic *efx) 1451{ 1452 struct efx_channel *channel; 1453 1454 efx_for_each_channel(channel, efx) { 1455 if (channel->napi_dev) 1456 netif_napi_del(&channel->napi_str); 1457 channel->napi_dev = NULL; 1458 } 1459} 1460 1461/************************************************************************** 1462 * 1463 * Kernel netpoll interface 1464 * 1465 *************************************************************************/ 1466 1467#ifdef CONFIG_NET_POLL_CONTROLLER 1468 1469/* Although in the common case interrupts will be disabled, this is not 1470 * guaranteed. However, all our work happens inside the NAPI callback, 1471 * so no locking is required. 1472 */ 1473static void efx_netpoll(struct net_device *net_dev) 1474{ 1475 struct efx_nic *efx = netdev_priv(net_dev); 1476 struct efx_channel *channel; 1477 1478 efx_for_each_channel(channel, efx) 1479 efx_schedule_channel(channel); 1480} 1481 1482#endif 1483 1484/************************************************************************** 1485 * 1486 * Kernel net device interface 1487 * 1488 *************************************************************************/ 1489 1490/* Context: process, rtnl_lock() held. */ 1491static int efx_net_open(struct net_device *net_dev) 1492{ 1493 struct efx_nic *efx = netdev_priv(net_dev); 1494 EFX_ASSERT_RESET_SERIALISED(efx); 1495 1496 netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n", 1497 raw_smp_processor_id()); 1498 1499 if (efx->state == STATE_DISABLED) 1500 return -EIO; 1501 if (efx->phy_mode & PHY_MODE_SPECIAL) 1502 return -EBUSY; 1503 if (efx_mcdi_poll_reboot(efx) && efx_reset(efx, RESET_TYPE_ALL)) 1504 return -EIO; 1505 1506 /* Notify the kernel of the link state polled during driver load, 1507 * before the monitor starts running */ 1508 efx_link_status_changed(efx); 1509 1510 efx_start_all(efx); 1511 return 0; 1512} 1513 1514/* Context: process, rtnl_lock() held. 1515 * Note that the kernel will ignore our return code; this method 1516 * should really be a void. 1517 */ 1518static int efx_net_stop(struct net_device *net_dev) 1519{ 1520 struct efx_nic *efx = netdev_priv(net_dev); 1521 1522 netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n", 1523 raw_smp_processor_id()); 1524 1525 if (efx->state != STATE_DISABLED) { 1526 /* Stop the device and flush all the channels */ 1527 efx_stop_all(efx); 1528 efx_fini_channels(efx); 1529 efx_init_channels(efx); 1530 } 1531 1532 return 0; 1533} 1534 1535/* Context: process, dev_base_lock or RTNL held, non-blocking. */ 1536static struct rtnl_link_stats64 *efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats) 1537{ 1538 struct efx_nic *efx = netdev_priv(net_dev); 1539 struct efx_mac_stats *mac_stats = &efx->mac_stats; 1540 1541 spin_lock_bh(&efx->stats_lock); 1542 efx->type->update_stats(efx); 1543 spin_unlock_bh(&efx->stats_lock); 1544 1545 stats->rx_packets = mac_stats->rx_packets; 1546 stats->tx_packets = mac_stats->tx_packets; 1547 stats->rx_bytes = mac_stats->rx_bytes; 1548 stats->tx_bytes = mac_stats->tx_bytes; 1549 stats->multicast = mac_stats->rx_multicast; 1550 stats->collisions = mac_stats->tx_collision; 1551 stats->rx_length_errors = (mac_stats->rx_gtjumbo + 1552 mac_stats->rx_length_error); 1553 stats->rx_over_errors = efx->n_rx_nodesc_drop_cnt; 1554 stats->rx_crc_errors = mac_stats->rx_bad; 1555 stats->rx_frame_errors = mac_stats->rx_align_error; 1556 stats->rx_fifo_errors = mac_stats->rx_overflow; 1557 stats->rx_missed_errors = mac_stats->rx_missed; 1558 stats->tx_window_errors = mac_stats->tx_late_collision; 1559 1560 stats->rx_errors = (stats->rx_length_errors + 1561 stats->rx_crc_errors + 1562 stats->rx_frame_errors + 1563 mac_stats->rx_symbol_error); 1564 stats->tx_errors = (stats->tx_window_errors + 1565 mac_stats->tx_bad); 1566 1567 return stats; 1568} 1569 1570/* Context: netif_tx_lock held, BHs disabled. */ 1571static void efx_watchdog(struct net_device *net_dev) 1572{ 1573 struct efx_nic *efx = netdev_priv(net_dev); 1574 1575 netif_err(efx, tx_err, efx->net_dev, 1576 "TX stuck with port_enabled=%d: resetting channels\n", 1577 efx->port_enabled); 1578 1579 efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG); 1580} 1581 1582 1583/* Context: process, rtnl_lock() held. */ 1584static int efx_change_mtu(struct net_device *net_dev, int new_mtu) 1585{ 1586 struct efx_nic *efx = netdev_priv(net_dev); 1587 int rc = 0; 1588 1589 EFX_ASSERT_RESET_SERIALISED(efx); 1590 1591 if (new_mtu > EFX_MAX_MTU) 1592 return -EINVAL; 1593 1594 efx_stop_all(efx); 1595 1596 netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); 1597 1598 efx_fini_channels(efx); 1599 1600 mutex_lock(&efx->mac_lock); 1601 /* Reconfigure the MAC before enabling the dma queues so that 1602 * the RX buffers don't overflow */ 1603 net_dev->mtu = new_mtu; 1604 efx->mac_op->reconfigure(efx); 1605 mutex_unlock(&efx->mac_lock); 1606 1607 efx_init_channels(efx); 1608 1609 efx_start_all(efx); 1610 return rc; 1611} 1612 1613static int efx_set_mac_address(struct net_device *net_dev, void *data) 1614{ 1615 struct efx_nic *efx = netdev_priv(net_dev); 1616 struct sockaddr *addr = data; 1617 char *new_addr = addr->sa_data; 1618 1619 EFX_ASSERT_RESET_SERIALISED(efx); 1620 1621 if (!is_valid_ether_addr(new_addr)) { 1622 netif_err(efx, drv, efx->net_dev, 1623 "invalid ethernet MAC address requested: %pM\n", 1624 new_addr); 1625 return -EINVAL; 1626 } 1627 1628 memcpy(net_dev->dev_addr, new_addr, net_dev->addr_len); 1629 1630 /* Reconfigure the MAC */ 1631 mutex_lock(&efx->mac_lock); 1632 efx->mac_op->reconfigure(efx); 1633 mutex_unlock(&efx->mac_lock); 1634 1635 return 0; 1636} 1637 1638/* Context: netif_addr_lock held, BHs disabled. */ 1639static void efx_set_multicast_list(struct net_device *net_dev) 1640{ 1641 struct efx_nic *efx = netdev_priv(net_dev); 1642 struct netdev_hw_addr *ha; 1643 union efx_multicast_hash *mc_hash = &efx->multicast_hash; 1644 u32 crc; 1645 int bit; 1646 1647 efx->promiscuous = !!(net_dev->flags & IFF_PROMISC); 1648 1649 /* Build multicast hash table */ 1650 if (efx->promiscuous || (net_dev->flags & IFF_ALLMULTI)) { 1651 memset(mc_hash, 0xff, sizeof(*mc_hash)); 1652 } else { 1653 memset(mc_hash, 0x00, sizeof(*mc_hash)); 1654 netdev_for_each_mc_addr(ha, net_dev) { 1655 crc = ether_crc_le(ETH_ALEN, ha->addr); 1656 bit = crc & (EFX_MCAST_HASH_ENTRIES - 1); 1657 set_bit_le(bit, mc_hash->byte); 1658 } 1659 1660 /* Broadcast packets go through the multicast hash filter. 1661 * ether_crc_le() of the broadcast address is 0xbe2612ff 1662 * so we always add bit 0xff to the mask. 1663 */ 1664 set_bit_le(0xff, mc_hash->byte); 1665 } 1666 1667 if (efx->port_enabled) 1668 queue_work(efx->workqueue, &efx->mac_work); 1669 /* Otherwise efx_start_port() will do this */ 1670} 1671 1672static const struct net_device_ops efx_netdev_ops = { 1673 .ndo_open = efx_net_open, 1674 .ndo_stop = efx_net_stop, 1675 .ndo_get_stats64 = efx_net_stats, 1676 .ndo_tx_timeout = efx_watchdog, 1677 .ndo_start_xmit = efx_hard_start_xmit, 1678 .ndo_validate_addr = eth_validate_addr, 1679 .ndo_do_ioctl = efx_ioctl, 1680 .ndo_change_mtu = efx_change_mtu, 1681 .ndo_set_mac_address = efx_set_mac_address, 1682 .ndo_set_multicast_list = efx_set_multicast_list, 1683#ifdef CONFIG_NET_POLL_CONTROLLER 1684 .ndo_poll_controller = efx_netpoll, 1685#endif 1686}; 1687 1688static void efx_update_name(struct efx_nic *efx) 1689{ 1690 strcpy(efx->name, efx->net_dev->name); 1691 efx_mtd_rename(efx); 1692 efx_set_channel_names(efx); 1693} 1694 1695static int efx_netdev_event(struct notifier_block *this, 1696 unsigned long event, void *ptr) 1697{ 1698 struct net_device *net_dev = ptr; 1699 1700 if (net_dev->netdev_ops == &efx_netdev_ops && 1701 event == NETDEV_CHANGENAME) 1702 efx_update_name(netdev_priv(net_dev)); 1703 1704 return NOTIFY_DONE; 1705} 1706 1707static struct notifier_block efx_netdev_notifier = { 1708 .notifier_call = efx_netdev_event, 1709}; 1710 1711static ssize_t 1712show_phy_type(struct device *dev, struct device_attribute *attr, char *buf) 1713{ 1714 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 1715 return sprintf(buf, "%d\n", efx->phy_type); 1716} 1717static DEVICE_ATTR(phy_type, 0644, show_phy_type, NULL); 1718 1719static int efx_register_netdev(struct efx_nic *efx) 1720{ 1721 struct net_device *net_dev = efx->net_dev; 1722 int rc; 1723 1724 net_dev->watchdog_timeo = 5 * HZ; 1725 net_dev->irq = efx->pci_dev->irq; 1726 net_dev->netdev_ops = &efx_netdev_ops; 1727 SET_ETHTOOL_OPS(net_dev, &efx_ethtool_ops); 1728 1729 /* Clear MAC statistics */ 1730 efx->mac_op->update_stats(efx); 1731 memset(&efx->mac_stats, 0, sizeof(efx->mac_stats)); 1732 1733 rtnl_lock(); 1734 1735 rc = dev_alloc_name(net_dev, net_dev->name); 1736 if (rc < 0) 1737 goto fail_locked; 1738 efx_update_name(efx); 1739 1740 rc = register_netdevice(net_dev); 1741 if (rc) 1742 goto fail_locked; 1743 1744 /* Always start with carrier off; PHY events will detect the link */ 1745 netif_carrier_off(efx->net_dev); 1746 1747 rtnl_unlock(); 1748 1749 rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type); 1750 if (rc) { 1751 netif_err(efx, drv, efx->net_dev, 1752 "failed to init net dev attributes\n"); 1753 goto fail_registered; 1754 } 1755 1756 return 0; 1757 1758fail_locked: 1759 rtnl_unlock(); 1760 netif_err(efx, drv, efx->net_dev, "could not register net dev\n"); 1761 return rc; 1762 1763fail_registered: 1764 unregister_netdev(net_dev); 1765 return rc; 1766} 1767 1768static void efx_unregister_netdev(struct efx_nic *efx) 1769{ 1770 struct efx_tx_queue *tx_queue; 1771 1772 if (!efx->net_dev) 1773 return; 1774 1775 BUG_ON(netdev_priv(efx->net_dev) != efx); 1776 1777 /* Free up any skbs still remaining. This has to happen before 1778 * we try to unregister the netdev as running their destructors 1779 * may be needed to get the device ref. count to 0. */ 1780 efx_for_each_tx_queue(tx_queue, efx) 1781 efx_release_tx_buffers(tx_queue); 1782 1783 if (efx_dev_registered(efx)) { 1784 strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); 1785 device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type); 1786 unregister_netdev(efx->net_dev); 1787 } 1788} 1789 1790/************************************************************************** 1791 * 1792 * Device reset and suspend 1793 * 1794 **************************************************************************/ 1795 1796/* Tears down the entire software state and most of the hardware state 1797 * before reset. */ 1798void efx_reset_down(struct efx_nic *efx, enum reset_type method) 1799{ 1800 EFX_ASSERT_RESET_SERIALISED(efx); 1801 1802 efx_stop_all(efx); 1803 mutex_lock(&efx->mac_lock); 1804 mutex_lock(&efx->spi_lock); 1805 1806 efx_fini_channels(efx); 1807 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) 1808 efx->phy_op->fini(efx); 1809 efx->type->fini(efx); 1810} 1811 1812/* This function will always ensure that the locks acquired in 1813 * efx_reset_down() are released. A failure return code indicates 1814 * that we were unable to reinitialise the hardware, and the 1815 * driver should be disabled. If ok is false, then the rx and tx 1816 * engines are not restarted, pending a RESET_DISABLE. */ 1817int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) 1818{ 1819 int rc; 1820 1821 EFX_ASSERT_RESET_SERIALISED(efx); 1822 1823 rc = efx->type->init(efx); 1824 if (rc) { 1825 netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); 1826 goto fail; 1827 } 1828 1829 if (!ok) 1830 goto fail; 1831 1832 if (efx->port_initialized && method != RESET_TYPE_INVISIBLE) { 1833 rc = efx->phy_op->init(efx); 1834 if (rc) 1835 goto fail; 1836 if (efx->phy_op->reconfigure(efx)) 1837 netif_err(efx, drv, efx->net_dev, 1838 "could not restore PHY settings\n"); 1839 } 1840 1841 efx->mac_op->reconfigure(efx); 1842 1843 efx_init_channels(efx); 1844 1845 mutex_unlock(&efx->spi_lock); 1846 mutex_unlock(&efx->mac_lock); 1847 1848 efx_start_all(efx); 1849 1850 return 0; 1851 1852fail: 1853 efx->port_initialized = false; 1854 1855 mutex_unlock(&efx->spi_lock); 1856 mutex_unlock(&efx->mac_lock); 1857 1858 return rc; 1859} 1860 1861/* Reset the NIC using the specified method. Note that the reset may 1862 * fail, in which case the card will be left in an unusable state. 1863 * 1864 * Caller must hold the rtnl_lock. 1865 */ 1866int efx_reset(struct efx_nic *efx, enum reset_type method) 1867{ 1868 int rc, rc2; 1869 bool disabled; 1870 1871 netif_info(efx, drv, efx->net_dev, "resetting (%s)\n", 1872 RESET_TYPE(method)); 1873 1874 efx_reset_down(efx, method); 1875 1876 rc = efx->type->reset(efx, method); 1877 if (rc) { 1878 netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); 1879 goto out; 1880 } 1881 1882 /* Allow resets to be rescheduled. */ 1883 efx->reset_pending = RESET_TYPE_NONE; 1884 1885 /* Reinitialise bus-mastering, which may have been turned off before 1886 * the reset was scheduled. This is still appropriate, even in the 1887 * RESET_TYPE_DISABLE since this driver generally assumes the hardware 1888 * can respond to requests. */ 1889 pci_set_master(efx->pci_dev); 1890 1891out: 1892 /* Leave device stopped if necessary */ 1893 disabled = rc || method == RESET_TYPE_DISABLE; 1894 rc2 = efx_reset_up(efx, method, !disabled); 1895 if (rc2) { 1896 disabled = true; 1897 if (!rc) 1898 rc = rc2; 1899 } 1900 1901 if (disabled) { 1902 dev_close(efx->net_dev); 1903 netif_err(efx, drv, efx->net_dev, "has been disabled\n"); 1904 efx->state = STATE_DISABLED; 1905 } else { 1906 netif_dbg(efx, drv, efx->net_dev, "reset complete\n"); 1907 } 1908 return rc; 1909} 1910 1911/* The worker thread exists so that code that cannot sleep can 1912 * schedule a reset for later. 1913 */ 1914static void efx_reset_work(struct work_struct *data) 1915{ 1916 struct efx_nic *efx = container_of(data, struct efx_nic, reset_work); 1917 1918 if (efx->reset_pending == RESET_TYPE_NONE) 1919 return; 1920 1921 /* If we're not RUNNING then don't reset. Leave the reset_pending 1922 * flag set so that efx_pci_probe_main will be retried */ 1923 if (efx->state != STATE_RUNNING) { 1924 netif_info(efx, drv, efx->net_dev, 1925 "scheduled reset quenched. NIC not RUNNING\n"); 1926 return; 1927 } 1928 1929 rtnl_lock(); 1930 (void)efx_reset(efx, efx->reset_pending); 1931 rtnl_unlock(); 1932} 1933 1934void efx_schedule_reset(struct efx_nic *efx, enum reset_type type) 1935{ 1936 enum reset_type method; 1937 1938 if (efx->reset_pending != RESET_TYPE_NONE) { 1939 netif_info(efx, drv, efx->net_dev, 1940 "quenching already scheduled reset\n"); 1941 return; 1942 } 1943 1944 switch (type) { 1945 case RESET_TYPE_INVISIBLE: 1946 case RESET_TYPE_ALL: 1947 case RESET_TYPE_WORLD: 1948 case RESET_TYPE_DISABLE: 1949 method = type; 1950 break; 1951 case RESET_TYPE_RX_RECOVERY: 1952 case RESET_TYPE_RX_DESC_FETCH: 1953 case RESET_TYPE_TX_DESC_FETCH: 1954 case RESET_TYPE_TX_SKIP: 1955 method = RESET_TYPE_INVISIBLE; 1956 break; 1957 case RESET_TYPE_MC_FAILURE: 1958 default: 1959 method = RESET_TYPE_ALL; 1960 break; 1961 } 1962 1963 if (method != type) 1964 netif_dbg(efx, drv, efx->net_dev, 1965 "scheduling %s reset for %s\n", 1966 RESET_TYPE(method), RESET_TYPE(type)); 1967 else 1968 netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n", 1969 RESET_TYPE(method)); 1970 1971 efx->reset_pending = method; 1972 1973 /* efx_process_channel() will no longer read events once a 1974 * reset is scheduled. So switch back to poll'd MCDI completions. */ 1975 efx_mcdi_mode_poll(efx); 1976 1977 queue_work(reset_workqueue, &efx->reset_work); 1978} 1979 1980/************************************************************************** 1981 * 1982 * List of NICs we support 1983 * 1984 **************************************************************************/ 1985 1986/* PCI device ID table */ 1987static DEFINE_PCI_DEVICE_TABLE(efx_pci_table) = { 1988 {PCI_DEVICE(EFX_VENDID_SFC, FALCON_A_P_DEVID), 1989 .driver_data = (unsigned long) &falcon_a1_nic_type}, 1990 {PCI_DEVICE(EFX_VENDID_SFC, FALCON_B_P_DEVID), 1991 .driver_data = (unsigned long) &falcon_b0_nic_type}, 1992 {PCI_DEVICE(EFX_VENDID_SFC, BETHPAGE_A_P_DEVID), 1993 .driver_data = (unsigned long) &siena_a0_nic_type}, 1994 {PCI_DEVICE(EFX_VENDID_SFC, SIENA_A_P_DEVID), 1995 .driver_data = (unsigned long) &siena_a0_nic_type}, 1996 {0} /* end of list */ 1997}; 1998 1999/************************************************************************** 2000 * 2001 * Dummy PHY/MAC operations 2002 * 2003 * Can be used for some unimplemented operations 2004 * Needed so all function pointers are valid and do not have to be tested 2005 * before use 2006 * 2007 **************************************************************************/ 2008int efx_port_dummy_op_int(struct efx_nic *efx) 2009{ 2010 return 0; 2011} 2012void efx_port_dummy_op_void(struct efx_nic *efx) {} 2013void efx_port_dummy_op_set_id_led(struct efx_nic *efx, enum efx_led_mode mode) 2014{ 2015} 2016bool efx_port_dummy_op_poll(struct efx_nic *efx) 2017{ 2018 return false; 2019} 2020 2021static struct efx_phy_operations efx_dummy_phy_operations = { 2022 .init = efx_port_dummy_op_int, 2023 .reconfigure = efx_port_dummy_op_int, 2024 .poll = efx_port_dummy_op_poll, 2025 .fini = efx_port_dummy_op_void, 2026}; 2027 2028/************************************************************************** 2029 * 2030 * Data housekeeping 2031 * 2032 **************************************************************************/ 2033 2034/* This zeroes out and then fills in the invariants in a struct 2035 * efx_nic (including all sub-structures). 2036 */ 2037static int efx_init_struct(struct efx_nic *efx, struct efx_nic_type *type, 2038 struct pci_dev *pci_dev, struct net_device *net_dev) 2039{ 2040 struct efx_channel *channel; 2041 struct efx_tx_queue *tx_queue; 2042 struct efx_rx_queue *rx_queue; 2043 int i; 2044 2045 /* Initialise common structures */ 2046 memset(efx, 0, sizeof(*efx)); 2047 spin_lock_init(&efx->biu_lock); 2048 mutex_init(&efx->mdio_lock); 2049 mutex_init(&efx->spi_lock); 2050#ifdef CONFIG_SFC_MTD 2051 INIT_LIST_HEAD(&efx->mtd_list); 2052#endif 2053 INIT_WORK(&efx->reset_work, efx_reset_work); 2054 INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor); 2055 efx->pci_dev = pci_dev; 2056 efx->msg_enable = debug; 2057 efx->state = STATE_INIT; 2058 efx->reset_pending = RESET_TYPE_NONE; 2059 strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name)); 2060 2061 efx->net_dev = net_dev; 2062 efx->rx_checksum_enabled = true; 2063 spin_lock_init(&efx->stats_lock); 2064 mutex_init(&efx->mac_lock); 2065 efx->mac_op = type->default_mac_ops; 2066 efx->phy_op = &efx_dummy_phy_operations; 2067 efx->mdio.dev = net_dev; 2068 INIT_WORK(&efx->mac_work, efx_mac_work); 2069 2070 for (i = 0; i < EFX_MAX_CHANNELS; i++) { 2071 channel = &efx->channel[i]; 2072 channel->efx = efx; 2073 channel->channel = i; 2074 channel->work_pending = false; 2075 spin_lock_init(&channel->tx_stop_lock); 2076 atomic_set(&channel->tx_stop_count, 1); 2077 } 2078 for (i = 0; i < EFX_MAX_TX_QUEUES; i++) { 2079 tx_queue = &efx->tx_queue[i]; 2080 tx_queue->efx = efx; 2081 tx_queue->queue = i; 2082 tx_queue->buffer = NULL; 2083 tx_queue->channel = &efx->channel[0]; /* for safety */ 2084 tx_queue->tso_headers_free = NULL; 2085 } 2086 for (i = 0; i < EFX_MAX_RX_QUEUES; i++) { 2087 rx_queue = &efx->rx_queue[i]; 2088 rx_queue->efx = efx; 2089 rx_queue->queue = i; 2090 rx_queue->channel = &efx->channel[0]; /* for safety */ 2091 rx_queue->buffer = NULL; 2092 setup_timer(&rx_queue->slow_fill, efx_rx_slow_fill, 2093 (unsigned long)rx_queue); 2094 } 2095 2096 efx->type = type; 2097 2098 /* As close as we can get to guaranteeing that we don't overflow */ 2099 BUILD_BUG_ON(EFX_EVQ_SIZE < EFX_TXQ_SIZE + EFX_RXQ_SIZE); 2100 2101 EFX_BUG_ON_PARANOID(efx->type->phys_addr_channels > EFX_MAX_CHANNELS); 2102 2103 /* Higher numbered interrupt modes are less capable! */ 2104 efx->interrupt_mode = max(efx->type->max_interrupt_mode, 2105 interrupt_mode); 2106 2107 /* Would be good to use the net_dev name, but we're too early */ 2108 snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s", 2109 pci_name(pci_dev)); 2110 efx->workqueue = create_singlethread_workqueue(efx->workqueue_name); 2111 if (!efx->workqueue) 2112 return -ENOMEM; 2113 2114 return 0; 2115} 2116 2117static void efx_fini_struct(struct efx_nic *efx) 2118{ 2119 if (efx->workqueue) { 2120 destroy_workqueue(efx->workqueue); 2121 efx->workqueue = NULL; 2122 } 2123} 2124 2125/************************************************************************** 2126 * 2127 * PCI interface 2128 * 2129 **************************************************************************/ 2130 2131/* Main body of final NIC shutdown code 2132 * This is called only at module unload (or hotplug removal). 2133 */ 2134static void efx_pci_remove_main(struct efx_nic *efx) 2135{ 2136 efx_nic_fini_interrupt(efx); 2137 efx_fini_channels(efx); 2138 efx_fini_port(efx); 2139 efx->type->fini(efx); 2140 efx_fini_napi(efx); 2141 efx_remove_all(efx); 2142} 2143 2144/* Final NIC shutdown 2145 * This is called only at module unload (or hotplug removal). 2146 */ 2147static void efx_pci_remove(struct pci_dev *pci_dev) 2148{ 2149 struct efx_nic *efx; 2150 2151 efx = pci_get_drvdata(pci_dev); 2152 if (!efx) 2153 return; 2154 2155 /* Mark the NIC as fini, then stop the interface */ 2156 rtnl_lock(); 2157 efx->state = STATE_FINI; 2158 dev_close(efx->net_dev); 2159 2160 /* Allow any queued efx_resets() to complete */ 2161 rtnl_unlock(); 2162 2163 efx_unregister_netdev(efx); 2164 2165 efx_mtd_remove(efx); 2166 2167 /* Wait for any scheduled resets to complete. No more will be 2168 * scheduled from this point because efx_stop_all() has been 2169 * called, we are no longer registered with driverlink, and 2170 * the net_device's have been removed. */ 2171 cancel_work_sync(&efx->reset_work); 2172 2173 efx_pci_remove_main(efx); 2174 2175 efx_fini_io(efx); 2176 netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n"); 2177 2178 pci_set_drvdata(pci_dev, NULL); 2179 efx_fini_struct(efx); 2180 free_netdev(efx->net_dev); 2181}; 2182 2183/* Main body of NIC initialisation 2184 * This is called at module load (or hotplug insertion, theoretically). 2185 */ 2186static int efx_pci_probe_main(struct efx_nic *efx) 2187{ 2188 int rc; 2189 2190 /* Do start-of-day initialisation */ 2191 rc = efx_probe_all(efx); 2192 if (rc) 2193 goto fail1; 2194 2195 rc = efx_init_napi(efx); 2196 if (rc) 2197 goto fail2; 2198 2199 rc = efx->type->init(efx); 2200 if (rc) { 2201 netif_err(efx, probe, efx->net_dev, 2202 "failed to initialise NIC\n"); 2203 goto fail3; 2204 } 2205 2206 rc = efx_init_port(efx); 2207 if (rc) { 2208 netif_err(efx, probe, efx->net_dev, 2209 "failed to initialise port\n"); 2210 goto fail4; 2211 } 2212 2213 efx_init_channels(efx); 2214 2215 rc = efx_nic_init_interrupt(efx); 2216 if (rc) 2217 goto fail5; 2218 2219 return 0; 2220 2221 fail5: 2222 efx_fini_channels(efx); 2223 efx_fini_port(efx); 2224 fail4: 2225 efx->type->fini(efx); 2226 fail3: 2227 efx_fini_napi(efx); 2228 fail2: 2229 efx_remove_all(efx); 2230 fail1: 2231 return rc; 2232} 2233 2234/* NIC initialisation 2235 * 2236 * This is called at module load (or hotplug insertion, 2237 * theoretically). It sets up PCI mappings, tests and resets the NIC, 2238 * sets up and registers the network devices with the kernel and hooks 2239 * the interrupt service routine. It does not prepare the device for 2240 * transmission; this is left to the first time one of the network 2241 * interfaces is brought up (i.e. efx_net_open). 2242 */ 2243static int __devinit efx_pci_probe(struct pci_dev *pci_dev, 2244 const struct pci_device_id *entry) 2245{ 2246 struct efx_nic_type *type = (struct efx_nic_type *) entry->driver_data; 2247 struct net_device *net_dev; 2248 struct efx_nic *efx; 2249 int i, rc; 2250 2251 /* Allocate and initialise a struct net_device and struct efx_nic */ 2252 net_dev = alloc_etherdev_mq(sizeof(*efx), EFX_MAX_CORE_TX_QUEUES); 2253 if (!net_dev) 2254 return -ENOMEM; 2255 net_dev->features |= (type->offload_features | NETIF_F_SG | 2256 NETIF_F_HIGHDMA | NETIF_F_TSO | 2257 NETIF_F_GRO); 2258 if (type->offload_features & NETIF_F_V6_CSUM) 2259 net_dev->features |= NETIF_F_TSO6; 2260 /* Mask for features that also apply to VLAN devices */ 2261 net_dev->vlan_features |= (NETIF_F_ALL_CSUM | NETIF_F_SG | 2262 NETIF_F_HIGHDMA | NETIF_F_TSO); 2263 efx = netdev_priv(net_dev); 2264 pci_set_drvdata(pci_dev, efx); 2265 SET_NETDEV_DEV(net_dev, &pci_dev->dev); 2266 rc = efx_init_struct(efx, type, pci_dev, net_dev); 2267 if (rc) 2268 goto fail1; 2269 2270 netif_info(efx, probe, efx->net_dev, 2271 "Solarflare Communications NIC detected\n"); 2272 2273 /* Set up basic I/O (BAR mappings etc) */ 2274 rc = efx_init_io(efx); 2275 if (rc) 2276 goto fail2; 2277 2278 /* No serialisation is required with the reset path because 2279 * we're in STATE_INIT. */ 2280 for (i = 0; i < 5; i++) { 2281 rc = efx_pci_probe_main(efx); 2282 2283 /* Serialise against efx_reset(). No more resets will be 2284 * scheduled since efx_stop_all() has been called, and we 2285 * have not and never have been registered with either 2286 * the rtnetlink or driverlink layers. */ 2287 cancel_work_sync(&efx->reset_work); 2288 2289 if (rc == 0) { 2290 if (efx->reset_pending != RESET_TYPE_NONE) { 2291 /* If there was a scheduled reset during 2292 * probe, the NIC is probably hosed anyway */ 2293 efx_pci_remove_main(efx); 2294 rc = -EIO; 2295 } else { 2296 break; 2297 } 2298 } 2299 2300 /* Retry if a recoverably reset event has been scheduled */ 2301 if ((efx->reset_pending != RESET_TYPE_INVISIBLE) && 2302 (efx->reset_pending != RESET_TYPE_ALL)) 2303 goto fail3; 2304 2305 efx->reset_pending = RESET_TYPE_NONE; 2306 } 2307 2308 if (rc) { 2309 netif_err(efx, probe, efx->net_dev, "Could not reset NIC\n"); 2310 goto fail4; 2311 } 2312 2313 /* Switch to the running state before we expose the device to the OS, 2314 * so that dev_open()|efx_start_all() will actually start the device */ 2315 efx->state = STATE_RUNNING; 2316 2317 rc = efx_register_netdev(efx); 2318 if (rc) 2319 goto fail5; 2320 2321 netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n"); 2322 2323 rtnl_lock(); 2324 efx_mtd_probe(efx); /* allowed to fail */ 2325 rtnl_unlock(); 2326 return 0; 2327 2328 fail5: 2329 efx_pci_remove_main(efx); 2330 fail4: 2331 fail3: 2332 efx_fini_io(efx); 2333 fail2: 2334 efx_fini_struct(efx); 2335 fail1: 2336 WARN_ON(rc > 0); 2337 netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc); 2338 free_netdev(net_dev); 2339 return rc; 2340} 2341 2342static int efx_pm_freeze(struct device *dev) 2343{ 2344 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2345 2346 efx->state = STATE_FINI; 2347 2348 netif_device_detach(efx->net_dev); 2349 2350 efx_stop_all(efx); 2351 efx_fini_channels(efx); 2352 2353 return 0; 2354} 2355 2356static int efx_pm_thaw(struct device *dev) 2357{ 2358 struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev)); 2359 2360 efx->state = STATE_INIT; 2361 2362 efx_init_channels(efx); 2363 2364 mutex_lock(&efx->mac_lock); 2365 efx->phy_op->reconfigure(efx); 2366 mutex_unlock(&efx->mac_lock); 2367 2368 efx_start_all(efx); 2369 2370 netif_device_attach(efx->net_dev); 2371 2372 efx->state = STATE_RUNNING; 2373 2374 efx->type->resume_wol(efx); 2375 2376 /* Reschedule any quenched resets scheduled during efx_pm_freeze() */ 2377 queue_work(reset_workqueue, &efx->reset_work); 2378 2379 return 0; 2380} 2381 2382static int efx_pm_poweroff(struct device *dev) 2383{ 2384 struct pci_dev *pci_dev = to_pci_dev(dev); 2385 struct efx_nic *efx = pci_get_drvdata(pci_dev); 2386 2387 efx->type->fini(efx); 2388 2389 efx->reset_pending = RESET_TYPE_NONE; 2390 2391 pci_save_state(pci_dev); 2392 return pci_set_power_state(pci_dev, PCI_D3hot); 2393} 2394 2395/* Used for both resume and restore */ 2396static int efx_pm_resume(struct device *dev) 2397{ 2398 struct pci_dev *pci_dev = to_pci_dev(dev); 2399 struct efx_nic *efx = pci_get_drvdata(pci_dev); 2400 int rc; 2401 2402 rc = pci_set_power_state(pci_dev, PCI_D0); 2403 if (rc) 2404 return rc; 2405 pci_restore_state(pci_dev); 2406 rc = pci_enable_device(pci_dev); 2407 if (rc) 2408 return rc; 2409 pci_set_master(efx->pci_dev); 2410 rc = efx->type->reset(efx, RESET_TYPE_ALL); 2411 if (rc) 2412 return rc; 2413 rc = efx->type->init(efx); 2414 if (rc) 2415 return rc; 2416 efx_pm_thaw(dev); 2417 return 0; 2418} 2419 2420static int efx_pm_suspend(struct device *dev) 2421{ 2422 int rc; 2423 2424 efx_pm_freeze(dev); 2425 rc = efx_pm_poweroff(dev); 2426 if (rc) 2427 efx_pm_resume(dev); 2428 return rc; 2429} 2430 2431static struct dev_pm_ops efx_pm_ops = { 2432 .suspend = efx_pm_suspend, 2433 .resume = efx_pm_resume, 2434 .freeze = efx_pm_freeze, 2435 .thaw = efx_pm_thaw, 2436 .poweroff = efx_pm_poweroff, 2437 .restore = efx_pm_resume, 2438}; 2439 2440static struct pci_driver efx_pci_driver = { 2441 .name = KBUILD_MODNAME, 2442 .id_table = efx_pci_table, 2443 .probe = efx_pci_probe, 2444 .remove = efx_pci_remove, 2445 .driver.pm = &efx_pm_ops, 2446}; 2447 2448/************************************************************************** 2449 * 2450 * Kernel module interface 2451 * 2452 *************************************************************************/ 2453 2454module_param(interrupt_mode, uint, 0444); 2455MODULE_PARM_DESC(interrupt_mode, 2456 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)"); 2457 2458static int __init efx_init_module(void) 2459{ 2460 int rc; 2461 2462 printk(KERN_INFO "Solarflare NET driver v" EFX_DRIVER_VERSION "\n"); 2463 2464 rc = register_netdevice_notifier(&efx_netdev_notifier); 2465 if (rc) 2466 goto err_notifier; 2467 2468 reset_workqueue = create_singlethread_workqueue("sfc_reset"); 2469 if (!reset_workqueue) { 2470 rc = -ENOMEM; 2471 goto err_reset; 2472 } 2473 2474 rc = pci_register_driver(&efx_pci_driver); 2475 if (rc < 0) 2476 goto err_pci; 2477 2478 return 0; 2479 2480 err_pci: 2481 destroy_workqueue(reset_workqueue); 2482 err_reset: 2483 unregister_netdevice_notifier(&efx_netdev_notifier); 2484 err_notifier: 2485 return rc; 2486} 2487 2488static void __exit efx_exit_module(void) 2489{ 2490 printk(KERN_INFO "Solarflare NET driver unloading\n"); 2491 2492 pci_unregister_driver(&efx_pci_driver); 2493 destroy_workqueue(reset_workqueue); 2494 unregister_netdevice_notifier(&efx_netdev_notifier); 2495 2496} 2497 2498module_init(efx_init_module); 2499module_exit(efx_exit_module); 2500 2501MODULE_AUTHOR("Solarflare Communications and " 2502 "Michael Brown <mbrown@fensystems.co.uk>"); 2503MODULE_DESCRIPTION("Solarflare Communications network driver"); 2504MODULE_LICENSE("GPL"); 2505MODULE_DEVICE_TABLE(pci, efx_pci_table); 2506