1/* 2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34#include <linux/sched.h> 35#include <linux/spinlock.h> 36#include <linux/idr.h> 37#include <linux/pci.h> 38#include <linux/io.h> 39#include <linux/delay.h> 40#include <linux/netdevice.h> 41#include <linux/vmalloc.h> 42#include <linux/bitmap.h> 43#include <linux/slab.h> 44 45#include "ipath_kernel.h" 46#include "ipath_verbs.h" 47 48static void ipath_update_pio_bufs(struct ipath_devdata *); 49 50const char *ipath_get_unit_name(int unit) 51{ 52 static char iname[16]; 53 snprintf(iname, sizeof iname, "infinipath%u", unit); 54 return iname; 55} 56 57#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: " 58#define PFX IPATH_DRV_NAME ": " 59 60/* 61 * The size has to be longer than this string, so we can append 62 * board/chip information to it in the init code. 63 */ 64const char ib_ipath_version[] = IPATH_IDSTR "\n"; 65 66static struct idr unit_table; 67DEFINE_SPINLOCK(ipath_devs_lock); 68LIST_HEAD(ipath_dev_list); 69 70wait_queue_head_t ipath_state_wait; 71 72unsigned ipath_debug = __IPATH_INFO; 73 74module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO); 75MODULE_PARM_DESC(debug, "mask for debug prints"); 76EXPORT_SYMBOL_GPL(ipath_debug); 77 78unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */ 79module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO); 80MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported"); 81 82static unsigned ipath_hol_timeout_ms = 13000; 83module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO); 84MODULE_PARM_DESC(hol_timeout_ms, 85 "duration of user app suspension after link failure"); 86 87unsigned ipath_linkrecovery = 1; 88module_param_named(linkrecovery, ipath_linkrecovery, uint, S_IWUSR | S_IRUGO); 89MODULE_PARM_DESC(linkrecovery, "enable workaround for link recovery issue"); 90 91MODULE_LICENSE("GPL"); 92MODULE_AUTHOR("QLogic <support@qlogic.com>"); 93MODULE_DESCRIPTION("QLogic InfiniPath driver"); 94 95/* 96 * Table to translate the LINKTRAININGSTATE portion of 97 * IBCStatus to a human-readable form. 98 */ 99const char *ipath_ibcstatus_str[] = { 100 "Disabled", 101 "LinkUp", 102 "PollActive", 103 "PollQuiet", 104 "SleepDelay", 105 "SleepQuiet", 106 "LState6", /* unused */ 107 "LState7", /* unused */ 108 "CfgDebounce", 109 "CfgRcvfCfg", 110 "CfgWaitRmt", 111 "CfgIdle", 112 "RecovRetrain", 113 "CfgTxRevLane", /* unused before IBA7220 */ 114 "RecovWaitRmt", 115 "RecovIdle", 116 /* below were added for IBA7220 */ 117 "CfgEnhanced", 118 "CfgTest", 119 "CfgWaitRmtTest", 120 "CfgWaitCfgEnhanced", 121 "SendTS_T", 122 "SendTstIdles", 123 "RcvTS_T", 124 "SendTst_TS1s", 125 "LTState18", "LTState19", "LTState1A", "LTState1B", 126 "LTState1C", "LTState1D", "LTState1E", "LTState1F" 127}; 128 129static void __devexit ipath_remove_one(struct pci_dev *); 130static int __devinit ipath_init_one(struct pci_dev *, 131 const struct pci_device_id *); 132 133/* Only needed for registration, nothing else needs this info */ 134#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 135#define PCI_DEVICE_ID_INFINIPATH_HT 0xd 136 137/* Number of seconds before our card status check... */ 138#define STATUS_TIMEOUT 60 139 140static const struct pci_device_id ipath_pci_tbl[] = { 141 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) }, 142 { 0, } 143}; 144 145MODULE_DEVICE_TABLE(pci, ipath_pci_tbl); 146 147static struct pci_driver ipath_driver = { 148 .name = IPATH_DRV_NAME, 149 .probe = ipath_init_one, 150 .remove = __devexit_p(ipath_remove_one), 151 .id_table = ipath_pci_tbl, 152 .driver = { 153 .groups = ipath_driver_attr_groups, 154 }, 155}; 156 157static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, 158 u32 *bar0, u32 *bar1) 159{ 160 int ret; 161 162 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0); 163 if (ret) 164 ipath_dev_err(dd, "failed to read bar0 before enable: " 165 "error %d\n", -ret); 166 167 ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1); 168 if (ret) 169 ipath_dev_err(dd, "failed to read bar1 before enable: " 170 "error %d\n", -ret); 171 172 ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1); 173} 174 175static void ipath_free_devdata(struct pci_dev *pdev, 176 struct ipath_devdata *dd) 177{ 178 unsigned long flags; 179 180 pci_set_drvdata(pdev, NULL); 181 182 if (dd->ipath_unit != -1) { 183 spin_lock_irqsave(&ipath_devs_lock, flags); 184 idr_remove(&unit_table, dd->ipath_unit); 185 list_del(&dd->ipath_list); 186 spin_unlock_irqrestore(&ipath_devs_lock, flags); 187 } 188 vfree(dd); 189} 190 191static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev) 192{ 193 unsigned long flags; 194 struct ipath_devdata *dd; 195 int ret; 196 197 if (!idr_pre_get(&unit_table, GFP_KERNEL)) { 198 dd = ERR_PTR(-ENOMEM); 199 goto bail; 200 } 201 202 dd = vmalloc(sizeof(*dd)); 203 if (!dd) { 204 dd = ERR_PTR(-ENOMEM); 205 goto bail; 206 } 207 memset(dd, 0, sizeof(*dd)); 208 dd->ipath_unit = -1; 209 210 spin_lock_irqsave(&ipath_devs_lock, flags); 211 212 ret = idr_get_new(&unit_table, dd, &dd->ipath_unit); 213 if (ret < 0) { 214 printk(KERN_ERR IPATH_DRV_NAME 215 ": Could not allocate unit ID: error %d\n", -ret); 216 ipath_free_devdata(pdev, dd); 217 dd = ERR_PTR(ret); 218 goto bail_unlock; 219 } 220 221 dd->pcidev = pdev; 222 pci_set_drvdata(pdev, dd); 223 224 list_add(&dd->ipath_list, &ipath_dev_list); 225 226bail_unlock: 227 spin_unlock_irqrestore(&ipath_devs_lock, flags); 228 229bail: 230 return dd; 231} 232 233static inline struct ipath_devdata *__ipath_lookup(int unit) 234{ 235 return idr_find(&unit_table, unit); 236} 237 238struct ipath_devdata *ipath_lookup(int unit) 239{ 240 struct ipath_devdata *dd; 241 unsigned long flags; 242 243 spin_lock_irqsave(&ipath_devs_lock, flags); 244 dd = __ipath_lookup(unit); 245 spin_unlock_irqrestore(&ipath_devs_lock, flags); 246 247 return dd; 248} 249 250int ipath_count_units(int *npresentp, int *nupp, int *maxportsp) 251{ 252 int nunits, npresent, nup; 253 struct ipath_devdata *dd; 254 unsigned long flags; 255 int maxports; 256 257 nunits = npresent = nup = maxports = 0; 258 259 spin_lock_irqsave(&ipath_devs_lock, flags); 260 261 list_for_each_entry(dd, &ipath_dev_list, ipath_list) { 262 nunits++; 263 if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase) 264 npresent++; 265 if (dd->ipath_lid && 266 !(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN 267 | IPATH_LINKUNK))) 268 nup++; 269 if (dd->ipath_cfgports > maxports) 270 maxports = dd->ipath_cfgports; 271 } 272 273 spin_unlock_irqrestore(&ipath_devs_lock, flags); 274 275 if (npresentp) 276 *npresentp = npresent; 277 if (nupp) 278 *nupp = nup; 279 if (maxportsp) 280 *maxportsp = maxports; 281 282 return nunits; 283} 284 285/* 286 * These next two routines are placeholders in case we don't have per-arch 287 * code for controlling write combining. If explicit control of write 288 * combining is not available, performance will probably be awful. 289 */ 290 291int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd) 292{ 293 return -EOPNOTSUPP; 294} 295 296void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd) 297{ 298} 299 300/* 301 * Perform a PIO buffer bandwidth write test, to verify proper system 302 * configuration. Even when all the setup calls work, occasionally 303 * BIOS or other issues can prevent write combining from working, or 304 * can cause other bandwidth problems to the chip. 305 * 306 * This test simply writes the same buffer over and over again, and 307 * measures close to the peak bandwidth to the chip (not testing 308 * data bandwidth to the wire). On chips that use an address-based 309 * trigger to send packets to the wire, this is easy. On chips that 310 * use a count to trigger, we want to make sure that the packet doesn't 311 * go out on the wire, or trigger flow control checks. 312 */ 313static void ipath_verify_pioperf(struct ipath_devdata *dd) 314{ 315 u32 pbnum, cnt, lcnt; 316 u32 __iomem *piobuf; 317 u32 *addr; 318 u64 msecs, emsecs; 319 320 piobuf = ipath_getpiobuf(dd, 0, &pbnum); 321 if (!piobuf) { 322 dev_info(&dd->pcidev->dev, 323 "No PIObufs for checking perf, skipping\n"); 324 return; 325 } 326 327 /* 328 * Enough to give us a reasonable test, less than piobuf size, and 329 * likely multiple of store buffer length. 330 */ 331 cnt = 1024; 332 333 addr = vmalloc(cnt); 334 if (!addr) { 335 dev_info(&dd->pcidev->dev, 336 "Couldn't get memory for checking PIO perf," 337 " skipping\n"); 338 goto done; 339 } 340 341 preempt_disable(); /* we want reasonably accurate elapsed time */ 342 msecs = 1 + jiffies_to_msecs(jiffies); 343 for (lcnt = 0; lcnt < 10000U; lcnt++) { 344 /* wait until we cross msec boundary */ 345 if (jiffies_to_msecs(jiffies) >= msecs) 346 break; 347 udelay(1); 348 } 349 350 ipath_disable_armlaunch(dd); 351 352 /* 353 * length 0, no dwords actually sent, and mark as VL15 354 * on chips where that may matter (due to IB flowcontrol) 355 */ 356 if ((dd->ipath_flags & IPATH_HAS_PBC_CNT)) 357 writeq(1UL << 63, piobuf); 358 else 359 writeq(0, piobuf); 360 ipath_flush_wc(); 361 362 /* 363 * this is only roughly accurate, since even with preempt we 364 * still take interrupts that could take a while. Running for 365 * >= 5 msec seems to get us "close enough" to accurate values 366 */ 367 msecs = jiffies_to_msecs(jiffies); 368 for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) { 369 __iowrite32_copy(piobuf + 64, addr, cnt >> 2); 370 emsecs = jiffies_to_msecs(jiffies) - msecs; 371 } 372 373 /* 1 GiB/sec, slightly over IB SDR line rate */ 374 if (lcnt < (emsecs * 1024U)) 375 ipath_dev_err(dd, 376 "Performance problem: bandwidth to PIO buffers is " 377 "only %u MiB/sec\n", 378 lcnt / (u32) emsecs); 379 else 380 ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n", 381 lcnt / (u32) emsecs); 382 383 preempt_enable(); 384 385 vfree(addr); 386 387done: 388 /* disarm piobuf, so it's available again */ 389 ipath_disarm_piobufs(dd, pbnum, 1); 390 ipath_enable_armlaunch(dd); 391} 392 393static void cleanup_device(struct ipath_devdata *dd); 394 395static int __devinit ipath_init_one(struct pci_dev *pdev, 396 const struct pci_device_id *ent) 397{ 398 int ret, len, j; 399 struct ipath_devdata *dd; 400 unsigned long long addr; 401 u32 bar0 = 0, bar1 = 0; 402 u8 rev; 403 404 dd = ipath_alloc_devdata(pdev); 405 if (IS_ERR(dd)) { 406 ret = PTR_ERR(dd); 407 printk(KERN_ERR IPATH_DRV_NAME 408 ": Could not allocate devdata: error %d\n", -ret); 409 goto bail; 410 } 411 412 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); 413 414 ret = pci_enable_device(pdev); 415 if (ret) { 416 /* This can happen iff: 417 * 418 * We did a chip reset, and then failed to reprogram the 419 * BAR, or the chip reset due to an internal error. We then 420 * unloaded the driver and reloaded it. 421 * 422 * Both reset cases set the BAR back to initial state. For 423 * the latter case, the AER sticky error bit at offset 0x718 424 * should be set, but the Linux kernel doesn't yet know 425 * about that, it appears. If the original BAR was retained 426 * in the kernel data structures, this may be OK. 427 */ 428 ipath_dev_err(dd, "enable unit %d failed: error %d\n", 429 dd->ipath_unit, -ret); 430 goto bail_devdata; 431 } 432 addr = pci_resource_start(pdev, 0); 433 len = pci_resource_len(pdev, 0); 434 ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %d, vend %x/%x " 435 "driver_data %lx\n", addr, len, pdev->irq, ent->vendor, 436 ent->device, ent->driver_data); 437 438 read_bars(dd, pdev, &bar0, &bar1); 439 440 if (!bar1 && !(bar0 & ~0xf)) { 441 if (addr) { 442 dev_info(&pdev->dev, "BAR is 0 (probable RESET), " 443 "rewriting as %llx\n", addr); 444 ret = pci_write_config_dword( 445 pdev, PCI_BASE_ADDRESS_0, addr); 446 if (ret) { 447 ipath_dev_err(dd, "rewrite of BAR0 " 448 "failed: err %d\n", -ret); 449 goto bail_disable; 450 } 451 ret = pci_write_config_dword( 452 pdev, PCI_BASE_ADDRESS_1, addr >> 32); 453 if (ret) { 454 ipath_dev_err(dd, "rewrite of BAR1 " 455 "failed: err %d\n", -ret); 456 goto bail_disable; 457 } 458 } else { 459 ipath_dev_err(dd, "BAR is 0 (probable RESET), " 460 "not usable until reboot\n"); 461 ret = -ENODEV; 462 goto bail_disable; 463 } 464 } 465 466 ret = pci_request_regions(pdev, IPATH_DRV_NAME); 467 if (ret) { 468 dev_info(&pdev->dev, "pci_request_regions unit %u fails: " 469 "err %d\n", dd->ipath_unit, -ret); 470 goto bail_disable; 471 } 472 473 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); 474 if (ret) { 475 /* 476 * if the 64 bit setup fails, try 32 bit. Some systems 477 * do not setup 64 bit maps on systems with 2GB or less 478 * memory installed. 479 */ 480 ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); 481 if (ret) { 482 dev_info(&pdev->dev, 483 "Unable to set DMA mask for unit %u: %d\n", 484 dd->ipath_unit, ret); 485 goto bail_regions; 486 } 487 else { 488 ipath_dbg("No 64bit DMA mask, used 32 bit mask\n"); 489 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); 490 if (ret) 491 dev_info(&pdev->dev, 492 "Unable to set DMA consistent mask " 493 "for unit %u: %d\n", 494 dd->ipath_unit, ret); 495 496 } 497 } 498 else { 499 ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); 500 if (ret) 501 dev_info(&pdev->dev, 502 "Unable to set DMA consistent mask " 503 "for unit %u: %d\n", 504 dd->ipath_unit, ret); 505 } 506 507 pci_set_master(pdev); 508 509 /* 510 * Save BARs to rewrite after device reset. Save all 64 bits of 511 * BAR, just in case. 512 */ 513 dd->ipath_pcibar0 = addr; 514 dd->ipath_pcibar1 = addr >> 32; 515 dd->ipath_deviceid = ent->device; /* save for later use */ 516 dd->ipath_vendorid = ent->vendor; 517 518 /* setup the chip-specific functions, as early as possible. */ 519 switch (ent->device) { 520 case PCI_DEVICE_ID_INFINIPATH_HT: 521 ipath_init_iba6110_funcs(dd); 522 break; 523 524 default: 525 ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, " 526 "failing\n", ent->device); 527 return -ENODEV; 528 } 529 530 for (j = 0; j < 6; j++) { 531 if (!pdev->resource[j].start) 532 continue; 533 ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n", 534 j, (unsigned long long)pdev->resource[j].start, 535 (unsigned long long)pdev->resource[j].end, 536 (unsigned long long)pci_resource_len(pdev, j)); 537 } 538 539 if (!addr) { 540 ipath_dev_err(dd, "No valid address in BAR 0!\n"); 541 ret = -ENODEV; 542 goto bail_regions; 543 } 544 545 ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev); 546 if (ret) { 547 ipath_dev_err(dd, "Failed to read PCI revision ID unit " 548 "%u: err %d\n", dd->ipath_unit, -ret); 549 goto bail_regions; /* shouldn't ever happen */ 550 } 551 dd->ipath_pcirev = rev; 552 553#if defined(__powerpc__) 554 /* There isn't a generic way to specify writethrough mappings */ 555 dd->ipath_kregbase = __ioremap(addr, len, 556 (_PAGE_NO_CACHE|_PAGE_WRITETHRU)); 557#else 558 dd->ipath_kregbase = ioremap_nocache(addr, len); 559#endif 560 561 if (!dd->ipath_kregbase) { 562 ipath_dbg("Unable to map io addr %llx to kvirt, failing\n", 563 addr); 564 ret = -ENOMEM; 565 goto bail_iounmap; 566 } 567 dd->ipath_kregend = (u64 __iomem *) 568 ((void __iomem *)dd->ipath_kregbase + len); 569 dd->ipath_physaddr = addr; /* used for io_remap, etc. */ 570 /* for user mmap */ 571 ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n", 572 addr, dd->ipath_kregbase); 573 574 if (dd->ipath_f_bus(dd, pdev)) 575 ipath_dev_err(dd, "Failed to setup config space; " 576 "continuing anyway\n"); 577 578 /* 579 * set up our interrupt handler; IRQF_SHARED probably not needed, 580 * since MSI interrupts shouldn't be shared but won't hurt for now. 581 * check 0 irq after we return from chip-specific bus setup, since 582 * that can affect this due to setup 583 */ 584 if (!dd->ipath_irq) 585 ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't " 586 "work\n"); 587 else { 588 ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED, 589 IPATH_DRV_NAME, dd); 590 if (ret) { 591 ipath_dev_err(dd, "Couldn't setup irq handler, " 592 "irq=%d: %d\n", dd->ipath_irq, ret); 593 goto bail_iounmap; 594 } 595 } 596 597 ret = ipath_init_chip(dd, 0); /* do the chip-specific init */ 598 if (ret) 599 goto bail_irqsetup; 600 601 ret = ipath_enable_wc(dd); 602 603 if (ret) { 604 ipath_dev_err(dd, "Write combining not enabled " 605 "(err %d): performance may be poor\n", 606 -ret); 607 ret = 0; 608 } 609 610 ipath_verify_pioperf(dd); 611 612 ipath_device_create_group(&pdev->dev, dd); 613 ipathfs_add_device(dd); 614 ipath_user_add(dd); 615 ipath_diag_add(dd); 616 ipath_register_ib_device(dd); 617 618 goto bail; 619 620bail_irqsetup: 621 cleanup_device(dd); 622 623 if (dd->ipath_irq) 624 dd->ipath_f_free_irq(dd); 625 626 if (dd->ipath_f_cleanup) 627 dd->ipath_f_cleanup(dd); 628 629bail_iounmap: 630 iounmap((volatile void __iomem *) dd->ipath_kregbase); 631 632bail_regions: 633 pci_release_regions(pdev); 634 635bail_disable: 636 pci_disable_device(pdev); 637 638bail_devdata: 639 ipath_free_devdata(pdev, dd); 640 641bail: 642 return ret; 643} 644 645static void cleanup_device(struct ipath_devdata *dd) 646{ 647 int port; 648 struct ipath_portdata **tmp; 649 unsigned long flags; 650 651 if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) { 652 /* can't do anything more with chip; needs re-init */ 653 *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT; 654 if (dd->ipath_kregbase) { 655 /* 656 * if we haven't already cleaned up before these are 657 * to ensure any register reads/writes "fail" until 658 * re-init 659 */ 660 dd->ipath_kregbase = NULL; 661 dd->ipath_uregbase = 0; 662 dd->ipath_sregbase = 0; 663 dd->ipath_cregbase = 0; 664 dd->ipath_kregsize = 0; 665 } 666 ipath_disable_wc(dd); 667 } 668 669 if (dd->ipath_spectriggerhit) 670 dev_info(&dd->pcidev->dev, "%lu special trigger hits\n", 671 dd->ipath_spectriggerhit); 672 673 if (dd->ipath_pioavailregs_dma) { 674 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 675 (void *) dd->ipath_pioavailregs_dma, 676 dd->ipath_pioavailregs_phys); 677 dd->ipath_pioavailregs_dma = NULL; 678 } 679 if (dd->ipath_dummy_hdrq) { 680 dma_free_coherent(&dd->pcidev->dev, 681 dd->ipath_pd[0]->port_rcvhdrq_size, 682 dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys); 683 dd->ipath_dummy_hdrq = NULL; 684 } 685 686 if (dd->ipath_pageshadow) { 687 struct page **tmpp = dd->ipath_pageshadow; 688 dma_addr_t *tmpd = dd->ipath_physshadow; 689 int i, cnt = 0; 690 691 ipath_cdbg(VERBOSE, "Unlocking any expTID pages still " 692 "locked\n"); 693 for (port = 0; port < dd->ipath_cfgports; port++) { 694 int port_tidbase = port * dd->ipath_rcvtidcnt; 695 int maxtid = port_tidbase + dd->ipath_rcvtidcnt; 696 for (i = port_tidbase; i < maxtid; i++) { 697 if (!tmpp[i]) 698 continue; 699 pci_unmap_page(dd->pcidev, tmpd[i], 700 PAGE_SIZE, PCI_DMA_FROMDEVICE); 701 ipath_release_user_pages(&tmpp[i], 1); 702 tmpp[i] = NULL; 703 cnt++; 704 } 705 } 706 if (cnt) { 707 ipath_stats.sps_pageunlocks += cnt; 708 ipath_cdbg(VERBOSE, "There were still %u expTID " 709 "entries locked\n", cnt); 710 } 711 if (ipath_stats.sps_pagelocks || 712 ipath_stats.sps_pageunlocks) 713 ipath_cdbg(VERBOSE, "%llu pages locked, %llu " 714 "unlocked via ipath_m{un}lock\n", 715 (unsigned long long) 716 ipath_stats.sps_pagelocks, 717 (unsigned long long) 718 ipath_stats.sps_pageunlocks); 719 720 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", 721 dd->ipath_pageshadow); 722 tmpp = dd->ipath_pageshadow; 723 dd->ipath_pageshadow = NULL; 724 vfree(tmpp); 725 726 dd->ipath_egrtidbase = NULL; 727 } 728 729 /* 730 * free any resources still in use (usually just kernel ports) 731 * at unload; we do for portcnt, because that's what we allocate. 732 * We acquire lock to be really paranoid that ipath_pd isn't being 733 * accessed from some interrupt-related code (that should not happen, 734 * but best to be sure). 735 */ 736 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 737 tmp = dd->ipath_pd; 738 dd->ipath_pd = NULL; 739 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 740 for (port = 0; port < dd->ipath_portcnt; port++) { 741 struct ipath_portdata *pd = tmp[port]; 742 tmp[port] = NULL; /* debugging paranoia */ 743 ipath_free_pddata(dd, pd); 744 } 745 kfree(tmp); 746} 747 748static void __devexit ipath_remove_one(struct pci_dev *pdev) 749{ 750 struct ipath_devdata *dd = pci_get_drvdata(pdev); 751 752 ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd); 753 754 /* 755 * disable the IB link early, to be sure no new packets arrive, which 756 * complicates the shutdown process 757 */ 758 ipath_shutdown_device(dd); 759 760 flush_scheduled_work(); 761 762 if (dd->verbs_dev) 763 ipath_unregister_ib_device(dd->verbs_dev); 764 765 ipath_diag_remove(dd); 766 ipath_user_remove(dd); 767 ipathfs_remove_device(dd); 768 ipath_device_remove_group(&pdev->dev, dd); 769 770 ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, " 771 "unit %u\n", dd, (u32) dd->ipath_unit); 772 773 cleanup_device(dd); 774 775 /* 776 * turn off rcv, send, and interrupts for all ports, all drivers 777 * should also hard reset the chip here? 778 * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs 779 * for all versions of the driver, if they were allocated 780 */ 781 if (dd->ipath_irq) { 782 ipath_cdbg(VERBOSE, "unit %u free irq %d\n", 783 dd->ipath_unit, dd->ipath_irq); 784 dd->ipath_f_free_irq(dd); 785 } else 786 ipath_dbg("irq is 0, not doing free_irq " 787 "for unit %u\n", dd->ipath_unit); 788 /* 789 * we check for NULL here, because it's outside 790 * the kregbase check, and we need to call it 791 * after the free_irq. Thus it's possible that 792 * the function pointers were never initialized. 793 */ 794 if (dd->ipath_f_cleanup) 795 /* clean up chip-specific stuff */ 796 dd->ipath_f_cleanup(dd); 797 798 ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase); 799 iounmap((volatile void __iomem *) dd->ipath_kregbase); 800 pci_release_regions(pdev); 801 ipath_cdbg(VERBOSE, "calling pci_disable_device\n"); 802 pci_disable_device(pdev); 803 804 ipath_free_devdata(pdev, dd); 805} 806 807/* general driver use */ 808DEFINE_MUTEX(ipath_mutex); 809 810static DEFINE_SPINLOCK(ipath_pioavail_lock); 811 812/** 813 * ipath_disarm_piobufs - cancel a range of PIO buffers 814 * @dd: the infinipath device 815 * @first: the first PIO buffer to cancel 816 * @cnt: the number of PIO buffers to cancel 817 * 818 * cancel a range of PIO buffers, used when they might be armed, but 819 * not triggered. Used at init to ensure buffer state, and also user 820 * process close, in case it died while writing to a PIO buffer 821 * Also after errors. 822 */ 823void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first, 824 unsigned cnt) 825{ 826 unsigned i, last = first + cnt; 827 unsigned long flags; 828 829 ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first); 830 for (i = first; i < last; i++) { 831 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 832 /* 833 * The disarm-related bits are write-only, so it 834 * is ok to OR them in with our copy of sendctrl 835 * while we hold the lock. 836 */ 837 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 838 dd->ipath_sendctrl | INFINIPATH_S_DISARM | 839 (i << INFINIPATH_S_DISARMPIOBUF_SHIFT)); 840 /* can't disarm bufs back-to-back per iba7220 spec */ 841 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 842 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 843 } 844 /* on some older chips, update may not happen after cancel */ 845 ipath_force_pio_avail_update(dd); 846} 847 848/** 849 * ipath_wait_linkstate - wait for an IB link state change to occur 850 * @dd: the infinipath device 851 * @state: the state to wait for 852 * @msecs: the number of milliseconds to wait 853 * 854 * wait up to msecs milliseconds for IB link state change to occur for 855 * now, take the easy polling route. Currently used only by 856 * ipath_set_linkstate. Returns 0 if state reached, otherwise 857 * -ETIMEDOUT state can have multiple states set, for any of several 858 * transitions. 859 */ 860int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs) 861{ 862 dd->ipath_state_wanted = state; 863 wait_event_interruptible_timeout(ipath_state_wait, 864 (dd->ipath_flags & state), 865 msecs_to_jiffies(msecs)); 866 dd->ipath_state_wanted = 0; 867 868 if (!(dd->ipath_flags & state)) { 869 u64 val; 870 ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u" 871 " ms\n", 872 /* test INIT ahead of DOWN, both can be set */ 873 (state & IPATH_LINKINIT) ? "INIT" : 874 ((state & IPATH_LINKDOWN) ? "DOWN" : 875 ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")), 876 msecs); 877 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 878 ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n", 879 (unsigned long long) ipath_read_kreg64( 880 dd, dd->ipath_kregs->kr_ibcctrl), 881 (unsigned long long) val, 882 ipath_ibcstatus_str[val & dd->ibcs_lts_mask]); 883 } 884 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; 885} 886 887static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err, 888 char *buf, size_t blen) 889{ 890 static const struct { 891 ipath_err_t err; 892 const char *msg; 893 } errs[] = { 894 { INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" }, 895 { INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" }, 896 { INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" }, 897 { INFINIPATH_E_SDMABASE, "SDmaBase" }, 898 { INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" }, 899 { INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" }, 900 { INFINIPATH_E_SDMADWEN, "SDmaDwEn" }, 901 { INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" }, 902 { INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" }, 903 { INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" }, 904 { INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" }, 905 { INFINIPATH_E_SDMADISABLED, "SDmaDisabled" }, 906 }; 907 int i; 908 int expected; 909 size_t bidx = 0; 910 911 for (i = 0; i < ARRAY_SIZE(errs); i++) { 912 expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 : 913 test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status); 914 if ((err & errs[i].err) && !expected) 915 bidx += snprintf(buf + bidx, blen - bidx, 916 "%s ", errs[i].msg); 917 } 918} 919 920/* 921 * Decode the error status into strings, deciding whether to always 922 * print * it or not depending on "normal packet errors" vs everything 923 * else. Return 1 if "real" errors, otherwise 0 if only packet 924 * errors, so caller can decide what to print with the string. 925 */ 926int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen, 927 ipath_err_t err) 928{ 929 int iserr = 1; 930 *buf = '\0'; 931 if (err & INFINIPATH_E_PKTERRS) { 932 if (!(err & ~INFINIPATH_E_PKTERRS)) 933 iserr = 0; // if only packet errors. 934 if (ipath_debug & __IPATH_ERRPKTDBG) { 935 if (err & INFINIPATH_E_REBP) 936 strlcat(buf, "EBP ", blen); 937 if (err & INFINIPATH_E_RVCRC) 938 strlcat(buf, "VCRC ", blen); 939 if (err & INFINIPATH_E_RICRC) { 940 strlcat(buf, "CRC ", blen); 941 // clear for check below, so only once 942 err &= INFINIPATH_E_RICRC; 943 } 944 if (err & INFINIPATH_E_RSHORTPKTLEN) 945 strlcat(buf, "rshortpktlen ", blen); 946 if (err & INFINIPATH_E_SDROPPEDDATAPKT) 947 strlcat(buf, "sdroppeddatapkt ", blen); 948 if (err & INFINIPATH_E_SPKTLEN) 949 strlcat(buf, "spktlen ", blen); 950 } 951 if ((err & INFINIPATH_E_RICRC) && 952 !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP))) 953 strlcat(buf, "CRC ", blen); 954 if (!iserr) 955 goto done; 956 } 957 if (err & INFINIPATH_E_RHDRLEN) 958 strlcat(buf, "rhdrlen ", blen); 959 if (err & INFINIPATH_E_RBADTID) 960 strlcat(buf, "rbadtid ", blen); 961 if (err & INFINIPATH_E_RBADVERSION) 962 strlcat(buf, "rbadversion ", blen); 963 if (err & INFINIPATH_E_RHDR) 964 strlcat(buf, "rhdr ", blen); 965 if (err & INFINIPATH_E_SENDSPECIALTRIGGER) 966 strlcat(buf, "sendspecialtrigger ", blen); 967 if (err & INFINIPATH_E_RLONGPKTLEN) 968 strlcat(buf, "rlongpktlen ", blen); 969 if (err & INFINIPATH_E_RMAXPKTLEN) 970 strlcat(buf, "rmaxpktlen ", blen); 971 if (err & INFINIPATH_E_RMINPKTLEN) 972 strlcat(buf, "rminpktlen ", blen); 973 if (err & INFINIPATH_E_SMINPKTLEN) 974 strlcat(buf, "sminpktlen ", blen); 975 if (err & INFINIPATH_E_RFORMATERR) 976 strlcat(buf, "rformaterr ", blen); 977 if (err & INFINIPATH_E_RUNSUPVL) 978 strlcat(buf, "runsupvl ", blen); 979 if (err & INFINIPATH_E_RUNEXPCHAR) 980 strlcat(buf, "runexpchar ", blen); 981 if (err & INFINIPATH_E_RIBFLOW) 982 strlcat(buf, "ribflow ", blen); 983 if (err & INFINIPATH_E_SUNDERRUN) 984 strlcat(buf, "sunderrun ", blen); 985 if (err & INFINIPATH_E_SPIOARMLAUNCH) 986 strlcat(buf, "spioarmlaunch ", blen); 987 if (err & INFINIPATH_E_SUNEXPERRPKTNUM) 988 strlcat(buf, "sunexperrpktnum ", blen); 989 if (err & INFINIPATH_E_SDROPPEDSMPPKT) 990 strlcat(buf, "sdroppedsmppkt ", blen); 991 if (err & INFINIPATH_E_SMAXPKTLEN) 992 strlcat(buf, "smaxpktlen ", blen); 993 if (err & INFINIPATH_E_SUNSUPVL) 994 strlcat(buf, "sunsupVL ", blen); 995 if (err & INFINIPATH_E_INVALIDADDR) 996 strlcat(buf, "invalidaddr ", blen); 997 if (err & INFINIPATH_E_RRCVEGRFULL) 998 strlcat(buf, "rcvegrfull ", blen); 999 if (err & INFINIPATH_E_RRCVHDRFULL) 1000 strlcat(buf, "rcvhdrfull ", blen); 1001 if (err & INFINIPATH_E_IBSTATUSCHANGED) 1002 strlcat(buf, "ibcstatuschg ", blen); 1003 if (err & INFINIPATH_E_RIBLOSTLINK) 1004 strlcat(buf, "riblostlink ", blen); 1005 if (err & INFINIPATH_E_HARDWARE) 1006 strlcat(buf, "hardware ", blen); 1007 if (err & INFINIPATH_E_RESET) 1008 strlcat(buf, "reset ", blen); 1009 if (err & INFINIPATH_E_SDMAERRS) 1010 decode_sdma_errs(dd, err, buf, blen); 1011 if (err & INFINIPATH_E_INVALIDEEPCMD) 1012 strlcat(buf, "invalideepromcmd ", blen); 1013done: 1014 return iserr; 1015} 1016 1017/** 1018 * get_rhf_errstring - decode RHF errors 1019 * @err: the err number 1020 * @msg: the output buffer 1021 * @len: the length of the output buffer 1022 * 1023 * only used one place now, may want more later 1024 */ 1025static void get_rhf_errstring(u32 err, char *msg, size_t len) 1026{ 1027 /* if no errors, and so don't need to check what's first */ 1028 *msg = '\0'; 1029 1030 if (err & INFINIPATH_RHF_H_ICRCERR) 1031 strlcat(msg, "icrcerr ", len); 1032 if (err & INFINIPATH_RHF_H_VCRCERR) 1033 strlcat(msg, "vcrcerr ", len); 1034 if (err & INFINIPATH_RHF_H_PARITYERR) 1035 strlcat(msg, "parityerr ", len); 1036 if (err & INFINIPATH_RHF_H_LENERR) 1037 strlcat(msg, "lenerr ", len); 1038 if (err & INFINIPATH_RHF_H_MTUERR) 1039 strlcat(msg, "mtuerr ", len); 1040 if (err & INFINIPATH_RHF_H_IHDRERR) 1041 /* infinipath hdr checksum error */ 1042 strlcat(msg, "ipathhdrerr ", len); 1043 if (err & INFINIPATH_RHF_H_TIDERR) 1044 strlcat(msg, "tiderr ", len); 1045 if (err & INFINIPATH_RHF_H_MKERR) 1046 /* bad port, offset, etc. */ 1047 strlcat(msg, "invalid ipathhdr ", len); 1048 if (err & INFINIPATH_RHF_H_IBERR) 1049 strlcat(msg, "iberr ", len); 1050 if (err & INFINIPATH_RHF_L_SWA) 1051 strlcat(msg, "swA ", len); 1052 if (err & INFINIPATH_RHF_L_SWB) 1053 strlcat(msg, "swB ", len); 1054} 1055 1056/** 1057 * ipath_get_egrbuf - get an eager buffer 1058 * @dd: the infinipath device 1059 * @bufnum: the eager buffer to get 1060 * 1061 * must only be called if ipath_pd[port] is known to be allocated 1062 */ 1063static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum) 1064{ 1065 return dd->ipath_port0_skbinfo ? 1066 (void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL; 1067} 1068 1069/** 1070 * ipath_alloc_skb - allocate an skb and buffer with possible constraints 1071 * @dd: the infinipath device 1072 * @gfp_mask: the sk_buff SFP mask 1073 */ 1074struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, 1075 gfp_t gfp_mask) 1076{ 1077 struct sk_buff *skb; 1078 u32 len; 1079 1080 /* 1081 * Only fully supported way to handle this is to allocate lots 1082 * extra, align as needed, and then do skb_reserve(). That wastes 1083 * a lot of memory... I'll have to hack this into infinipath_copy 1084 * also. 1085 */ 1086 1087 /* 1088 * We need 2 extra bytes for ipath_ether data sent in the 1089 * key header. In order to keep everything dword aligned, 1090 * we'll reserve 4 bytes. 1091 */ 1092 len = dd->ipath_ibmaxlen + 4; 1093 1094 if (dd->ipath_flags & IPATH_4BYTE_TID) { 1095 /* We need a 2KB multiple alignment, and there is no way 1096 * to do it except to allocate extra and then skb_reserve 1097 * enough to bring it up to the right alignment. 1098 */ 1099 len += 2047; 1100 } 1101 1102 skb = __dev_alloc_skb(len, gfp_mask); 1103 if (!skb) { 1104 ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n", 1105 len); 1106 goto bail; 1107 } 1108 1109 skb_reserve(skb, 4); 1110 1111 if (dd->ipath_flags & IPATH_4BYTE_TID) { 1112 u32 una = (unsigned long)skb->data & 2047; 1113 if (una) 1114 skb_reserve(skb, 2048 - una); 1115 } 1116 1117bail: 1118 return skb; 1119} 1120 1121static void ipath_rcv_hdrerr(struct ipath_devdata *dd, 1122 u32 eflags, 1123 u32 l, 1124 u32 etail, 1125 __le32 *rhf_addr, 1126 struct ipath_message_header *hdr) 1127{ 1128 char emsg[128]; 1129 1130 get_rhf_errstring(eflags, emsg, sizeof emsg); 1131 ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u " 1132 "tlen=%x opcode=%x egridx=%x: %s\n", 1133 eflags, l, 1134 ipath_hdrget_rcv_type(rhf_addr), 1135 ipath_hdrget_length_in_bytes(rhf_addr), 1136 be32_to_cpu(hdr->bth[0]) >> 24, 1137 etail, emsg); 1138 1139 /* Count local link integrity errors. */ 1140 if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) { 1141 u8 n = (dd->ipath_ibcctrl >> 1142 INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) & 1143 INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK; 1144 1145 if (++dd->ipath_lli_counter > n) { 1146 dd->ipath_lli_counter = 0; 1147 dd->ipath_lli_errors++; 1148 } 1149 } 1150} 1151 1152/* 1153 * ipath_kreceive - receive a packet 1154 * @pd: the infinipath port 1155 * 1156 * called from interrupt handler for errors or receive interrupt 1157 */ 1158void ipath_kreceive(struct ipath_portdata *pd) 1159{ 1160 struct ipath_devdata *dd = pd->port_dd; 1161 __le32 *rhf_addr; 1162 void *ebuf; 1163 const u32 rsize = dd->ipath_rcvhdrentsize; /* words */ 1164 const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */ 1165 u32 etail = -1, l, hdrqtail; 1166 struct ipath_message_header *hdr; 1167 u32 eflags, i, etype, tlen, pkttot = 0, updegr = 0, reloop = 0; 1168 static u64 totcalls; /* stats, may eventually remove */ 1169 int last; 1170 1171 l = pd->port_head; 1172 rhf_addr = (__le32 *) pd->port_rcvhdrq + l + dd->ipath_rhf_offset; 1173 if (dd->ipath_flags & IPATH_NODMA_RTAIL) { 1174 u32 seq = ipath_hdrget_seq(rhf_addr); 1175 1176 if (seq != pd->port_seq_cnt) 1177 goto bail; 1178 hdrqtail = 0; 1179 } else { 1180 hdrqtail = ipath_get_rcvhdrtail(pd); 1181 if (l == hdrqtail) 1182 goto bail; 1183 smp_rmb(); 1184 } 1185 1186reloop: 1187 for (last = 0, i = 1; !last; i += !last) { 1188 hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); 1189 eflags = ipath_hdrget_err_flags(rhf_addr); 1190 etype = ipath_hdrget_rcv_type(rhf_addr); 1191 /* total length */ 1192 tlen = ipath_hdrget_length_in_bytes(rhf_addr); 1193 ebuf = NULL; 1194 if ((dd->ipath_flags & IPATH_NODMA_RTAIL) ? 1195 ipath_hdrget_use_egr_buf(rhf_addr) : 1196 (etype != RCVHQ_RCV_TYPE_EXPECTED)) { 1197 /* 1198 * It turns out that the chip uses an eager buffer 1199 * for all non-expected packets, whether it "needs" 1200 * one or not. So always get the index, but don't 1201 * set ebuf (so we try to copy data) unless the 1202 * length requires it. 1203 */ 1204 etail = ipath_hdrget_index(rhf_addr); 1205 updegr = 1; 1206 if (tlen > sizeof(*hdr) || 1207 etype == RCVHQ_RCV_TYPE_NON_KD) 1208 ebuf = ipath_get_egrbuf(dd, etail); 1209 } 1210 1211 /* 1212 * both tiderr and ipathhdrerr are set for all plain IB 1213 * packets; only ipathhdrerr should be set. 1214 */ 1215 1216 if (etype != RCVHQ_RCV_TYPE_NON_KD && 1217 etype != RCVHQ_RCV_TYPE_ERROR && 1218 ipath_hdrget_ipath_ver(hdr->iph.ver_port_tid_offset) != 1219 IPS_PROTO_VERSION) 1220 ipath_cdbg(PKT, "Bad InfiniPath protocol version " 1221 "%x\n", etype); 1222 1223 if (unlikely(eflags)) 1224 ipath_rcv_hdrerr(dd, eflags, l, etail, rhf_addr, hdr); 1225 else if (etype == RCVHQ_RCV_TYPE_NON_KD) { 1226 ipath_ib_rcv(dd->verbs_dev, (u32 *)hdr, ebuf, tlen); 1227 if (dd->ipath_lli_counter) 1228 dd->ipath_lli_counter--; 1229 } else if (etype == RCVHQ_RCV_TYPE_EAGER) { 1230 u8 opcode = be32_to_cpu(hdr->bth[0]) >> 24; 1231 u32 qp = be32_to_cpu(hdr->bth[1]) & 0xffffff; 1232 ipath_cdbg(PKT, "typ %x, opcode %x (eager, " 1233 "qp=%x), len %x; ignored\n", 1234 etype, opcode, qp, tlen); 1235 } 1236 else if (etype == RCVHQ_RCV_TYPE_EXPECTED) 1237 ipath_dbg("Bug: Expected TID, opcode %x; ignored\n", 1238 be32_to_cpu(hdr->bth[0]) >> 24); 1239 else { 1240 /* 1241 * error packet, type of error unknown. 1242 * Probably type 3, but we don't know, so don't 1243 * even try to print the opcode, etc. 1244 * Usually caused by a "bad packet", that has no 1245 * BTH, when the LRH says it should. 1246 */ 1247 ipath_cdbg(ERRPKT, "Error Pkt, but no eflags! egrbuf" 1248 " %x, len %x hdrq+%x rhf: %Lx\n", 1249 etail, tlen, l, (unsigned long long) 1250 le64_to_cpu(*(__le64 *) rhf_addr)); 1251 if (ipath_debug & __IPATH_ERRPKTDBG) { 1252 u32 j, *d, dw = rsize-2; 1253 if (rsize > (tlen>>2)) 1254 dw = tlen>>2; 1255 d = (u32 *)hdr; 1256 printk(KERN_DEBUG "EPkt rcvhdr(%x dw):\n", 1257 dw); 1258 for (j = 0; j < dw; j++) 1259 printk(KERN_DEBUG "%8x%s", d[j], 1260 (j%8) == 7 ? "\n" : " "); 1261 printk(KERN_DEBUG ".\n"); 1262 } 1263 } 1264 l += rsize; 1265 if (l >= maxcnt) 1266 l = 0; 1267 rhf_addr = (__le32 *) pd->port_rcvhdrq + 1268 l + dd->ipath_rhf_offset; 1269 if (dd->ipath_flags & IPATH_NODMA_RTAIL) { 1270 u32 seq = ipath_hdrget_seq(rhf_addr); 1271 1272 if (++pd->port_seq_cnt > 13) 1273 pd->port_seq_cnt = 1; 1274 if (seq != pd->port_seq_cnt) 1275 last = 1; 1276 } else if (l == hdrqtail) 1277 last = 1; 1278 /* 1279 * update head regs on last packet, and every 16 packets. 1280 * Reduce bus traffic, while still trying to prevent 1281 * rcvhdrq overflows, for when the queue is nearly full 1282 */ 1283 if (last || !(i & 0xf)) { 1284 u64 lval = l; 1285 1286 /* request IBA6120 and 7220 interrupt only on last */ 1287 if (last) 1288 lval |= dd->ipath_rhdrhead_intr_off; 1289 ipath_write_ureg(dd, ur_rcvhdrhead, lval, 1290 pd->port_port); 1291 if (updegr) { 1292 ipath_write_ureg(dd, ur_rcvegrindexhead, 1293 etail, pd->port_port); 1294 updegr = 0; 1295 } 1296 } 1297 } 1298 1299 if (!dd->ipath_rhdrhead_intr_off && !reloop && 1300 !(dd->ipath_flags & IPATH_NODMA_RTAIL)) { 1301 u32 hqtail = ipath_get_rcvhdrtail(pd); 1302 if (hqtail != hdrqtail) { 1303 hdrqtail = hqtail; 1304 reloop = 1; /* loop 1 extra time at most */ 1305 goto reloop; 1306 } 1307 } 1308 1309 pkttot += i; 1310 1311 pd->port_head = l; 1312 1313 if (pkttot > ipath_stats.sps_maxpkts_call) 1314 ipath_stats.sps_maxpkts_call = pkttot; 1315 ipath_stats.sps_port0pkts += pkttot; 1316 ipath_stats.sps_avgpkts_call = 1317 ipath_stats.sps_port0pkts / ++totcalls; 1318 1319bail:; 1320} 1321 1322/** 1323 * ipath_update_pio_bufs - update shadow copy of the PIO availability map 1324 * @dd: the infinipath device 1325 * 1326 * called whenever our local copy indicates we have run out of send buffers 1327 * NOTE: This can be called from interrupt context by some code 1328 * and from non-interrupt context by ipath_getpiobuf(). 1329 */ 1330 1331static void ipath_update_pio_bufs(struct ipath_devdata *dd) 1332{ 1333 unsigned long flags; 1334 int i; 1335 const unsigned piobregs = (unsigned)dd->ipath_pioavregs; 1336 1337 /* If the generation (check) bits have changed, then we update the 1338 * busy bit for the corresponding PIO buffer. This algorithm will 1339 * modify positions to the value they already have in some cases 1340 * (i.e., no change), but it's faster than changing only the bits 1341 * that have changed. 1342 * 1343 * We would like to do this atomicly, to avoid spinlocks in the 1344 * critical send path, but that's not really possible, given the 1345 * type of changes, and that this routine could be called on 1346 * multiple cpu's simultaneously, so we lock in this routine only, 1347 * to avoid conflicting updates; all we change is the shadow, and 1348 * it's a single 64 bit memory location, so by definition the update 1349 * is atomic in terms of what other cpu's can see in testing the 1350 * bits. The spin_lock overhead isn't too bad, since it only 1351 * happens when all buffers are in use, so only cpu overhead, not 1352 * latency or bandwidth is affected. 1353 */ 1354 if (!dd->ipath_pioavailregs_dma) { 1355 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); 1356 return; 1357 } 1358 if (ipath_debug & __IPATH_VERBDBG) { 1359 /* only if packet debug and verbose */ 1360 volatile __le64 *dma = dd->ipath_pioavailregs_dma; 1361 unsigned long *shadow = dd->ipath_pioavailshadow; 1362 1363 ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, " 1364 "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx " 1365 "s3=%lx\n", 1366 (unsigned long long) le64_to_cpu(dma[0]), 1367 shadow[0], 1368 (unsigned long long) le64_to_cpu(dma[1]), 1369 shadow[1], 1370 (unsigned long long) le64_to_cpu(dma[2]), 1371 shadow[2], 1372 (unsigned long long) le64_to_cpu(dma[3]), 1373 shadow[3]); 1374 if (piobregs > 4) 1375 ipath_cdbg( 1376 PKT, "2nd group, dma4=%llx shad4=%lx, " 1377 "d5=%llx s5=%lx, d6=%llx s6=%lx, " 1378 "d7=%llx s7=%lx\n", 1379 (unsigned long long) le64_to_cpu(dma[4]), 1380 shadow[4], 1381 (unsigned long long) le64_to_cpu(dma[5]), 1382 shadow[5], 1383 (unsigned long long) le64_to_cpu(dma[6]), 1384 shadow[6], 1385 (unsigned long long) le64_to_cpu(dma[7]), 1386 shadow[7]); 1387 } 1388 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1389 for (i = 0; i < piobregs; i++) { 1390 u64 pchbusy, pchg, piov, pnew; 1391 /* 1392 * Chip Errata: bug 6641; even and odd qwords>3 are swapped 1393 */ 1394 if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) 1395 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); 1396 else 1397 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); 1398 pchg = dd->ipath_pioavailkernel[i] & 1399 ~(dd->ipath_pioavailshadow[i] ^ piov); 1400 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; 1401 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { 1402 pnew = dd->ipath_pioavailshadow[i] & ~pchbusy; 1403 pnew |= piov & pchbusy; 1404 dd->ipath_pioavailshadow[i] = pnew; 1405 } 1406 } 1407 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1408} 1409 1410/* 1411 * used to force update of pioavailshadow if we can't get a pio buffer. 1412 * Needed primarily due to exitting freeze mode after recovering 1413 * from errors. Done lazily, because it's safer (known to not 1414 * be writing pio buffers). 1415 */ 1416static void ipath_reset_availshadow(struct ipath_devdata *dd) 1417{ 1418 int i, im; 1419 unsigned long flags; 1420 1421 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1422 for (i = 0; i < dd->ipath_pioavregs; i++) { 1423 u64 val, oldval; 1424 /* deal with 6110 chip bug on high register #s */ 1425 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? 1426 i ^ 1 : i; 1427 val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]); 1428 /* 1429 * busy out the buffers not in the kernel avail list, 1430 * without changing the generation bits. 1431 */ 1432 oldval = dd->ipath_pioavailshadow[i]; 1433 dd->ipath_pioavailshadow[i] = val | 1434 ((~dd->ipath_pioavailkernel[i] << 1435 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) & 1436 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */ 1437 if (oldval != dd->ipath_pioavailshadow[i]) 1438 ipath_dbg("shadow[%d] was %Lx, now %lx\n", 1439 i, (unsigned long long) oldval, 1440 dd->ipath_pioavailshadow[i]); 1441 } 1442 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1443} 1444 1445/** 1446 * ipath_setrcvhdrsize - set the receive header size 1447 * @dd: the infinipath device 1448 * @rhdrsize: the receive header size 1449 * 1450 * called from user init code, and also layered driver init 1451 */ 1452int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize) 1453{ 1454 int ret = 0; 1455 1456 if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) { 1457 if (dd->ipath_rcvhdrsize != rhdrsize) { 1458 dev_info(&dd->pcidev->dev, 1459 "Error: can't set protocol header " 1460 "size %u, already %u\n", 1461 rhdrsize, dd->ipath_rcvhdrsize); 1462 ret = -EAGAIN; 1463 } else 1464 ipath_cdbg(VERBOSE, "Reuse same protocol header " 1465 "size %u\n", dd->ipath_rcvhdrsize); 1466 } else if (rhdrsize > (dd->ipath_rcvhdrentsize - 1467 (sizeof(u64) / sizeof(u32)))) { 1468 ipath_dbg("Error: can't set protocol header size %u " 1469 "(> max %u)\n", rhdrsize, 1470 dd->ipath_rcvhdrentsize - 1471 (u32) (sizeof(u64) / sizeof(u32))); 1472 ret = -EOVERFLOW; 1473 } else { 1474 dd->ipath_flags |= IPATH_RCVHDRSZ_SET; 1475 dd->ipath_rcvhdrsize = rhdrsize; 1476 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize, 1477 dd->ipath_rcvhdrsize); 1478 ipath_cdbg(VERBOSE, "Set protocol header size to %u\n", 1479 dd->ipath_rcvhdrsize); 1480 } 1481 return ret; 1482} 1483 1484/* 1485 * debugging code and stats updates if no pio buffers available. 1486 */ 1487static noinline void no_pio_bufs(struct ipath_devdata *dd) 1488{ 1489 unsigned long *shadow = dd->ipath_pioavailshadow; 1490 __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma; 1491 1492 dd->ipath_upd_pio_shadow = 1; 1493 1494 /* 1495 * not atomic, but if we lose a stat count in a while, that's OK 1496 */ 1497 ipath_stats.sps_nopiobufs++; 1498 if (!(++dd->ipath_consec_nopiobuf % 100000)) { 1499 ipath_force_pio_avail_update(dd); /* at start */ 1500 ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: " 1501 "%llx %llx %llx %llx\n" 1502 "ipath shadow: %lx %lx %lx %lx\n", 1503 dd->ipath_consec_nopiobuf, 1504 (unsigned long)get_cycles(), 1505 (unsigned long long) le64_to_cpu(dma[0]), 1506 (unsigned long long) le64_to_cpu(dma[1]), 1507 (unsigned long long) le64_to_cpu(dma[2]), 1508 (unsigned long long) le64_to_cpu(dma[3]), 1509 shadow[0], shadow[1], shadow[2], shadow[3]); 1510 /* 1511 * 4 buffers per byte, 4 registers above, cover rest 1512 * below 1513 */ 1514 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 1515 (sizeof(shadow[0]) * 4 * 4)) 1516 ipath_dbg("2nd group: dmacopy: " 1517 "%llx %llx %llx %llx\n" 1518 "ipath shadow: %lx %lx %lx %lx\n", 1519 (unsigned long long)le64_to_cpu(dma[4]), 1520 (unsigned long long)le64_to_cpu(dma[5]), 1521 (unsigned long long)le64_to_cpu(dma[6]), 1522 (unsigned long long)le64_to_cpu(dma[7]), 1523 shadow[4], shadow[5], shadow[6], shadow[7]); 1524 1525 /* at end, so update likely happened */ 1526 ipath_reset_availshadow(dd); 1527 } 1528} 1529 1530/* 1531 * common code for normal driver pio buffer allocation, and reserved 1532 * allocation. 1533 * 1534 * do appropriate marking as busy, etc. 1535 * returns buffer number if one found (>=0), negative number is error. 1536 */ 1537static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd, 1538 u32 *pbufnum, u32 first, u32 last, u32 firsti) 1539{ 1540 int i, j, updated = 0; 1541 unsigned piobcnt; 1542 unsigned long flags; 1543 unsigned long *shadow = dd->ipath_pioavailshadow; 1544 u32 __iomem *buf; 1545 1546 piobcnt = last - first; 1547 if (dd->ipath_upd_pio_shadow) { 1548 /* 1549 * Minor optimization. If we had no buffers on last call, 1550 * start out by doing the update; continue and do scan even 1551 * if no buffers were updated, to be paranoid 1552 */ 1553 ipath_update_pio_bufs(dd); 1554 updated++; 1555 i = first; 1556 } else 1557 i = firsti; 1558rescan: 1559 /* 1560 * while test_and_set_bit() is atomic, we do that and then the 1561 * change_bit(), and the pair is not. See if this is the cause 1562 * of the remaining armlaunch errors. 1563 */ 1564 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1565 for (j = 0; j < piobcnt; j++, i++) { 1566 if (i >= last) 1567 i = first; 1568 if (__test_and_set_bit((2 * i) + 1, shadow)) 1569 continue; 1570 /* flip generation bit */ 1571 __change_bit(2 * i, shadow); 1572 break; 1573 } 1574 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1575 1576 if (j == piobcnt) { 1577 if (!updated) { 1578 /* 1579 * first time through; shadow exhausted, but may be 1580 * buffers available, try an update and then rescan. 1581 */ 1582 ipath_update_pio_bufs(dd); 1583 updated++; 1584 i = first; 1585 goto rescan; 1586 } else if (updated == 1 && piobcnt <= 1587 ((dd->ipath_sendctrl 1588 >> INFINIPATH_S_UPDTHRESH_SHIFT) & 1589 INFINIPATH_S_UPDTHRESH_MASK)) { 1590 /* 1591 * for chips supporting and using the update 1592 * threshold we need to force an update of the 1593 * in-memory copy if the count is less than the 1594 * thershold, then check one more time. 1595 */ 1596 ipath_force_pio_avail_update(dd); 1597 ipath_update_pio_bufs(dd); 1598 updated++; 1599 i = first; 1600 goto rescan; 1601 } 1602 1603 no_pio_bufs(dd); 1604 buf = NULL; 1605 } else { 1606 if (i < dd->ipath_piobcnt2k) 1607 buf = (u32 __iomem *) (dd->ipath_pio2kbase + 1608 i * dd->ipath_palign); 1609 else 1610 buf = (u32 __iomem *) 1611 (dd->ipath_pio4kbase + 1612 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign); 1613 if (pbufnum) 1614 *pbufnum = i; 1615 } 1616 1617 return buf; 1618} 1619 1620/** 1621 * ipath_getpiobuf - find an available pio buffer 1622 * @dd: the infinipath device 1623 * @plen: the size of the PIO buffer needed in 32-bit words 1624 * @pbufnum: the buffer number is placed here 1625 */ 1626u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum) 1627{ 1628 u32 __iomem *buf; 1629 u32 pnum, nbufs; 1630 u32 first, lasti; 1631 1632 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) { 1633 first = dd->ipath_piobcnt2k; 1634 lasti = dd->ipath_lastpioindexl; 1635 } else { 1636 first = 0; 1637 lasti = dd->ipath_lastpioindex; 1638 } 1639 nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k; 1640 buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti); 1641 1642 if (buf) { 1643 /* 1644 * Set next starting place. It's just an optimization, 1645 * it doesn't matter who wins on this, so no locking 1646 */ 1647 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) 1648 dd->ipath_lastpioindexl = pnum + 1; 1649 else 1650 dd->ipath_lastpioindex = pnum + 1; 1651 if (dd->ipath_upd_pio_shadow) 1652 dd->ipath_upd_pio_shadow = 0; 1653 if (dd->ipath_consec_nopiobuf) 1654 dd->ipath_consec_nopiobuf = 0; 1655 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n", 1656 pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf); 1657 if (pbufnum) 1658 *pbufnum = pnum; 1659 1660 } 1661 return buf; 1662} 1663 1664/** 1665 * ipath_chg_pioavailkernel - change which send buffers are available for kernel 1666 * @dd: the infinipath device 1667 * @start: the starting send buffer number 1668 * @len: the number of send buffers 1669 * @avail: true if the buffers are available for kernel use, false otherwise 1670 */ 1671void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start, 1672 unsigned len, int avail) 1673{ 1674 unsigned long flags; 1675 unsigned end, cnt = 0; 1676 1677 /* There are two bits per send buffer (busy and generation) */ 1678 start *= 2; 1679 end = start + len * 2; 1680 1681 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1682 /* Set or clear the busy bit in the shadow. */ 1683 while (start < end) { 1684 if (avail) { 1685 unsigned long dma; 1686 int i, im; 1687 /* 1688 * the BUSY bit will never be set, because we disarm 1689 * the user buffers before we hand them back to the 1690 * kernel. We do have to make sure the generation 1691 * bit is set correctly in shadow, since it could 1692 * have changed many times while allocated to user. 1693 * We can't use the bitmap functions on the full 1694 * dma array because it is always little-endian, so 1695 * we have to flip to host-order first. 1696 * BITS_PER_LONG is slightly wrong, since it's 1697 * always 64 bits per register in chip... 1698 * We only work on 64 bit kernels, so that's OK. 1699 */ 1700 /* deal with 6110 chip bug on high register #s */ 1701 i = start / BITS_PER_LONG; 1702 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ? 1703 i ^ 1 : i; 1704 __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT 1705 + start, dd->ipath_pioavailshadow); 1706 dma = (unsigned long) le64_to_cpu( 1707 dd->ipath_pioavailregs_dma[im]); 1708 if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 1709 + start) % BITS_PER_LONG, &dma)) 1710 __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 1711 + start, dd->ipath_pioavailshadow); 1712 else 1713 __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT 1714 + start, dd->ipath_pioavailshadow); 1715 __set_bit(start, dd->ipath_pioavailkernel); 1716 } else { 1717 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, 1718 dd->ipath_pioavailshadow); 1719 __clear_bit(start, dd->ipath_pioavailkernel); 1720 } 1721 start += 2; 1722 } 1723 1724 if (dd->ipath_pioupd_thresh) { 1725 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); 1726 cnt = bitmap_weight(dd->ipath_pioavailkernel, end); 1727 } 1728 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1729 1730 /* 1731 * When moving buffers from kernel to user, if number assigned to 1732 * the user is less than the pio update threshold, and threshold 1733 * is supported (cnt was computed > 0), drop the update threshold 1734 * so we update at least once per allocated number of buffers. 1735 * In any case, if the kernel buffers are less than the threshold, 1736 * drop the threshold. We don't bother increasing it, having once 1737 * decreased it, since it would typically just cycle back and forth. 1738 * If we don't decrease below buffers in use, we can wait a long 1739 * time for an update, until some other context uses PIO buffers. 1740 */ 1741 if (!avail && len < cnt) 1742 cnt = len; 1743 if (cnt < dd->ipath_pioupd_thresh) { 1744 dd->ipath_pioupd_thresh = cnt; 1745 ipath_dbg("Decreased pio update threshold to %u\n", 1746 dd->ipath_pioupd_thresh); 1747 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 1748 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK 1749 << INFINIPATH_S_UPDTHRESH_SHIFT); 1750 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh 1751 << INFINIPATH_S_UPDTHRESH_SHIFT; 1752 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1753 dd->ipath_sendctrl); 1754 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1755 } 1756} 1757 1758/** 1759 * ipath_create_rcvhdrq - create a receive header queue 1760 * @dd: the infinipath device 1761 * @pd: the port data 1762 * 1763 * this must be contiguous memory (from an i/o perspective), and must be 1764 * DMA'able (which means for some systems, it will go through an IOMMU, 1765 * or be forced into a low address range). 1766 */ 1767int ipath_create_rcvhdrq(struct ipath_devdata *dd, 1768 struct ipath_portdata *pd) 1769{ 1770 int ret = 0; 1771 1772 if (!pd->port_rcvhdrq) { 1773 dma_addr_t phys_hdrqtail; 1774 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 1775 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize * 1776 sizeof(u32), PAGE_SIZE); 1777 1778 pd->port_rcvhdrq = dma_alloc_coherent( 1779 &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, 1780 gfp_flags); 1781 1782 if (!pd->port_rcvhdrq) { 1783 ipath_dev_err(dd, "attempt to allocate %d bytes " 1784 "for port %u rcvhdrq failed\n", 1785 amt, pd->port_port); 1786 ret = -ENOMEM; 1787 goto bail; 1788 } 1789 1790 if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) { 1791 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent( 1792 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, 1793 GFP_KERNEL); 1794 if (!pd->port_rcvhdrtail_kvaddr) { 1795 ipath_dev_err(dd, "attempt to allocate 1 page " 1796 "for port %u rcvhdrqtailaddr " 1797 "failed\n", pd->port_port); 1798 ret = -ENOMEM; 1799 dma_free_coherent(&dd->pcidev->dev, amt, 1800 pd->port_rcvhdrq, 1801 pd->port_rcvhdrq_phys); 1802 pd->port_rcvhdrq = NULL; 1803 goto bail; 1804 } 1805 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail; 1806 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx " 1807 "physical\n", pd->port_port, 1808 (unsigned long long) phys_hdrqtail); 1809 } 1810 1811 pd->port_rcvhdrq_size = amt; 1812 1813 ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu " 1814 "for port %u rcvhdr Q\n", 1815 amt >> PAGE_SHIFT, pd->port_rcvhdrq, 1816 (unsigned long) pd->port_rcvhdrq_phys, 1817 (unsigned long) pd->port_rcvhdrq_size, 1818 pd->port_port); 1819 } 1820 else 1821 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; " 1822 "hdrtailaddr@%p %llx physical\n", 1823 pd->port_port, pd->port_rcvhdrq, 1824 (unsigned long long) pd->port_rcvhdrq_phys, 1825 pd->port_rcvhdrtail_kvaddr, (unsigned long long) 1826 pd->port_rcvhdrqtailaddr_phys); 1827 1828 /* clear for security and sanity on each use */ 1829 memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size); 1830 if (pd->port_rcvhdrtail_kvaddr) 1831 memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE); 1832 1833 /* 1834 * tell chip each time we init it, even if we are re-using previous 1835 * memory (we zero the register at process close) 1836 */ 1837 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 1838 pd->port_port, pd->port_rcvhdrqtailaddr_phys); 1839 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, 1840 pd->port_port, pd->port_rcvhdrq_phys); 1841 1842bail: 1843 return ret; 1844} 1845 1846 1847/* 1848 * Flush all sends that might be in the ready to send state, as well as any 1849 * that are in the process of being sent. Used whenever we need to be 1850 * sure the send side is idle. Cleans up all buffer state by canceling 1851 * all pio buffers, and issuing an abort, which cleans up anything in the 1852 * launch fifo. The cancel is superfluous on some chip versions, but 1853 * it's safer to always do it. 1854 * PIOAvail bits are updated by the chip as if normal send had happened. 1855 */ 1856void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl) 1857{ 1858 unsigned long flags; 1859 1860 if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) { 1861 ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n"); 1862 goto bail; 1863 } 1864 /* 1865 * If we have SDMA, and it's not disabled, we have to kick off the 1866 * abort state machine, provided we aren't already aborting. 1867 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING), 1868 * we skip the rest of this routine. It is already "in progress" 1869 */ 1870 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { 1871 int skip_cancel; 1872 unsigned long *statp = &dd->ipath_sdma_status; 1873 1874 spin_lock_irqsave(&dd->ipath_sdma_lock, flags); 1875 skip_cancel = 1876 test_and_set_bit(IPATH_SDMA_ABORTING, statp) 1877 && !test_bit(IPATH_SDMA_DISABLED, statp); 1878 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 1879 if (skip_cancel) 1880 goto bail; 1881 } 1882 1883 ipath_dbg("Cancelling all in-progress send buffers\n"); 1884 1885 /* skip armlaunch errs for a while */ 1886 dd->ipath_lastcancel = jiffies + HZ / 2; 1887 1888 /* 1889 * The abort bit is auto-clearing. We also don't want pioavail 1890 * update happening during this, and we don't want any other 1891 * sends going out, so turn those off for the duration. We read 1892 * the scratch register to be sure that cancels and the abort 1893 * have taken effect in the chip. Otherwise two parts are same 1894 * as ipath_force_pio_avail_update() 1895 */ 1896 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 1897 dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD 1898 | INFINIPATH_S_PIOENABLE); 1899 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1900 dd->ipath_sendctrl | INFINIPATH_S_ABORT); 1901 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1902 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1903 1904 /* disarm all send buffers */ 1905 ipath_disarm_piobufs(dd, 0, 1906 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); 1907 1908 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 1909 set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status); 1910 1911 if (restore_sendctrl) { 1912 /* else done by caller later if needed */ 1913 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 1914 dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD | 1915 INFINIPATH_S_PIOENABLE; 1916 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1917 dd->ipath_sendctrl); 1918 /* and again, be sure all have hit the chip */ 1919 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1920 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1921 } 1922 1923 if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) && 1924 !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) && 1925 test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) { 1926 spin_lock_irqsave(&dd->ipath_sdma_lock, flags); 1927 /* only wait so long for intr */ 1928 dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; 1929 dd->ipath_sdma_reset_wait = 200; 1930 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) 1931 tasklet_hi_schedule(&dd->ipath_sdma_abort_task); 1932 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 1933 } 1934bail:; 1935} 1936 1937/* 1938 * Force an update of in-memory copy of the pioavail registers, when 1939 * needed for any of a variety of reasons. We read the scratch register 1940 * to make it highly likely that the update will have happened by the 1941 * time we return. If already off (as in cancel_sends above), this 1942 * routine is a nop, on the assumption that the caller will "do the 1943 * right thing". 1944 */ 1945void ipath_force_pio_avail_update(struct ipath_devdata *dd) 1946{ 1947 unsigned long flags; 1948 1949 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 1950 if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) { 1951 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1952 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD); 1953 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1954 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 1955 dd->ipath_sendctrl); 1956 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1957 } 1958 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1959} 1960 1961static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, 1962 int linitcmd) 1963{ 1964 u64 mod_wd; 1965 static const char *what[4] = { 1966 [0] = "NOP", 1967 [INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN", 1968 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", 1969 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" 1970 }; 1971 1972 if (linitcmd == INFINIPATH_IBCC_LINKINITCMD_DISABLE) { 1973 /* 1974 * If we are told to disable, note that so link-recovery 1975 * code does not attempt to bring us back up. 1976 */ 1977 preempt_disable(); 1978 dd->ipath_flags |= IPATH_IB_LINK_DISABLED; 1979 preempt_enable(); 1980 } else if (linitcmd) { 1981 /* 1982 * Any other linkinitcmd will lead to LINKDOWN and then 1983 * to INIT (if all is well), so clear flag to let 1984 * link-recovery code attempt to bring us back up. 1985 */ 1986 preempt_disable(); 1987 dd->ipath_flags &= ~IPATH_IB_LINK_DISABLED; 1988 preempt_enable(); 1989 } 1990 1991 mod_wd = (linkcmd << dd->ibcc_lc_shift) | 1992 (linitcmd << INFINIPATH_IBCC_LINKINITCMD_SHIFT); 1993 ipath_cdbg(VERBOSE, 1994 "Moving unit %u to %s (initcmd=0x%x), current ltstate is %s\n", 1995 dd->ipath_unit, what[linkcmd], linitcmd, 1996 ipath_ibcstatus_str[ipath_ib_linktrstate(dd, 1997 ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]); 1998 1999 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2000 dd->ipath_ibcctrl | mod_wd); 2001 /* read from chip so write is flushed */ 2002 (void) ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 2003} 2004 2005int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate) 2006{ 2007 u32 lstate; 2008 int ret; 2009 2010 switch (newstate) { 2011 case IPATH_IB_LINKDOWN_ONLY: 2012 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 0); 2013 /* don't wait */ 2014 ret = 0; 2015 goto bail; 2016 2017 case IPATH_IB_LINKDOWN: 2018 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2019 INFINIPATH_IBCC_LINKINITCMD_POLL); 2020 /* don't wait */ 2021 ret = 0; 2022 goto bail; 2023 2024 case IPATH_IB_LINKDOWN_SLEEP: 2025 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2026 INFINIPATH_IBCC_LINKINITCMD_SLEEP); 2027 /* don't wait */ 2028 ret = 0; 2029 goto bail; 2030 2031 case IPATH_IB_LINKDOWN_DISABLE: 2032 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN, 2033 INFINIPATH_IBCC_LINKINITCMD_DISABLE); 2034 /* don't wait */ 2035 ret = 0; 2036 goto bail; 2037 2038 case IPATH_IB_LINKARM: 2039 if (dd->ipath_flags & IPATH_LINKARMED) { 2040 ret = 0; 2041 goto bail; 2042 } 2043 if (!(dd->ipath_flags & 2044 (IPATH_LINKINIT | IPATH_LINKACTIVE))) { 2045 ret = -EINVAL; 2046 goto bail; 2047 } 2048 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED, 0); 2049 2050 /* 2051 * Since the port can transition to ACTIVE by receiving 2052 * a non VL 15 packet, wait for either state. 2053 */ 2054 lstate = IPATH_LINKARMED | IPATH_LINKACTIVE; 2055 break; 2056 2057 case IPATH_IB_LINKACTIVE: 2058 if (dd->ipath_flags & IPATH_LINKACTIVE) { 2059 ret = 0; 2060 goto bail; 2061 } 2062 if (!(dd->ipath_flags & IPATH_LINKARMED)) { 2063 ret = -EINVAL; 2064 goto bail; 2065 } 2066 ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE, 0); 2067 lstate = IPATH_LINKACTIVE; 2068 break; 2069 2070 case IPATH_IB_LINK_LOOPBACK: 2071 dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n"); 2072 dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK; 2073 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2074 dd->ipath_ibcctrl); 2075 2076 /* turn heartbeat off, as it causes loopback to fail */ 2077 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2078 IPATH_IB_HRTBT_OFF); 2079 /* don't wait */ 2080 ret = 0; 2081 goto bail; 2082 2083 case IPATH_IB_LINK_EXTERNAL: 2084 dev_info(&dd->pcidev->dev, 2085 "Disabling IB local loopback (normal)\n"); 2086 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2087 IPATH_IB_HRTBT_ON); 2088 dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK; 2089 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2090 dd->ipath_ibcctrl); 2091 /* don't wait */ 2092 ret = 0; 2093 goto bail; 2094 2095 /* 2096 * Heartbeat can be explicitly enabled by the user via 2097 * "hrtbt_enable" "file", and if disabled, trying to enable here 2098 * will have no effect. Implicit changes (heartbeat off when 2099 * loopback on, and vice versa) are included to ease testing. 2100 */ 2101 case IPATH_IB_LINK_HRTBT: 2102 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2103 IPATH_IB_HRTBT_ON); 2104 goto bail; 2105 2106 case IPATH_IB_LINK_NO_HRTBT: 2107 ret = dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_HRTBT, 2108 IPATH_IB_HRTBT_OFF); 2109 goto bail; 2110 2111 default: 2112 ipath_dbg("Invalid linkstate 0x%x requested\n", newstate); 2113 ret = -EINVAL; 2114 goto bail; 2115 } 2116 ret = ipath_wait_linkstate(dd, lstate, 2000); 2117 2118bail: 2119 return ret; 2120} 2121 2122/** 2123 * ipath_set_mtu - set the MTU 2124 * @dd: the infinipath device 2125 * @arg: the new MTU 2126 * 2127 * we can handle "any" incoming size, the issue here is whether we 2128 * need to restrict our outgoing size. For now, we don't do any 2129 * sanity checking on this, and we don't deal with what happens to 2130 * programs that are already running when the size changes. 2131 * NOTE: changing the MTU will usually cause the IBC to go back to 2132 * link INIT state... 2133 */ 2134int ipath_set_mtu(struct ipath_devdata *dd, u16 arg) 2135{ 2136 u32 piosize; 2137 int changed = 0; 2138 int ret; 2139 2140 /* 2141 * mtu is IB data payload max. It's the largest power of 2 less 2142 * than piosize (or even larger, since it only really controls the 2143 * largest we can receive; we can send the max of the mtu and 2144 * piosize). We check that it's one of the valid IB sizes. 2145 */ 2146 if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && 2147 (arg != 4096 || !ipath_mtu4096)) { 2148 ipath_dbg("Trying to set invalid mtu %u, failing\n", arg); 2149 ret = -EINVAL; 2150 goto bail; 2151 } 2152 if (dd->ipath_ibmtu == arg) { 2153 ret = 0; /* same as current */ 2154 goto bail; 2155 } 2156 2157 piosize = dd->ipath_ibmaxlen; 2158 dd->ipath_ibmtu = arg; 2159 2160 if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) { 2161 /* Only if it's not the initial value (or reset to it) */ 2162 if (piosize != dd->ipath_init_ibmaxlen) { 2163 if (arg > piosize && arg <= dd->ipath_init_ibmaxlen) 2164 piosize = dd->ipath_init_ibmaxlen; 2165 dd->ipath_ibmaxlen = piosize; 2166 changed = 1; 2167 } 2168 } else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) { 2169 piosize = arg + IPATH_PIO_MAXIBHDR; 2170 ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x " 2171 "(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize, 2172 arg); 2173 dd->ipath_ibmaxlen = piosize; 2174 changed = 1; 2175 } 2176 2177 if (changed) { 2178 u64 ibc = dd->ipath_ibcctrl, ibdw; 2179 /* 2180 * update our housekeeping variables, and set IBC max 2181 * size, same as init code; max IBC is max we allow in 2182 * buffer, less the qword pbc, plus 1 for ICRC, in dwords 2183 */ 2184 dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32); 2185 ibdw = (dd->ipath_ibmaxlen >> 2) + 1; 2186 ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK << 2187 dd->ibcc_mpl_shift); 2188 ibc |= ibdw << dd->ibcc_mpl_shift; 2189 dd->ipath_ibcctrl = ibc; 2190 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 2191 dd->ipath_ibcctrl); 2192 dd->ipath_f_tidtemplate(dd); 2193 } 2194 2195 ret = 0; 2196 2197bail: 2198 return ret; 2199} 2200 2201int ipath_set_lid(struct ipath_devdata *dd, u32 lid, u8 lmc) 2202{ 2203 dd->ipath_lid = lid; 2204 dd->ipath_lmc = lmc; 2205 2206 dd->ipath_f_set_ib_cfg(dd, IPATH_IB_CFG_LIDLMC, lid | 2207 (~((1U << lmc) - 1)) << 16); 2208 2209 dev_info(&dd->pcidev->dev, "We got a lid: 0x%x\n", lid); 2210 2211 return 0; 2212} 2213 2214 2215/** 2216 * ipath_write_kreg_port - write a device's per-port 64-bit kernel register 2217 * @dd: the infinipath device 2218 * @regno: the register number to write 2219 * @port: the port containing the register 2220 * @value: the value to write 2221 * 2222 * Registers that vary with the chip implementation constants (port) 2223 * use this routine. 2224 */ 2225void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno, 2226 unsigned port, u64 value) 2227{ 2228 u16 where; 2229 2230 if (port < dd->ipath_portcnt && 2231 (regno == dd->ipath_kregs->kr_rcvhdraddr || 2232 regno == dd->ipath_kregs->kr_rcvhdrtailaddr)) 2233 where = regno + port; 2234 else 2235 where = -1; 2236 2237 ipath_write_kreg(dd, where, value); 2238} 2239 2240/* 2241 * Following deal with the "obviously simple" task of overriding the state 2242 * of the LEDS, which normally indicate link physical and logical status. 2243 * The complications arise in dealing with different hardware mappings 2244 * and the board-dependent routine being called from interrupts. 2245 * and then there's the requirement to _flash_ them. 2246 */ 2247#define LED_OVER_FREQ_SHIFT 8 2248#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) 2249/* Below is "non-zero" to force override, but both actual LEDs are off */ 2250#define LED_OVER_BOTH_OFF (8) 2251 2252static void ipath_run_led_override(unsigned long opaque) 2253{ 2254 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 2255 int timeoff; 2256 int pidx; 2257 u64 lstate, ltstate, val; 2258 2259 if (!(dd->ipath_flags & IPATH_INITTED)) 2260 return; 2261 2262 pidx = dd->ipath_led_override_phase++ & 1; 2263 dd->ipath_led_override = dd->ipath_led_override_vals[pidx]; 2264 timeoff = dd->ipath_led_override_timeoff; 2265 2266 /* 2267 * below potentially restores the LED values per current status, 2268 * should also possibly setup the traffic-blink register, 2269 * but leave that to per-chip functions. 2270 */ 2271 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 2272 ltstate = ipath_ib_linktrstate(dd, val); 2273 lstate = ipath_ib_linkstate(dd, val); 2274 2275 dd->ipath_f_setextled(dd, lstate, ltstate); 2276 mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff); 2277} 2278 2279void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val) 2280{ 2281 int timeoff, freq; 2282 2283 if (!(dd->ipath_flags & IPATH_INITTED)) 2284 return; 2285 2286 /* First check if we are blinking. If not, use 1HZ polling */ 2287 timeoff = HZ; 2288 freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; 2289 2290 if (freq) { 2291 /* For blink, set each phase from one nybble of val */ 2292 dd->ipath_led_override_vals[0] = val & 0xF; 2293 dd->ipath_led_override_vals[1] = (val >> 4) & 0xF; 2294 timeoff = (HZ << 4)/freq; 2295 } else { 2296 /* Non-blink set both phases the same. */ 2297 dd->ipath_led_override_vals[0] = val & 0xF; 2298 dd->ipath_led_override_vals[1] = val & 0xF; 2299 } 2300 dd->ipath_led_override_timeoff = timeoff; 2301 2302 /* 2303 * If the timer has not already been started, do so. Use a "quick" 2304 * timeout so the function will be called soon, to look at our request. 2305 */ 2306 if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) { 2307 /* Need to start timer */ 2308 init_timer(&dd->ipath_led_override_timer); 2309 dd->ipath_led_override_timer.function = 2310 ipath_run_led_override; 2311 dd->ipath_led_override_timer.data = (unsigned long) dd; 2312 dd->ipath_led_override_timer.expires = jiffies + 1; 2313 add_timer(&dd->ipath_led_override_timer); 2314 } else 2315 atomic_dec(&dd->ipath_led_override_timer_active); 2316} 2317 2318/** 2319 * ipath_shutdown_device - shut down a device 2320 * @dd: the infinipath device 2321 * 2322 * This is called to make the device quiet when we are about to 2323 * unload the driver, and also when the device is administratively 2324 * disabled. It does not free any data structures. 2325 * Everything it does has to be setup again by ipath_init_chip(dd,1) 2326 */ 2327void ipath_shutdown_device(struct ipath_devdata *dd) 2328{ 2329 unsigned long flags; 2330 2331 ipath_dbg("Shutting down the device\n"); 2332 2333 ipath_hol_up(dd); /* make sure user processes aren't suspended */ 2334 2335 dd->ipath_flags |= IPATH_LINKUNK; 2336 dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN | 2337 IPATH_LINKINIT | IPATH_LINKARMED | 2338 IPATH_LINKACTIVE); 2339 *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF | 2340 IPATH_STATUS_IB_READY); 2341 2342 /* mask interrupts, but not errors */ 2343 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 2344 2345 dd->ipath_rcvctrl = 0; 2346 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 2347 dd->ipath_rcvctrl); 2348 2349 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 2350 teardown_sdma(dd); 2351 2352 /* 2353 * gracefully stop all sends allowing any in progress to trickle out 2354 * first. 2355 */ 2356 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 2357 dd->ipath_sendctrl = 0; 2358 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl); 2359 /* flush it */ 2360 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 2361 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 2362 2363 /* 2364 * enough for anything that's going to trickle out to have actually 2365 * done so. 2366 */ 2367 udelay(5); 2368 2369 dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */ 2370 2371 ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE); 2372 ipath_cancel_sends(dd, 0); 2373 2374 /* 2375 * we are shutting down, so tell components that care. We don't do 2376 * this on just a link state change, much like ethernet, a cable 2377 * unplug, etc. doesn't change driver state 2378 */ 2379 signal_ib_event(dd, IB_EVENT_PORT_ERR); 2380 2381 /* disable IBC */ 2382 dd->ipath_control &= ~INFINIPATH_C_LINKENABLE; 2383 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 2384 dd->ipath_control | INFINIPATH_C_FREEZEMODE); 2385 2386 /* 2387 * clear SerdesEnable and turn the leds off; do this here because 2388 * we are unloading, so don't count on interrupts to move along 2389 * Turn the LEDs off explictly for the same reason. 2390 */ 2391 dd->ipath_f_quiet_serdes(dd); 2392 2393 /* stop all the timers that might still be running */ 2394 del_timer_sync(&dd->ipath_hol_timer); 2395 if (dd->ipath_stats_timer_active) { 2396 del_timer_sync(&dd->ipath_stats_timer); 2397 dd->ipath_stats_timer_active = 0; 2398 } 2399 if (dd->ipath_intrchk_timer.data) { 2400 del_timer_sync(&dd->ipath_intrchk_timer); 2401 dd->ipath_intrchk_timer.data = 0; 2402 } 2403 if (atomic_read(&dd->ipath_led_override_timer_active)) { 2404 del_timer_sync(&dd->ipath_led_override_timer); 2405 atomic_set(&dd->ipath_led_override_timer_active, 0); 2406 } 2407 2408 /* 2409 * clear all interrupts and errors, so that the next time the driver 2410 * is loaded or device is enabled, we know that whatever is set 2411 * happened while we were unloaded 2412 */ 2413 ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 2414 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); 2415 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); 2416 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); 2417 2418 ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); 2419 ipath_update_eeprom_log(dd); 2420} 2421 2422/** 2423 * ipath_free_pddata - free a port's allocated data 2424 * @dd: the infinipath device 2425 * @pd: the portdata structure 2426 * 2427 * free up any allocated data for a port 2428 * This should not touch anything that would affect a simultaneous 2429 * re-allocation of port data, because it is called after ipath_mutex 2430 * is released (and can be called from reinit as well). 2431 * It should never change any chip state, or global driver state. 2432 * (The only exception to global state is freeing the port0 port0_skbs.) 2433 */ 2434void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd) 2435{ 2436 if (!pd) 2437 return; 2438 2439 if (pd->port_rcvhdrq) { 2440 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " 2441 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, 2442 (unsigned long) pd->port_rcvhdrq_size); 2443 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, 2444 pd->port_rcvhdrq, pd->port_rcvhdrq_phys); 2445 pd->port_rcvhdrq = NULL; 2446 if (pd->port_rcvhdrtail_kvaddr) { 2447 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, 2448 pd->port_rcvhdrtail_kvaddr, 2449 pd->port_rcvhdrqtailaddr_phys); 2450 pd->port_rcvhdrtail_kvaddr = NULL; 2451 } 2452 } 2453 if (pd->port_port && pd->port_rcvegrbuf) { 2454 unsigned e; 2455 2456 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { 2457 void *base = pd->port_rcvegrbuf[e]; 2458 size_t size = pd->port_rcvegrbuf_size; 2459 2460 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " 2461 "chunk %u/%u\n", base, 2462 (unsigned long) size, 2463 e, pd->port_rcvegrbuf_chunks); 2464 dma_free_coherent(&dd->pcidev->dev, size, 2465 base, pd->port_rcvegrbuf_phys[e]); 2466 } 2467 kfree(pd->port_rcvegrbuf); 2468 pd->port_rcvegrbuf = NULL; 2469 kfree(pd->port_rcvegrbuf_phys); 2470 pd->port_rcvegrbuf_phys = NULL; 2471 pd->port_rcvegrbuf_chunks = 0; 2472 } else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) { 2473 unsigned e; 2474 struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo; 2475 2476 dd->ipath_port0_skbinfo = NULL; 2477 ipath_cdbg(VERBOSE, "free closed port %d " 2478 "ipath_port0_skbinfo @ %p\n", pd->port_port, 2479 skbinfo); 2480 for (e = 0; e < dd->ipath_p0_rcvegrcnt; e++) 2481 if (skbinfo[e].skb) { 2482 pci_unmap_single(dd->pcidev, skbinfo[e].phys, 2483 dd->ipath_ibmaxlen, 2484 PCI_DMA_FROMDEVICE); 2485 dev_kfree_skb(skbinfo[e].skb); 2486 } 2487 vfree(skbinfo); 2488 } 2489 kfree(pd->port_tid_pg_list); 2490 vfree(pd->subport_uregbase); 2491 vfree(pd->subport_rcvegrbuf); 2492 vfree(pd->subport_rcvhdr_base); 2493 kfree(pd); 2494} 2495 2496static int __init infinipath_init(void) 2497{ 2498 int ret; 2499 2500 if (ipath_debug & __IPATH_DBG) 2501 printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version); 2502 2503 /* 2504 * These must be called before the driver is registered with 2505 * the PCI subsystem. 2506 */ 2507 idr_init(&unit_table); 2508 if (!idr_pre_get(&unit_table, GFP_KERNEL)) { 2509 printk(KERN_ERR IPATH_DRV_NAME ": idr_pre_get() failed\n"); 2510 ret = -ENOMEM; 2511 goto bail; 2512 } 2513 2514 ret = pci_register_driver(&ipath_driver); 2515 if (ret < 0) { 2516 printk(KERN_ERR IPATH_DRV_NAME 2517 ": Unable to register driver: error %d\n", -ret); 2518 goto bail_unit; 2519 } 2520 2521 ret = ipath_init_ipathfs(); 2522 if (ret < 0) { 2523 printk(KERN_ERR IPATH_DRV_NAME ": Unable to create " 2524 "ipathfs: error %d\n", -ret); 2525 goto bail_pci; 2526 } 2527 2528 goto bail; 2529 2530bail_pci: 2531 pci_unregister_driver(&ipath_driver); 2532 2533bail_unit: 2534 idr_destroy(&unit_table); 2535 2536bail: 2537 return ret; 2538} 2539 2540static void __exit infinipath_cleanup(void) 2541{ 2542 ipath_exit_ipathfs(); 2543 2544 ipath_cdbg(VERBOSE, "Unregistering pci driver\n"); 2545 pci_unregister_driver(&ipath_driver); 2546 2547 idr_destroy(&unit_table); 2548} 2549 2550/** 2551 * ipath_reset_device - reset the chip if possible 2552 * @unit: the device to reset 2553 * 2554 * Whether or not reset is successful, we attempt to re-initialize the chip 2555 * (that is, much like a driver unload/reload). We clear the INITTED flag 2556 * so that the various entry points will fail until we reinitialize. For 2557 * now, we only allow this if no user ports are open that use chip resources 2558 */ 2559int ipath_reset_device(int unit) 2560{ 2561 int ret, i; 2562 struct ipath_devdata *dd = ipath_lookup(unit); 2563 unsigned long flags; 2564 2565 if (!dd) { 2566 ret = -ENODEV; 2567 goto bail; 2568 } 2569 2570 if (atomic_read(&dd->ipath_led_override_timer_active)) { 2571 /* Need to stop LED timer, _then_ shut off LEDs */ 2572 del_timer_sync(&dd->ipath_led_override_timer); 2573 atomic_set(&dd->ipath_led_override_timer_active, 0); 2574 } 2575 2576 /* Shut off LEDs after we are sure timer is not running */ 2577 dd->ipath_led_override = LED_OVER_BOTH_OFF; 2578 dd->ipath_f_setextled(dd, 0, 0); 2579 2580 dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit); 2581 2582 if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) { 2583 dev_info(&dd->pcidev->dev, "Invalid unit number %u or " 2584 "not initialized or not present\n", unit); 2585 ret = -ENXIO; 2586 goto bail; 2587 } 2588 2589 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 2590 if (dd->ipath_pd) 2591 for (i = 1; i < dd->ipath_cfgports; i++) { 2592 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) 2593 continue; 2594 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2595 ipath_dbg("unit %u port %d is in use " 2596 "(PID %u cmd %s), can't reset\n", 2597 unit, i, 2598 pid_nr(dd->ipath_pd[i]->port_pid), 2599 dd->ipath_pd[i]->port_comm); 2600 ret = -EBUSY; 2601 goto bail; 2602 } 2603 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2604 2605 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 2606 teardown_sdma(dd); 2607 2608 dd->ipath_flags &= ~IPATH_INITTED; 2609 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 2610 ret = dd->ipath_f_reset(dd); 2611 if (ret == 1) { 2612 ipath_dbg("Reinitializing unit %u after reset attempt\n", 2613 unit); 2614 ret = ipath_init_chip(dd, 1); 2615 } else 2616 ret = -EAGAIN; 2617 if (ret) 2618 ipath_dev_err(dd, "Reinitialize unit %u after " 2619 "reset failed with %d\n", unit, ret); 2620 else 2621 dev_info(&dd->pcidev->dev, "Reinitialized unit %u after " 2622 "resetting\n", unit); 2623 2624bail: 2625 return ret; 2626} 2627 2628/* 2629 * send a signal to all the processes that have the driver open 2630 * through the normal interfaces (i.e., everything other than diags 2631 * interface). Returns number of signalled processes. 2632 */ 2633static int ipath_signal_procs(struct ipath_devdata *dd, int sig) 2634{ 2635 int i, sub, any = 0; 2636 struct pid *pid; 2637 unsigned long flags; 2638 2639 if (!dd->ipath_pd) 2640 return 0; 2641 2642 spin_lock_irqsave(&dd->ipath_uctxt_lock, flags); 2643 for (i = 1; i < dd->ipath_cfgports; i++) { 2644 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt) 2645 continue; 2646 pid = dd->ipath_pd[i]->port_pid; 2647 if (!pid) 2648 continue; 2649 2650 dev_info(&dd->pcidev->dev, "context %d in use " 2651 "(PID %u), sending signal %d\n", 2652 i, pid_nr(pid), sig); 2653 kill_pid(pid, sig, 1); 2654 any++; 2655 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { 2656 pid = dd->ipath_pd[i]->port_subpid[sub]; 2657 if (!pid) 2658 continue; 2659 dev_info(&dd->pcidev->dev, "sub-context " 2660 "%d:%d in use (PID %u), sending " 2661 "signal %d\n", i, sub, pid_nr(pid), sig); 2662 kill_pid(pid, sig, 1); 2663 any++; 2664 } 2665 } 2666 spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags); 2667 return any; 2668} 2669 2670static void ipath_hol_signal_down(struct ipath_devdata *dd) 2671{ 2672 if (ipath_signal_procs(dd, SIGSTOP)) 2673 ipath_dbg("Stopped some processes\n"); 2674 ipath_cancel_sends(dd, 1); 2675} 2676 2677 2678static void ipath_hol_signal_up(struct ipath_devdata *dd) 2679{ 2680 if (ipath_signal_procs(dd, SIGCONT)) 2681 ipath_dbg("Continued some processes\n"); 2682} 2683 2684/* 2685 * link is down, stop any users processes, and flush pending sends 2686 * to prevent HoL blocking, then start the HoL timer that 2687 * periodically continues, then stop procs, so they can detect 2688 * link down if they want, and do something about it. 2689 * Timer may already be running, so use mod_timer, not add_timer. 2690 */ 2691void ipath_hol_down(struct ipath_devdata *dd) 2692{ 2693 dd->ipath_hol_state = IPATH_HOL_DOWN; 2694 ipath_hol_signal_down(dd); 2695 dd->ipath_hol_next = IPATH_HOL_DOWNCONT; 2696 dd->ipath_hol_timer.expires = jiffies + 2697 msecs_to_jiffies(ipath_hol_timeout_ms); 2698 mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); 2699} 2700 2701/* 2702 * link is up, continue any user processes, and ensure timer 2703 * is a nop, if running. Let timer keep running, if set; it 2704 * will nop when it sees the link is up 2705 */ 2706void ipath_hol_up(struct ipath_devdata *dd) 2707{ 2708 ipath_hol_signal_up(dd); 2709 dd->ipath_hol_state = IPATH_HOL_UP; 2710} 2711 2712/* 2713 * toggle the running/not running state of user proceses 2714 * to prevent HoL blocking on chip resources, but still allow 2715 * user processes to do link down special case handling. 2716 * Should only be called via the timer 2717 */ 2718void ipath_hol_event(unsigned long opaque) 2719{ 2720 struct ipath_devdata *dd = (struct ipath_devdata *)opaque; 2721 2722 if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP 2723 && dd->ipath_hol_state != IPATH_HOL_UP) { 2724 dd->ipath_hol_next = IPATH_HOL_DOWNCONT; 2725 ipath_dbg("Stopping processes\n"); 2726 ipath_hol_signal_down(dd); 2727 } else { /* may do "extra" if also in ipath_hol_up() */ 2728 dd->ipath_hol_next = IPATH_HOL_DOWNSTOP; 2729 ipath_dbg("Continuing processes\n"); 2730 ipath_hol_signal_up(dd); 2731 } 2732 if (dd->ipath_hol_state == IPATH_HOL_UP) 2733 ipath_dbg("link's up, don't resched timer\n"); 2734 else { 2735 dd->ipath_hol_timer.expires = jiffies + 2736 msecs_to_jiffies(ipath_hol_timeout_ms); 2737 mod_timer(&dd->ipath_hol_timer, 2738 dd->ipath_hol_timer.expires); 2739 } 2740} 2741 2742int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv) 2743{ 2744 u64 val; 2745 2746 if (new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK) 2747 return -1; 2748 if (dd->ipath_rx_pol_inv != new_pol_inv) { 2749 dd->ipath_rx_pol_inv = new_pol_inv; 2750 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig); 2751 val &= ~(INFINIPATH_XGXS_RX_POL_MASK << 2752 INFINIPATH_XGXS_RX_POL_SHIFT); 2753 val |= ((u64)dd->ipath_rx_pol_inv) << 2754 INFINIPATH_XGXS_RX_POL_SHIFT; 2755 ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val); 2756 } 2757 return 0; 2758} 2759 2760/* 2761 * Disable and enable the armlaunch error. Used for PIO bandwidth testing on 2762 * the 7220, which is count-based, rather than trigger-based. Safe for the 2763 * driver check, since it's at init. Not completely safe when used for 2764 * user-mode checking, since some error checking can be lost, but not 2765 * particularly risky, and only has problematic side-effects in the face of 2766 * very buggy user code. There is no reference counting, but that's also 2767 * fine, given the intended use. 2768 */ 2769void ipath_enable_armlaunch(struct ipath_devdata *dd) 2770{ 2771 dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH; 2772 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 2773 INFINIPATH_E_SPIOARMLAUNCH); 2774 dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH; 2775 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 2776 dd->ipath_errormask); 2777} 2778 2779void ipath_disable_armlaunch(struct ipath_devdata *dd) 2780{ 2781 /* so don't re-enable if already set */ 2782 dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH; 2783 dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH; 2784 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 2785 dd->ipath_errormask); 2786} 2787 2788module_init(infinipath_init); 2789module_exit(infinipath_cleanup); 2790