/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/pci.h>
#include <linux/poll.h>
#include <linux/cdev.h>
#include <linux/swap.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/jiffies.h>
#include <linux/smp_lock.h>
#include <asm/pgtable.h>

#include "ipath_kernel.h"
#include "ipath_common.h"
#include "ipath_user_sdma.h"

static int ipath_open(struct inode *, struct file *);
static int ipath_close(struct inode *, struct file *);
static ssize_t ipath_write(struct file *, const char __user *, size_t,
			   loff_t *);
static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
			    unsigned long, loff_t);
static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
static int ipath_mmap(struct file *, struct vm_area_struct *);

static const struct file_operations ipath_file_ops = {
	.owner = THIS_MODULE,
	.write = ipath_write,
	.aio_write = ipath_writev,
	.open = ipath_open,
	.release = ipath_close,
	.poll = ipath_poll,
	.mmap = ipath_mmap
};

/*
 * Convert kernel virtual addresses to physical addresses so they don't
 * potentially conflict with the chip addresses used as mmap offsets.
 * It doesn't really matter what mmap offset we use as long as we can
 * interpret it correctly.
 */
static u64 cvt_kvaddr(void *p)
{
	struct page *page;
	u64 paddr = 0;

	page = vmalloc_to_page(p);
	if (page)
		paddr = page_to_pfn(page) << PAGE_SHIFT;

	return paddr;
}
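/*
 * The offsets produced here are later matched by recomputing
 * cvt_kvaddr() on the same kernel addresses in mmap_kvaddr(), so the
 * only real requirement is that the conversion be stable for the
 * life of the port; user code just hands the value back as an mmap
 * offset without interpreting it.
 */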
static int ipath_get_base_info(struct file *fp,
			       void __user *ubase, size_t ubase_size)
{
	struct ipath_portdata *pd = port_fp(fp);
	int ret = 0;
	struct ipath_base_info *kinfo = NULL;
	struct ipath_devdata *dd = pd->port_dd;
	unsigned subport_cnt;
	int shared, master;
	size_t sz;

	subport_cnt = pd->port_subport_cnt;
	if (!subport_cnt) {
		shared = 0;
		master = 0;
		subport_cnt = 1;
	} else {
		shared = 1;
		master = !subport_fp(fp);
	}

	sz = sizeof(*kinfo);
	/* If port sharing is not requested, allow the old (smaller) size */
	if (!shared)
		sz -= 7 * sizeof(u64);
	if (ubase_size < sz) {
		ipath_cdbg(PROC,
			   "Base size %zu, need %zu (version mismatch?)\n",
			   ubase_size, sz);
		ret = -EINVAL;
		goto bail;
	}

	kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
	if (kinfo == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	ret = dd->ipath_f_get_base_info(pd, kinfo);
	if (ret < 0)
		goto bail;

	kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
	kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
	kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
	kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
	/*
	 * have to mmap whole thing
	 */
	kinfo->spi_rcv_egrbuftotlen =
		pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
	kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
	kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
		pd->port_rcvegrbuf_chunks;
	kinfo->spi_tidcnt = dd->ipath_rcvtidcnt / subport_cnt;
	if (master)
		kinfo->spi_tidcnt += dd->ipath_rcvtidcnt % subport_cnt;
	/*
	 * for this use, may be ipath_cfgports summed over all chips that
	 * are configured and present
	 */
	kinfo->spi_nports = dd->ipath_cfgports;
	/* unit (chip/board) our port is on */
	kinfo->spi_unit = dd->ipath_unit;
	/* for now, only a single page */
	kinfo->spi_tid_maxsize = PAGE_SIZE;

	/*
	 * Doing this per port, and based on the skip value, etc.  This has
	 * to be the actual buffer size, since the protocol code treats it
	 * as an array.
	 *
	 * These have to be set to user addresses in the user code via mmap.
	 * These values are used on return to user code for the mmap target
	 * addresses only.  For 32 bit, same 44 bit address problem, so use
	 * the physical address, not virtual.  Before 2.6.11, using the
	 * page_address() macro worked, but in 2.6.11, even that returns the
	 * full 64 bit address (upper bits all 1's).  So far, using the
	 * physical addresses (or chip offsets, for chip mapping) works, but
	 * no doubt some future kernel release will change that, and we'll be
	 * on to yet another method of dealing with this.
	 */
	kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
	kinfo->spi_rcvhdr_tailaddr = (u64) pd->port_rcvhdrqtailaddr_phys;
	kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
	kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
	kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
		(void *) dd->ipath_statusp -
		(void *) dd->ipath_pioavailregs_dma;
	if (!shared) {
		kinfo->spi_piocnt = pd->port_piocnt;
		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
			dd->ipath_ureg_align * pd->port_port;
	} else if (master) {
		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
				    (pd->port_piocnt % subport_cnt);
		/* Master's PIO buffers are after all the slaves' */
		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
			dd->ipath_palign *
			(pd->port_piocnt - kinfo->spi_piocnt);
	} else {
		unsigned slave = subport_fp(fp) - 1;

		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
			dd->ipath_palign * kinfo->spi_piocnt * slave;
	}

	if (shared) {
		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
			dd->ipath_ureg_align * pd->port_port;
		kinfo->spi_port_rcvegrbuf = kinfo->spi_rcv_egrbufs;
		kinfo->spi_port_rcvhdr_base = kinfo->spi_rcvhdr_base;
		kinfo->spi_port_rcvhdr_tailaddr = kinfo->spi_rcvhdr_tailaddr;

		kinfo->__spi_uregbase = cvt_kvaddr(pd->subport_uregbase +
			PAGE_SIZE * subport_fp(fp));

		kinfo->spi_rcvhdr_base = cvt_kvaddr(pd->subport_rcvhdr_base +
			pd->port_rcvhdrq_size * subport_fp(fp));
		kinfo->spi_rcvhdr_tailaddr = 0;
		kinfo->spi_rcv_egrbufs = cvt_kvaddr(pd->subport_rcvegrbuf +
			pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size *
			subport_fp(fp));

		kinfo->spi_subport_uregbase =
			cvt_kvaddr(pd->subport_uregbase);
		kinfo->spi_subport_rcvegrbuf =
			cvt_kvaddr(pd->subport_rcvegrbuf);
		kinfo->spi_subport_rcvhdr_base =
			cvt_kvaddr(pd->subport_rcvhdr_base);
		ipath_cdbg(PROC, "port %u flags %x %llx %llx %llx\n",
			   kinfo->spi_port, kinfo->spi_runtime_flags,
			   (unsigned long long) kinfo->spi_subport_uregbase,
			   (unsigned long long) kinfo->spi_subport_rcvegrbuf,
			   (unsigned long long) kinfo->spi_subport_rcvhdr_base);
	}

	/*
	 * All user buffers are 2KB buffers.  If we ever support
	 * giving 4KB buffers to user processes, this will need some
	 * work.
	 */
	kinfo->spi_pioindex = (kinfo->spi_piobufbase -
		(dd->ipath_piobufbase & 0xffffffff)) / dd->ipath_palign;
	kinfo->spi_pioalign = dd->ipath_palign;

	kinfo->spi_qpair = IPATH_KD_QP;
	/*
	 * user mode PIO buffers are always 2KB, even when 4KB can
	 * be received, and sent via the kernel; this is ibmaxlen
	 * for 2K MTU.
	 */
	kinfo->spi_piosize = dd->ipath_piosize2k - 2 * sizeof(u32);
	kinfo->spi_mtu = dd->ipath_ibmaxlen;	/* maxlen, not ibmtu */
	kinfo->spi_port = pd->port_port;
	kinfo->spi_subport = subport_fp(fp);
	kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
	kinfo->spi_hw_version = dd->ipath_revision;

	if (master) {
		kinfo->spi_runtime_flags |= IPATH_RUNTIME_MASTER;
	}

	sz = (ubase_size < sizeof(*kinfo)) ? ubase_size : sizeof(*kinfo);
	if (copy_to_user(ubase, kinfo, sz))
		ret = -EFAULT;

bail:
	kfree(kinfo);
	return ret;
}
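/*
 * Worked example of the size check above: when port sharing is not
 * requested, a buffer that is 7 * sizeof(u64) bytes short of
 * sizeof(*kinfo) is accepted, which presumably corresponds to the
 * seven spi_port_* / spi_subport_* fields filled in only for shared
 * ports; the final copy_to_user() is clamped to the caller's size
 * either way.
 */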
/**
 * ipath_tid_update - update a port TID
 * @pd: the port
 * @fp: the ipath device file
 * @ti: the TID information
 *
 * The new implementation as of Oct 2004 is that the driver assigns
 * the tid and returns it to the caller.  To make it easier to
 * catch bugs, and to reduce search time, we keep a cursor for
 * each port, walking the shadow tid array to find one that's not
 * in use.
 *
 * For now, if we can't allocate the full list, we fail, although
 * in the long run, we'll allocate as many as we can, and the
 * caller will deal with that by trying the remaining pages later.
 * That means that when we fail, we have to mark the tids as not in
 * use again, in our shadow copy.
 *
 * It's up to the caller to free the tids when they are done.
 * We'll unlock the pages as they free them.
 *
 * Also, right now we are locking one page at a time, but since
 * the intended use of this routine is for a single group of
 * virtually contiguous pages, that should change to improve
 * performance.
 */
static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
			    const struct ipath_tid_info *ti)
{
	int ret = 0, ntids;
	u32 tid, porttid, cnt, i, tidcnt, tidoff;
	u16 *tidlist;
	struct ipath_devdata *dd = pd->port_dd;
	u64 physaddr;
	unsigned long vaddr;
	u64 __iomem *tidbase;
	unsigned long tidmap[8];
	struct page **pagep = NULL;
	unsigned subport = subport_fp(fp);

	if (!dd->ipath_pageshadow) {
		ret = -ENOMEM;
		goto done;
	}

	cnt = ti->tidcnt;
	if (!cnt) {
		ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
			  (unsigned long long) ti->tidlist);
		/*
		 * Should we treat as success?  likely a bug
		 */
		ret = -EFAULT;
		goto done;
	}
	porttid = pd->port_port * dd->ipath_rcvtidcnt;
	if (!pd->port_subport_cnt) {
		tidcnt = dd->ipath_rcvtidcnt;
		tid = pd->port_tidcursor;
		tidoff = 0;
	} else if (!subport) {
		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
		tidoff = dd->ipath_rcvtidcnt - tidcnt;
		porttid += tidoff;
		tid = tidcursor_fp(fp);
	} else {
		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
		tidoff = tidcnt * (subport - 1);
		porttid += tidoff;
		tid = tidcursor_fp(fp);
	}
	if (cnt > tidcnt) {
		/* make sure it all fits in port_tid_pg_list */
		dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
			 "TIDs, only trying max (%u)\n", cnt, tidcnt);
		cnt = tidcnt;
	}
	pagep = &((struct page **) pd->port_tid_pg_list)[tidoff];
	tidlist = &((u16 *) &pagep[dd->ipath_rcvtidcnt])[tidoff];

	memset(tidmap, 0, sizeof(tidmap));
	/* before decrement; chip actual # */
	ntids = tidcnt;
	tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
				   dd->ipath_rcvtidbase +
				   porttid * sizeof(*tidbase));

	ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
		   pd->port_port, cnt, tid, tidbase);

	/* virtual address of first page in transfer */
	vaddr = ti->tidvaddr;
	if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
		       cnt * PAGE_SIZE)) {
		ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
			  (void *)vaddr, cnt);
		ret = -EFAULT;
		goto done;
	}
	ret = ipath_get_user_pages(vaddr, cnt, pagep);
	if (ret) {
		if (ret == -EBUSY) {
			ipath_dbg("Failed to lock addr %p, %u pages "
				  "(already locked)\n",
				  (void *) vaddr, cnt);
			/*
			 * for now, continue, and see what happens but with
			 * the new implementation, this should never happen,
			 * unless perhaps the user has mpin'ed the pages
			 * themselves (something we need to test)
			 */
			ret = 0;
		} else {
			dev_info(&dd->pcidev->dev,
				 "Failed to lock addr %p, %u pages: "
				 "errno %d\n", (void *) vaddr, cnt, -ret);
			goto done;
		}
	}
	for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
		for (; ntids--; tid++) {
			if (tid == tidcnt)
				tid = 0;
			if (!dd->ipath_pageshadow[porttid + tid])
				break;
		}
		if (ntids < 0) {
			/*
			 * oops, wrapped all the way through their TIDs,
			 * and didn't have enough free; see comments at
			 * start of routine
			 */
			ipath_dbg("Not enough free TIDs for %u pages "
				  "(index %d), failing\n", cnt, i);
			i--;	/* last tidlist[i] not filled in */
			ret = -ENOMEM;
			break;
		}
		tidlist[i] = tid + tidoff;
		ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
			   "vaddr %lx\n", i, tid + tidoff, vaddr);
		/* we "know" system pages and TID pages are same size */
		dd->ipath_pageshadow[porttid + tid] = pagep[i];
		dd->ipath_physshadow[porttid + tid] = ipath_map_page(
			dd->pcidev, pagep[i], 0, PAGE_SIZE,
			PCI_DMA_FROMDEVICE);
		/*
		 * don't need atomic or it's overhead
		 */
		__set_bit(tid, tidmap);
		physaddr = dd->ipath_physshadow[porttid + tid];
		ipath_stats.sps_pagelocks++;
		ipath_cdbg(VERBOSE,
			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
			   tid, vaddr, (unsigned long long) physaddr,
			   pagep[i]);
		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
				    physaddr);
		/*
		 * don't check this tid in ipath_portshadow, since we
		 * just filled it in; start with the next one.
		 */
		tid++;
	}

	if (ret) {
		u32 limit;
	cleanup:
		/* jump here if copy out of updated info failed... */
		ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
			  -ret, i, cnt);
		/* same code that's in ipath_free_tid() */
		limit = sizeof(tidmap) * BITS_PER_BYTE;
		if (limit > tidcnt)
			/* just in case size changes in future */
			limit = tidcnt;
		tid = find_first_bit((const unsigned long *)tidmap, limit);
		for (; tid < limit; tid++) {
			if (!test_bit(tid, tidmap))
				continue;
			if (dd->ipath_pageshadow[porttid + tid]) {
				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
					   tid);
				dd->ipath_f_put_tid(dd, &tidbase[tid],
						    RCVHQ_RCV_TYPE_EXPECTED,
						    dd->ipath_tidinvalid);
				pci_unmap_page(dd->pcidev,
					dd->ipath_physshadow[porttid + tid],
					PAGE_SIZE, PCI_DMA_FROMDEVICE);
				dd->ipath_pageshadow[porttid + tid] = NULL;
				ipath_stats.sps_pageunlocks++;
			}
		}
		ipath_release_user_pages(pagep, cnt);
	} else {
		/*
		 * Copy the updated array, with ipath_tid's filled in, back
		 * to user.  Since we did the copy in already, this "should
		 * never fail".  If it does, we have to clean up...
		 */
		if (copy_to_user((void __user *)
				 (unsigned long) ti->tidlist,
				 tidlist, cnt * sizeof(*tidlist))) {
			ret = -EFAULT;
			goto cleanup;
		}
		if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
				 tidmap, sizeof tidmap)) {
			ret = -EFAULT;
			goto cleanup;
		}
		if (tid == tidcnt)
			tid = 0;
		if (!pd->port_subport_cnt)
			pd->port_tidcursor = tid;
		else
			tidcursor_fp(fp) = tid;
	}

done:
	if (ret)
		ipath_dbg("Failed to map %u TID pages, failing with %d\n",
			  ti->tidcnt, -ret);
	return ret;
}
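/*
 * Note that tidmap above is purely local bookkeeping: one bit per
 * chip TID slot programmed by this call.  It drives the error-path
 * cleanup, and a copy goes back to user space so the library can
 * hand it to ipath_tid_free() later.
 */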
/**
 * ipath_tid_free - free a port TID
 * @pd: the port
 * @subport: the subport
 * @ti: the TID info
 *
 * right now we are unlocking one page at a time, but since
 * the intended use of this routine is for a single group of
 * virtually contiguous pages, that should change to improve
 * performance.  We check that the TID is in range for this port
 * but otherwise don't check validity; if user has an error and
 * frees the wrong tid, it's only their own data that can thereby
 * be corrupted.  We do check that the TID was in use, for sanity.
 * We always use our idea of the saved address, not the address that
 * they pass in to us.
 */
static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
			  const struct ipath_tid_info *ti)
{
	int ret = 0;
	u32 tid, porttid, cnt, limit, tidcnt;
	struct ipath_devdata *dd = pd->port_dd;
	u64 __iomem *tidbase;
	unsigned long tidmap[8];

	if (!dd->ipath_pageshadow) {
		ret = -ENOMEM;
		goto done;
	}

	if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
			   sizeof tidmap)) {
		ret = -EFAULT;
		goto done;
	}

	porttid = pd->port_port * dd->ipath_rcvtidcnt;
	if (!pd->port_subport_cnt)
		tidcnt = dd->ipath_rcvtidcnt;
	else if (!subport) {
		tidcnt = (dd->ipath_rcvtidcnt / pd->port_subport_cnt) +
			 (dd->ipath_rcvtidcnt % pd->port_subport_cnt);
		porttid += dd->ipath_rcvtidcnt - tidcnt;
	} else {
		tidcnt = dd->ipath_rcvtidcnt / pd->port_subport_cnt;
		porttid += tidcnt * (subport - 1);
	}
	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
				   dd->ipath_rcvtidbase +
				   porttid * sizeof(*tidbase));

	limit = sizeof(tidmap) * BITS_PER_BYTE;
	if (limit > tidcnt)
		/* just in case size changes in future */
		limit = tidcnt;
	tid = find_first_bit(tidmap, limit);
	ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
		   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
		   limit, tid, porttid);
	for (cnt = 0; tid < limit; tid++) {
		/*
		 * small optimization; if we detect a run of 3 or so without
		 * any set, use find_first_bit again.  That's mainly to
		 * accelerate the case where we wrapped, so we have some at
		 * the beginning, and some at the end, and a big gap
		 * in the middle.
		 */
		if (!test_bit(tid, tidmap))
			continue;
		cnt++;
		if (dd->ipath_pageshadow[porttid + tid]) {
			struct page *p;
			p = dd->ipath_pageshadow[porttid + tid];
			dd->ipath_pageshadow[porttid + tid] = NULL;
			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
				   pid_nr(pd->port_pid), tid);
			dd->ipath_f_put_tid(dd, &tidbase[tid],
					    RCVHQ_RCV_TYPE_EXPECTED,
					    dd->ipath_tidinvalid);
			pci_unmap_page(dd->pcidev,
				dd->ipath_physshadow[porttid + tid],
				PAGE_SIZE, PCI_DMA_FROMDEVICE);
			ipath_release_user_pages(&p, 1);
			ipath_stats.sps_pageunlocks++;
		} else
			ipath_dbg("Unused tid %u, ignoring\n", tid);
	}
	if (cnt != ti->tidcnt)
		ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
			  ti->tidcnt, cnt);
done:
	if (ret)
		ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
			  ti->tidcnt, -ret);
	return ret;
}

/**
 * ipath_set_part_key - set a partition key
 * @pd: the port
 * @key: the key
 *
 * We can have up to 4 active at a time (other than the default, which is
 * always allowed).  This is somewhat tricky, since multiple ports may set
 * the same key, so we reference count them, and clean up at exit.  All 4
 * partition keys are packed into a single infinipath register.  It's an
 * error for a process to set the same pkey multiple times.  We provide no
 * mechanism to de-allocate a pkey at this time, we may eventually need to
 * do that.  I've used the atomic operations, and no locking, and only make
 * a single pass through what's available.  This should be more than
 * adequate for some time.  I'll think about spinlocks or the like if and as
 * it's necessary.
 */
static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
{
	struct ipath_devdata *dd = pd->port_dd;
	int i, any = 0, pidx = -1;
	u16 lkey = key & 0x7FFF;
	int ret;

	if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
		/* nothing to do; this key always valid */
		ret = 0;
		goto bail;
	}

	ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
		   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
		   pd->port_port, key, dd->ipath_pkeys[0],
		   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
		   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
		   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
		   atomic_read(&dd->ipath_pkeyrefs[3]));

	if (!lkey) {
		ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
			   pd->port_port);
		ret = -EINVAL;
		goto bail;
	}

	/*
	 * Set the full membership bit, because it has to be
	 * set in the register or the packet, and it seems
	 * cleaner to set in the register than to force all
	 * callers to set it.  (see bug 4331)
	 */
	key |= 0x8000;

	for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
		if (!pd->port_pkeys[i] && pidx == -1)
			pidx = i;
		if (pd->port_pkeys[i] == key) {
			ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
				   "(%x) more than once\n",
				   pd->port_port, key);
			ret = -EEXIST;
			goto bail;
		}
	}
	if (pidx == -1) {
		ipath_dbg("All pkeys for port %u already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i]) {
			any++;
			continue;
		}
		if (dd->ipath_pkeys[i] == key) {
			atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];

			if (atomic_inc_return(pkrefs) > 1) {
				pd->port_pkeys[pidx] = key;
				ipath_cdbg(VERBOSE, "p%u set key %x "
					   "matches #%d, count now %d\n",
					   pd->port_port, key, i,
					   atomic_read(pkrefs));
				ret = 0;
				goto bail;
			} else {
				/*
				 * lost race, decrement count, catch below
				 */
				atomic_dec(pkrefs);
				ipath_cdbg(VERBOSE, "Lost race, count was "
					   "0, after dec, it's %d\n",
					   atomic_read(pkrefs));
				any++;
			}
		}
		if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
			/*
			 * It makes no sense to have both the limited and
			 * full membership PKEY set at the same time since
			 * the unlimited one will disable the limited one.
			 */
			ret = -EEXIST;
			goto bail;
		}
	}
	if (!any) {
		ipath_dbg("port %u, all pkeys already in use, "
			  "can't set %x\n", pd->port_port, key);
		ret = -EBUSY;
		goto bail;
	}
	for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
		if (!dd->ipath_pkeys[i] &&
		    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
			u64 pkey;

			/* for ipathstats, etc. */
			ipath_stats.sps_pkeys[i] = lkey;
			pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
			pkey =
				(u64) dd->ipath_pkeys[0] |
				((u64) dd->ipath_pkeys[1] << 16) |
				((u64) dd->ipath_pkeys[2] << 32) |
				((u64) dd->ipath_pkeys[3] << 48);
			ipath_cdbg(PROC, "p%u set key %x in #%d, "
				   "portidx %d, new pkey reg %llx\n",
				   pd->port_port, key, i, pidx,
				   (unsigned long long) pkey);
			ipath_write_kreg(
				dd, dd->ipath_kregs->kr_partitionkey, pkey);

			ret = 0;
			goto bail;
		}
	}
	ipath_dbg("port %u, all pkeys already in use 2nd pass, "
		  "can't set %x\n", pd->port_port, key);
	ret = -EBUSY;

bail:
	return ret;
}
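/*
 * Layout sketch of the partition key register as written above: four
 * 16-bit keys packed low key first, i.e.
 *
 *	pkey = keys[0] | keys[1] << 16 | keys[2] << 32 | keys[3] << 48
 *
 * with bit 15 of each key the full-membership bit, so a request to
 * set e.g. pkey 0x1234 is stored as 0x9234.
 */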
/**
 * ipath_manage_rcvq - manage a port's receive queue
 * @pd: the port
 * @subport: the subport
 * @start_stop: action to carry out
 *
 * start_stop == 0 disables receive on the port, for use in queue
 * overflow conditions.  start_stop==1 re-enables, to be used to
 * re-init the software copy of the head register
 */
static int ipath_manage_rcvq(struct ipath_portdata *pd, unsigned subport,
			     int start_stop)
{
	struct ipath_devdata *dd = pd->port_dd;

	ipath_cdbg(PROC, "%sabling rcv for unit %u port %u:%u\n",
		   start_stop ? "en" : "dis", dd->ipath_unit,
		   pd->port_port, subport);
	if (subport)
		goto bail;
	/* atomically clear receive enable port. */
	if (start_stop) {
		/*
		 * On enable, force in-memory copy of the tail register to
		 * 0, so that protocol code doesn't have to worry about
		 * whether or not the chip has yet updated the in-memory
		 * copy or not on return from the system call.  The chip
		 * always resets its tail register back to 0 on a
		 * transition from disabled to enabled.  This could cause a
		 * problem if software was broken, and did the enable w/o
		 * the disable, but eventually the in-memory copy will be
		 * updated and correct itself, even in the face of software
		 * bugs.
		 */
		if (pd->port_rcvhdrtail_kvaddr)
			ipath_clear_rcvhdrtail(pd);
		set_bit(dd->ipath_r_portenable_shift + pd->port_port,
			&dd->ipath_rcvctrl);
	} else
		clear_bit(dd->ipath_r_portenable_shift + pd->port_port,
			  &dd->ipath_rcvctrl);
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);
	/* now be sure chip saw it before we return */
	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
	if (start_stop) {
		/*
		 * And try to be sure that tail reg update has happened too.
		 * This should in theory interlock with the RXE changes to
		 * the tail register.  Don't assign it to the tail register
		 * in memory copy, since we could overwrite an update by the
		 * chip if we did.
		 */
		ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
	}
	/* always; new head should be equal to new tail; see above */
bail:
	return 0;
}
783 */ 784 ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port); 785 } 786 /* always; new head should be equal to new tail; see above */ 787bail: 788 return 0; 789} 790 791static void ipath_clean_part_key(struct ipath_portdata *pd, 792 struct ipath_devdata *dd) 793{ 794 int i, j, pchanged = 0; 795 u64 oldpkey; 796 797 /* for debugging only */ 798 oldpkey = (u64) dd->ipath_pkeys[0] | 799 ((u64) dd->ipath_pkeys[1] << 16) | 800 ((u64) dd->ipath_pkeys[2] << 32) | 801 ((u64) dd->ipath_pkeys[3] << 48); 802 803 for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) { 804 if (!pd->port_pkeys[i]) 805 continue; 806 ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i, 807 pd->port_pkeys[i]); 808 for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) { 809 /* check for match independent of the global bit */ 810 if ((dd->ipath_pkeys[j] & 0x7fff) != 811 (pd->port_pkeys[i] & 0x7fff)) 812 continue; 813 if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) { 814 ipath_cdbg(VERBOSE, "p%u clear key " 815 "%x matches #%d\n", 816 pd->port_port, 817 pd->port_pkeys[i], j); 818 ipath_stats.sps_pkeys[j] = 819 dd->ipath_pkeys[j] = 0; 820 pchanged++; 821 } 822 else ipath_cdbg( 823 VERBOSE, "p%u key %x matches #%d, " 824 "but ref still %d\n", pd->port_port, 825 pd->port_pkeys[i], j, 826 atomic_read(&dd->ipath_pkeyrefs[j])); 827 break; 828 } 829 pd->port_pkeys[i] = 0; 830 } 831 if (pchanged) { 832 u64 pkey = (u64) dd->ipath_pkeys[0] | 833 ((u64) dd->ipath_pkeys[1] << 16) | 834 ((u64) dd->ipath_pkeys[2] << 32) | 835 ((u64) dd->ipath_pkeys[3] << 48); 836 ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, " 837 "new pkey reg %llx\n", pd->port_port, 838 (unsigned long long) oldpkey, 839 (unsigned long long) pkey); 840 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, 841 pkey); 842 } 843} 844 845/* 846 * Initialize the port data with the receive buffer sizes 847 * so this can be done while the master port is locked. 848 * Otherwise, there is a race with a slave opening the port 849 * and seeing these fields uninitialized. 850 */ 851static void init_user_egr_sizes(struct ipath_portdata *pd) 852{ 853 struct ipath_devdata *dd = pd->port_dd; 854 unsigned egrperchunk, egrcnt, size; 855 856 /* 857 * to avoid wasting a lot of memory, we allocate 32KB chunks of 858 * physically contiguous memory, advance through it until used up 859 * and then allocate more. Of course, we need memory to store those 860 * extra pointers, now. Started out with 256KB, but under heavy 861 * memory pressure (creating large files and then copying them over 862 * NFS while doing lots of MPI jobs), we hit some allocation 863 * failures, even though we can sleep... (2.6.10) Still get 864 * failures at 64K. 32K is the lowest we can go without wasting 865 * additional memory. 866 */ 867 size = 0x8000; 868 egrperchunk = size / dd->ipath_rcvegrbufsize; 869 egrcnt = dd->ipath_rcvegrcnt; 870 pd->port_rcvegrbuf_chunks = (egrcnt + egrperchunk - 1) / egrperchunk; 871 pd->port_rcvegrbufs_perchunk = egrperchunk; 872 pd->port_rcvegrbuf_size = size; 873} 874 875/** 876 * ipath_create_user_egr - allocate eager TID buffers 877 * @pd: the port to allocate TID buffers for 878 * 879 * This routine is now quite different for user and kernel, because 880 * the kernel uses skb's, for the accelerated network performance 881 * This is the user port version 882 * 883 * Allocate the eager TID buffers and program them into infinipath 884 * They are no longer completely contiguous, we do multiple allocation 885 * calls. 
/**
 * ipath_create_user_egr - allocate eager TID buffers
 * @pd: the port to allocate TID buffers for
 *
 * This routine is now quite different for user and kernel, because
 * the kernel uses skb's, for the accelerated network performance.
 * This is the user port version.
 *
 * Allocate the eager TID buffers and program them into infinipath.
 * They are no longer completely contiguous, we do multiple allocation
 * calls.
 */
static int ipath_create_user_egr(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
	size_t size;
	int ret;
	gfp_t gfp_flags;

	/*
	 * GFP_USER, but without GFP_FS, so buffer cache can be
	 * coalesced (we hope); otherwise, even at order 4,
	 * heavy filesystem activity makes these fail, and we can
	 * use compound pages.
	 */
	gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;

	egrcnt = dd->ipath_rcvegrcnt;
	/* TID number offset for this port */
	egroff = (pd->port_port - 1) * egrcnt + dd->ipath_p0_rcvegrcnt;
	egrsize = dd->ipath_rcvegrbufsize;
	ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
		   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);

	chunk = pd->port_rcvegrbuf_chunks;
	egrperchunk = pd->port_rcvegrbufs_perchunk;
	size = pd->port_rcvegrbuf_size;
	pd->port_rcvegrbuf = kmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]),
				     GFP_KERNEL);
	if (!pd->port_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail;
	}
	pd->port_rcvegrbuf_phys =
		kmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]),
			GFP_KERNEL);
	if (!pd->port_rcvegrbuf_phys) {
		ret = -ENOMEM;
		goto bail_rcvegrbuf;
	}
	for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {

		pd->port_rcvegrbuf[e] = dma_alloc_coherent(
			&dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
			gfp_flags);

		if (!pd->port_rcvegrbuf[e]) {
			ret = -ENOMEM;
			goto bail_rcvegrbuf_phys;
		}
	}

	pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];

	for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
		dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
		unsigned i;

		for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
			dd->ipath_f_put_tid(dd, e + egroff +
					    (u64 __iomem *)
					    ((char __iomem *)
					     dd->ipath_kregbase +
					     dd->ipath_rcvegrbase),
					    RCVHQ_RCV_TYPE_EAGER, pa);
			pa += egrsize;
		}
		cond_resched();	/* don't hog the cpu */
	}

	ret = 0;
	goto bail;

bail_rcvegrbuf_phys:
	for (e = 0; e < pd->port_rcvegrbuf_chunks &&
		    pd->port_rcvegrbuf[e]; e++) {
		dma_free_coherent(&dd->pcidev->dev, size,
				  pd->port_rcvegrbuf[e],
				  pd->port_rcvegrbuf_phys[e]);
	}
	kfree(pd->port_rcvegrbuf_phys);
	pd->port_rcvegrbuf_phys = NULL;
bail_rcvegrbuf:
	kfree(pd->port_rcvegrbuf);
	pd->port_rcvegrbuf = NULL;
bail:
	return ret;
}


/* common code for the mappings on dma_alloc_coherent mem */
static int ipath_mmap_mem(struct vm_area_struct *vma,
	struct ipath_portdata *pd, unsigned len, int write_ok,
	void *kvaddr, char *what)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long pfn;
	int ret;

	if ((vma->vm_end - vma->vm_start) > len) {
		dev_info(&dd->pcidev->dev,
			 "FAIL on %s: len %lx > %x\n", what,
			 vma->vm_end - vma->vm_start, len);
		ret = -EFAULT;
		goto bail;
	}

	if (!write_ok) {
		if (vma->vm_flags & VM_WRITE) {
			dev_info(&dd->pcidev->dev,
				 "%s must be mapped readonly\n", what);
			ret = -EPERM;
			goto bail;
		}

		/* don't allow them to later change with mprotect */
		vma->vm_flags &= ~VM_MAYWRITE;
	}

	pfn = virt_to_phys(kvaddr) >> PAGE_SHIFT;
	ret = remap_pfn_range(vma, vma->vm_start, pfn,
			      len, vma->vm_page_prot);
	if (ret)
		dev_info(&dd->pcidev->dev, "%s port%u mmap of %lx, %x "
			 "bytes r%c failed: %d\n", what, pd->port_port,
			 pfn, len, write_ok?'w':'o', ret);
	else
		ipath_cdbg(VERBOSE, "%s port%u mmapped %lx, %x bytes "
			   "r%c\n", what, pd->port_port, pfn, len,
			   write_ok?'w':'o');
bail:
	return ret;
}
static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
		     u64 ureg)
{
	unsigned long phys;
	int ret;

	/*
	 * This is real hardware, so use io_remap.  This is the mechanism
	 * for the user process to update the head registers for their port
	 * in the chip.
	 */
	if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
		dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
			 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
		ret = -EFAULT;
	} else {
		phys = dd->ipath_physaddr + ureg;
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

		vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
		ret = io_remap_pfn_range(vma, vma->vm_start,
					 phys >> PAGE_SHIFT,
					 vma->vm_end - vma->vm_start,
					 vma->vm_page_prot);
	}
	return ret;
}

static int mmap_piobufs(struct vm_area_struct *vma,
			struct ipath_devdata *dd,
			struct ipath_portdata *pd,
			unsigned piobufs, unsigned piocnt)
{
	unsigned long phys;
	int ret;

	/*
	 * When we map the PIO buffers in the chip, we want to map them as
	 * writeonly, no read possible.  This prevents access to previous
	 * process data, and catches users who might try to read the i/o
	 * space due to a bug.
	 */
	if ((vma->vm_end - vma->vm_start) > (piocnt * dd->ipath_palign)) {
		dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
			 "reqlen %lx > PAGE\n",
			 vma->vm_end - vma->vm_start);
		ret = -EINVAL;
		goto bail;
	}

	phys = dd->ipath_physaddr + piobufs;

#if defined(__powerpc__)
	/* There isn't a generic way to specify writethrough mappings */
	pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
	pgprot_val(vma->vm_page_prot) |= _PAGE_WRITETHRU;
	pgprot_val(vma->vm_page_prot) &= ~_PAGE_GUARDED;
#endif

	/*
	 * don't allow them to later change to readable with mprotect (for when
	 * not initially mapped readable, as is normally the case)
	 */
	vma->vm_flags &= ~VM_MAYREAD;
	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;

	ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
				 vma->vm_end - vma->vm_start,
				 vma->vm_page_prot);
bail:
	return ret;
}
static int mmap_rcvegrbufs(struct vm_area_struct *vma,
			   struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	unsigned long start, size;
	size_t total_size, i;
	unsigned long pfn;
	int ret;

	size = pd->port_rcvegrbuf_size;
	total_size = pd->port_rcvegrbuf_chunks * size;
	if ((vma->vm_end - vma->vm_start) > total_size) {
		dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
			 "reqlen %lx > actual %lx\n",
			 vma->vm_end - vma->vm_start,
			 (unsigned long) total_size);
		ret = -EINVAL;
		goto bail;
	}

	if (vma->vm_flags & VM_WRITE) {
		dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
			 "writable (flags=%lx)\n", vma->vm_flags);
		ret = -EPERM;
		goto bail;
	}
	/* don't allow them to later change to writeable with mprotect */
	vma->vm_flags &= ~VM_MAYWRITE;

	start = vma->vm_start;

	for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
		pfn = virt_to_phys(pd->port_rcvegrbuf[i]) >> PAGE_SHIFT;
		ret = remap_pfn_range(vma, start, pfn, size,
				      vma->vm_page_prot);
		if (ret < 0)
			goto bail;
	}
	ret = 0;

bail:
	return ret;
}

/*
 * ipath_file_vma_fault - handle a VMA page fault.
 */
static int ipath_file_vma_fault(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct page *page;

	page = vmalloc_to_page((void *)(vmf->pgoff << PAGE_SHIFT));
	if (!page)
		return VM_FAULT_SIGBUS;
	get_page(page);
	vmf->page = page;

	return 0;
}

static const struct vm_operations_struct ipath_file_vm_ops = {
	.fault = ipath_file_vma_fault,
};

static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
		       struct ipath_portdata *pd, unsigned subport)
{
	unsigned long len;
	struct ipath_devdata *dd;
	void *addr;
	size_t size;
	int ret = 0;

	/* If the port is not shared, all addresses should be physical */
	if (!pd->port_subport_cnt)
		goto bail;

	dd = pd->port_dd;
	size = pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;

	/*
	 * Each process has all the subport uregbase, rcvhdrq, and
	 * rcvegrbufs mmapped - as an array for all the processes,
	 * and also separately for this process.
	 */
	if (pgaddr == cvt_kvaddr(pd->subport_uregbase)) {
		addr = pd->subport_uregbase;
		size = PAGE_SIZE * pd->port_subport_cnt;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base)) {
		addr = pd->subport_rcvhdr_base;
		size = pd->port_rcvhdrq_size * pd->port_subport_cnt;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf)) {
		addr = pd->subport_rcvegrbuf;
		size *= pd->port_subport_cnt;
	} else if (pgaddr == cvt_kvaddr(pd->subport_uregbase +
					PAGE_SIZE * subport)) {
		addr = pd->subport_uregbase + PAGE_SIZE * subport;
		size = PAGE_SIZE;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvhdr_base +
					pd->port_rcvhdrq_size * subport)) {
		addr = pd->subport_rcvhdr_base +
			pd->port_rcvhdrq_size * subport;
		size = pd->port_rcvhdrq_size;
	} else if (pgaddr == cvt_kvaddr(pd->subport_rcvegrbuf +
					size * subport)) {
		addr = pd->subport_rcvegrbuf + size * subport;
		/* rcvegrbufs are read-only on the slave */
		if (vma->vm_flags & VM_WRITE) {
			dev_info(&dd->pcidev->dev,
				 "Can't map eager buffers as "
				 "writable (flags=%lx)\n", vma->vm_flags);
			ret = -EPERM;
			goto bail;
		}
		/*
		 * Don't allow permission to later change to writeable
		 * with mprotect.
		 */
		vma->vm_flags &= ~VM_MAYWRITE;
	} else {
		goto bail;
	}
	len = vma->vm_end - vma->vm_start;
	if (len > size) {
		ipath_cdbg(MM, "FAIL: reqlen %lx > %zx\n", len, size);
		ret = -EINVAL;
		goto bail;
	}

	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
	vma->vm_ops = &ipath_file_vm_ops;
	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
	ret = 1;

bail:
	return ret;
}
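/*
 * Return convention relied on by ipath_mmap(): 0 means the offset is
 * not one of the shared-port kernel addresses, so keep trying the
 * physical/chip addresses; 1 means the VMA has been set up for
 * demand faulting through ipath_file_vm_ops; negative is an error
 * returned to the caller as-is.
 */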
/**
 * ipath_mmap - mmap various structures into user space
 * @fp: the file pointer
 * @vma: the VM area
 *
 * We use this to have a shared buffer between the kernel and the user code
 * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
 * buffers in the chip.  We have the open and close entries so we can bump
 * the ref count and keep the driver from being unloaded while still mapped.
 */
static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
{
	struct ipath_portdata *pd;
	struct ipath_devdata *dd;
	u64 pgaddr, ureg;
	unsigned piobufs, piocnt;
	int ret;

	pd = port_fp(fp);
	if (!pd) {
		ret = -EINVAL;
		goto bail;
	}
	dd = pd->port_dd;

	/*
	 * This is the ipath_do_user_init() code, mapping the shared buffers
	 * into the user process.  The address referred to by vm_pgoff is the
	 * file offset passed via mmap().  For shared ports, this is the
	 * kernel vmalloc() address of the pages to share with the master.
	 * For non-shared or master ports, this is a physical address.
	 * We only do one mmap for each space mapped.
	 */
	pgaddr = vma->vm_pgoff << PAGE_SHIFT;

	/*
	 * Check for 0 in case one of the allocations failed, but user
	 * called mmap anyway.
	 */
	if (!pgaddr) {
		ret = -EINVAL;
		goto bail;
	}

	ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u:%u\n",
		   (unsigned long long) pgaddr, vma->vm_start,
		   vma->vm_end - vma->vm_start, dd->ipath_unit,
		   pd->port_port, subport_fp(fp));

	/*
	 * Physical addresses must fit in 40 bits for our hardware.
	 * Check for kernel virtual addresses first, anything else must
	 * match a HW or memory address.
	 */
	ret = mmap_kvaddr(vma, pgaddr, pd, subport_fp(fp));
	if (ret) {
		if (ret > 0)
			ret = 0;
		goto bail;
	}

	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
	if (!pd->port_subport_cnt) {
		/* port is not shared */
		piocnt = pd->port_piocnt;
		piobufs = pd->port_piobufs;
	} else if (!subport_fp(fp)) {
		/* caller is the master */
		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
			 (pd->port_piocnt % pd->port_subport_cnt);
		piobufs = pd->port_piobufs +
			dd->ipath_palign * (pd->port_piocnt - piocnt);
	} else {
		unsigned slave = subport_fp(fp) - 1;

		/* caller is a slave */
		piocnt = pd->port_piocnt / pd->port_subport_cnt;
		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
	}

	if (pgaddr == ureg)
		ret = mmap_ureg(vma, dd, ureg);
	else if (pgaddr == piobufs)
		ret = mmap_piobufs(vma, dd, pd, piobufs, piocnt);
	else if (pgaddr == dd->ipath_pioavailregs_phys)
		/* in-memory copy of pioavail registers */
		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
				     (void *) dd->ipath_pioavailregs_dma,
				     "pioavail registers");
	else if (pgaddr == pd->port_rcvegr_phys)
		ret = mmap_rcvegrbufs(vma, pd);
	else if (pgaddr == (u64) pd->port_rcvhdrq_phys)
		/*
		 * The rcvhdrq itself; readonly except on HT (so have
		 * to allow writable mapping), multiple pages, contiguous
		 * from an i/o perspective.
		 */
		ret = ipath_mmap_mem(vma, pd, pd->port_rcvhdrq_size, 1,
				     pd->port_rcvhdrq,
				     "rcvhdrq");
	else if (pgaddr == (u64) pd->port_rcvhdrqtailaddr_phys)
		/* in-memory copy of rcvhdrq tail register */
		ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
				     pd->port_rcvhdrtail_kvaddr,
				     "rcvhdrq tail");
	else
		ret = -EINVAL;

	vma->vm_private_data = NULL;

	if (ret < 0)
		dev_info(&dd->pcidev->dev,
			 "Failure %d on off %llx len %lx\n",
			 -ret, (unsigned long long)pgaddr,
			 vma->vm_end - vma->vm_start);
bail:
	return ret;
}
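/*
 * Rough sketch of the userspace side (the library details here are
 * assumptions, not part of this file): each address handed back in
 * struct ipath_base_info, e.g. spi_piobufbase or spi_rcvhdr_base, is
 * passed back verbatim as the mmap offset, something like
 *
 *	mmap(NULL, len, prot, MAP_SHARED, fd,
 *	     (off_t) kinfo->spi_piobufbase);
 *
 * and the dispatch above keys off that offset alone.
 */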
static unsigned ipath_poll_hdrqfull(struct ipath_portdata *pd)
{
	unsigned pollflag = 0;

	if ((pd->poll_type & IPATH_POLL_TYPE_OVERFLOW) &&
	    pd->port_hdrqfull != pd->port_hdrqfull_poll) {
		pollflag |= POLLIN | POLLRDNORM;
		pd->port_hdrqfull_poll = pd->port_hdrqfull;
	}

	return pollflag;
}

static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
				      struct file *fp,
				      struct poll_table_struct *pt)
{
	unsigned pollflag = 0;
	struct ipath_devdata *dd;

	dd = pd->port_dd;

	/* variable access in ipath_poll_hdrqfull() needs this */
	rmb();
	pollflag = ipath_poll_hdrqfull(pd);

	if (pd->port_urgent != pd->port_urgent_poll) {
		pollflag |= POLLIN | POLLRDNORM;
		pd->port_urgent_poll = pd->port_urgent;
	}

	if (!pollflag) {
		/* this saves a spin_lock/unlock in interrupt handler... */
		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
		/* flush waiting flag so don't miss an event... */
		wmb();
		poll_wait(fp, &pd->port_wait, pt);
	}

	return pollflag;
}

static unsigned int ipath_poll_next(struct ipath_portdata *pd,
				    struct file *fp,
				    struct poll_table_struct *pt)
{
	u32 head;
	u32 tail;
	unsigned pollflag = 0;
	struct ipath_devdata *dd;

	dd = pd->port_dd;

	/* variable access in ipath_poll_hdrqfull() needs this */
	rmb();
	pollflag = ipath_poll_hdrqfull(pd);

	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
	if (pd->port_rcvhdrtail_kvaddr)
		tail = ipath_get_rcvhdrtail(pd);
	else
		tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);

	if (head != tail)
		pollflag |= POLLIN | POLLRDNORM;
	else {
		/* this saves a spin_lock/unlock in interrupt handler */
		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
		/* flush waiting flag so we don't miss an event */
		wmb();

		set_bit(pd->port_port + dd->ipath_r_intravail_shift,
			&dd->ipath_rcvctrl);

		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);

		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
			ipath_write_ureg(dd, ur_rcvhdrhead,
					 dd->ipath_rhdrhead_intr_off | head,
					 pd->port_port);

		poll_wait(fp, &pd->port_wait, pt);
	}

	return pollflag;
}

static unsigned int ipath_poll(struct file *fp,
			       struct poll_table_struct *pt)
{
	struct ipath_portdata *pd;
	unsigned pollflag;

	pd = port_fp(fp);
	if (!pd)
		pollflag = 0;
	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
		pollflag = ipath_poll_urgent(pd, fp, pt);
	else
		pollflag = ipath_poll_next(pd, fp, pt);

	return pollflag;
}
static int ipath_supports_subports(int user_swmajor, int user_swminor)
{
	/* no subport implementation prior to software version 1.3 */
	return (user_swmajor > 1) || (user_swminor >= 3);
}

static int ipath_compatible_subports(int user_swmajor, int user_swminor)
{
	/* this code is written long-hand for clarity */
	if (IPATH_USER_SWMAJOR != user_swmajor) {
		/* no promise of compatibility if major mismatch */
		return 0;
	}
	if (IPATH_USER_SWMAJOR == 1) {
		switch (IPATH_USER_SWMINOR) {
		case 0:
		case 1:
		case 2:
			/* no subport implementation so cannot be compatible */
			return 0;
		case 3:
			/* 3 is only compatible with itself */
			return user_swminor == 3;
		default:
			/* >= 4 are compatible (or are expected to be) */
			return user_swminor >= 4;
		}
	}
	/* make no promises yet for future major versions */
	return 0;
}
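/*
 * Concretely: a 1.3 library can share ports only with a 1.3 driver,
 * 1.4 and later minors are treated as mutually compatible, and any
 * major-version mismatch disables sharing entirely.
 */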
static int init_subports(struct ipath_devdata *dd,
			 struct ipath_portdata *pd,
			 const struct ipath_user_info *uinfo)
{
	int ret = 0;
	unsigned num_subports;
	size_t size;

	/*
	 * If the user is requesting zero subports,
	 * skip the subport allocation.
	 */
	if (uinfo->spu_subport_cnt <= 0)
		goto bail;

	/* Self-consistency check for ipath_compatible_subports() */
	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
				       IPATH_USER_SWMINOR)) {
		dev_info(&dd->pcidev->dev,
			 "Inconsistent ipath_compatible_subports()\n");
		goto bail;
	}

	/* Check for subport compatibility */
	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
				       uinfo->spu_userversion & 0xffff)) {
		dev_info(&dd->pcidev->dev,
			 "Mismatched user version (%d.%d) and driver "
			 "version (%d.%d) while port sharing. Ensure "
			 "that driver and library are from the same "
			 "release.\n",
			 (int) (uinfo->spu_userversion >> 16),
			 (int) (uinfo->spu_userversion & 0xffff),
			 IPATH_USER_SWMAJOR,
			 IPATH_USER_SWMINOR);
		goto bail;
	}
	if (uinfo->spu_subport_cnt > INFINIPATH_MAX_SUBPORT) {
		ret = -EINVAL;
		goto bail;
	}

	num_subports = uinfo->spu_subport_cnt;
	pd->subport_uregbase = vmalloc(PAGE_SIZE * num_subports);
	if (!pd->subport_uregbase) {
		ret = -ENOMEM;
		goto bail;
	}
	/* Note: pd->port_rcvhdrq_size isn't initialized yet. */
	size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
		     sizeof(u32), PAGE_SIZE) * num_subports;
	pd->subport_rcvhdr_base = vmalloc(size);
	if (!pd->subport_rcvhdr_base) {
		ret = -ENOMEM;
		goto bail_ureg;
	}

	pd->subport_rcvegrbuf = vmalloc(pd->port_rcvegrbuf_chunks *
					pd->port_rcvegrbuf_size *
					num_subports);
	if (!pd->subport_rcvegrbuf) {
		ret = -ENOMEM;
		goto bail_rhdr;
	}

	pd->port_subport_cnt = uinfo->spu_subport_cnt;
	pd->port_subport_id = uinfo->spu_subport_id;
	pd->active_slaves = 1;
	set_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
	memset(pd->subport_uregbase, 0, PAGE_SIZE * num_subports);
	memset(pd->subport_rcvhdr_base, 0, size);
	memset(pd->subport_rcvegrbuf, 0, pd->port_rcvegrbuf_chunks *
					 pd->port_rcvegrbuf_size *
					 num_subports);
	goto bail;

bail_rhdr:
	vfree(pd->subport_rcvhdr_base);
bail_ureg:
	vfree(pd->subport_uregbase);
	pd->subport_uregbase = NULL;
bail:
	return ret;
}

static int try_alloc_port(struct ipath_devdata *dd, int port,
			  struct file *fp,
			  const struct ipath_user_info *uinfo)
{
	struct ipath_portdata *pd;
	int ret;

	if (!(pd = dd->ipath_pd[port])) {
		void *ptmp;

		pd = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);

		/*
		 * Allocate memory for use in ipath_tid_update() just once
		 * at open, not per call.  Reduces cost of expected send
		 * setup.
		 */
		ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
			       dd->ipath_rcvtidcnt * sizeof(struct page **),
			       GFP_KERNEL);
		if (!pd || !ptmp) {
			ipath_dev_err(dd, "Unable to allocate portdata "
				      "memory, failing open\n");
			ret = -ENOMEM;
			kfree(pd);
			kfree(ptmp);
			goto bail;
		}
		dd->ipath_pd[port] = pd;
		dd->ipath_pd[port]->port_port = port;
		dd->ipath_pd[port]->port_dd = dd;
		dd->ipath_pd[port]->port_tid_pg_list = ptmp;
		init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
	}
	if (!pd->port_cnt) {
		pd->userversion = uinfo->spu_userversion;
		init_user_egr_sizes(pd);
		if ((ret = init_subports(dd, pd, uinfo)) != 0)
			goto bail;
		ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
			   current->comm, current->pid, dd->ipath_unit,
			   port);
		pd->port_cnt = 1;
		port_fp(fp) = pd;
		pd->port_pid = get_pid(task_pid(current));
		strlcpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
		ipath_stats.sps_ports++;
		ret = 0;
	} else
		ret = -EBUSY;

bail:
	return ret;
}

static inline int usable(struct ipath_devdata *dd)
{
	return dd &&
		(dd->ipath_flags & IPATH_PRESENT) &&
		dd->ipath_kregbase &&
		dd->ipath_lid &&
		!(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
				     | IPATH_LINKUNK));
}

static int find_free_port(int unit, struct file *fp,
			  const struct ipath_user_info *uinfo)
{
	struct ipath_devdata *dd = ipath_lookup(unit);
	int ret, i;

	if (!dd) {
		ret = -ENODEV;
		goto bail;
	}

	if (!usable(dd)) {
		ret = -ENETDOWN;
		goto bail;
	}

	for (i = 1; i < dd->ipath_cfgports; i++) {
		ret = try_alloc_port(dd, i, fp, uinfo);
		if (ret != -EBUSY)
			goto bail;
	}
	ret = -EBUSY;

bail:
	return ret;
}
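/*
 * Port 0 is reserved for the kernel, which is why the search above
 * (and the one in find_best_unit() below) starts at port 1.
 */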
static int find_best_unit(struct file *fp,
			  const struct ipath_user_info *uinfo)
{
	int ret = 0, i, prefunit = -1, devmax;
	int maxofallports, npresent, nup;
	int ndev;

	devmax = ipath_count_units(&npresent, &nup, &maxofallports);

	/*
	 * This code is present to allow a knowledgeable person to
	 * specify the layout of processes to processors before opening
	 * this driver, and then we'll assign the process to the "closest"
	 * InfiniPath chip to that processor (we assume reasonable connectivity,
	 * for now).  This code assumes that if affinity has been set
	 * before this point, that at most one cpu is set; for now this
	 * is reasonable.  I check for both cpumask_empty() and cpumask_full(),
	 * in case some kernel variant sets none of the bits when no
	 * affinity is set.  2.6.11 and 12 kernels have all present
	 * cpus set.  Some day we'll have to fix it up further to handle
	 * a cpu subset.  This algorithm fails for two HT chips connected
	 * in tunnel fashion.  Eventually this needs real topology
	 * information.  There may be some issues with dual core numbering
	 * as well.  This needs more work prior to release.
	 */
	if (!cpumask_empty(&current->cpus_allowed) &&
	    !cpumask_full(&current->cpus_allowed)) {
		int ncpus = num_online_cpus(), curcpu = -1, nset = 0;
		for (i = 0; i < ncpus; i++)
			if (cpumask_test_cpu(i, &current->cpus_allowed)) {
				ipath_cdbg(PROC, "%s[%u] affinity set for "
					   "cpu %d/%d\n", current->comm,
					   current->pid, i, ncpus);
				curcpu = i;
				nset++;
			}
		if (curcpu != -1 && nset != ncpus) {
			if (npresent) {
				prefunit = curcpu / (ncpus / npresent);
				ipath_cdbg(PROC, "%s[%u] %d chips, %d cpus, "
					   "%d cpus/chip, select unit %d\n",
					   current->comm, current->pid,
					   npresent, ncpus, ncpus / npresent,
					   prefunit);
			}
		}
	}

	/*
	 * user ports start at 1, kernel port is 0
	 * For now, we do round-robin access across all chips
	 */

	if (prefunit != -1)
		devmax = prefunit + 1;
recheck:
	for (i = 1; i < maxofallports; i++) {
		for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
		     ndev++) {
			struct ipath_devdata *dd = ipath_lookup(ndev);

			if (!usable(dd))
				continue;	/* can't use this unit */
			if (i >= dd->ipath_cfgports)
				/*
				 * Maxed out on users of this unit.  Try
				 * next.
				 */
				continue;
			ret = try_alloc_port(dd, i, fp, uinfo);
			if (!ret)
				goto done;
		}
	}

	if (npresent) {
		if (nup == 0) {
			ret = -ENETDOWN;
			ipath_dbg("No ports available (none initialized "
				  "and ready)\n");
		} else {
			if (prefunit > 0) {
				/* if started above 0, retry from 0 */
				ipath_cdbg(PROC,
					   "%s[%u] no ports on prefunit "
					   "%d, clear and re-check\n",
					   current->comm, current->pid,
					   prefunit);
				devmax = ipath_count_units(NULL, NULL,
							   NULL);
				prefunit = -1;
				goto recheck;
			}
			ret = -EBUSY;
			ipath_dbg("No ports available\n");
		}
	} else {
		ret = -ENXIO;
		ipath_dbg("No boards found\n");
	}

done:
	return ret;
}

static int find_shared_port(struct file *fp,
			    const struct ipath_user_info *uinfo)
{
	int devmax, ndev, i;
	int ret = 0;

	devmax = ipath_count_units(NULL, NULL, NULL);

	for (ndev = 0; ndev < devmax; ndev++) {
		struct ipath_devdata *dd = ipath_lookup(ndev);

		if (!usable(dd))
			continue;
		for (i = 1; i < dd->ipath_cfgports; i++) {
			struct ipath_portdata *pd = dd->ipath_pd[i];

			/* Skip ports which are not yet open */
			if (!pd || !pd->port_cnt)
				continue;
			/* Skip port if it doesn't match the requested one */
			if (pd->port_subport_id != uinfo->spu_subport_id)
				continue;
			/* Verify the sharing process matches the master */
			if (pd->port_subport_cnt != uinfo->spu_subport_cnt ||
			    pd->userversion != uinfo->spu_userversion ||
			    pd->port_cnt >= pd->port_subport_cnt) {
				ret = -EINVAL;
				goto done;
			}
			port_fp(fp) = pd;
			subport_fp(fp) = pd->port_cnt++;
			pd->port_subpid[subport_fp(fp)] =
				get_pid(task_pid(current));
			tidcursor_fp(fp) = 0;
			pd->active_slaves |= 1 << subport_fp(fp);
			ipath_cdbg(PROC,
				   "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
				   current->comm, current->pid,
				   subport_fp(fp),
				   pd->port_comm, pid_nr(pd->port_pid),
				   dd->ipath_unit, pd->port_port);
			ret = 1;
			goto done;
		}
	}

done:
	return ret;
}

static int ipath_open(struct inode *in, struct file *fp)
{
	/* The real work is performed later in ipath_assign_port() */
	fp->private_data = kzalloc(sizeof(struct ipath_filedata), GFP_KERNEL);
	return fp->private_data ? 0 : -ENOMEM;
}
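/*
 * Keeping open() trivial matters because the real port assignment
 * happens later in ipath_assign_port() (reached, presumably, through
 * the ipath_write() command path); as the comment below notes, that
 * lets the process set its CPU affinity after open() but before any
 * port memory is allocated.
 */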
/* Get port early, so we can set affinity prior to memory allocation */
static int ipath_assign_port(struct file *fp,
			     const struct ipath_user_info *uinfo)
{
	int ret;
	int i_minor;
	unsigned swmajor, swminor;

	/* Check to be sure we haven't already initialized this file */
	if (port_fp(fp)) {
		ret = -EINVAL;
		goto done;
	}

	/* for now, if major version is different, bail */
	swmajor = uinfo->spu_userversion >> 16;
	if (swmajor != IPATH_USER_SWMAJOR) {
		ipath_dbg("User major version %d not same as driver "
			  "major %d\n", uinfo->spu_userversion >> 16,
			  IPATH_USER_SWMAJOR);
		ret = -ENODEV;
		goto done;
	}

	swminor = uinfo->spu_userversion & 0xffff;
	if (swminor != IPATH_USER_SWMINOR)
		ipath_dbg("User minor version %d not same as driver "
			  "minor %d\n", swminor, IPATH_USER_SWMINOR);

	mutex_lock(&ipath_mutex);

	if (ipath_compatible_subports(swmajor, swminor) &&
	    uinfo->spu_subport_cnt &&
	    (ret = find_shared_port(fp, uinfo))) {
		if (ret > 0)
			ret = 0;
		goto done_chk_sdma;
	}

	i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
	ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
		   (long)fp->f_path.dentry->d_inode->i_rdev, i_minor);

	if (i_minor)
		ret = find_free_port(i_minor - 1, fp, uinfo);
	else
		ret = find_best_unit(fp, uinfo);

done_chk_sdma:
	if (!ret) {
		struct ipath_filedata *fd = fp->private_data;
		const struct ipath_portdata *pd = fd->pd;
		const struct ipath_devdata *dd = pd->port_dd;

		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
						      dd->ipath_unit,
						      pd->port_port,
						      fd->subport);

		if (!fd->pq)
			ret = -ENOMEM;
	}

	mutex_unlock(&ipath_mutex);

done:
	return ret;
}
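
/*
 * Editorial note (hypothetical numbers, not from the original source):
 * ipath_do_user_init() below carves the chip's PIO buffers into
 * contiguous per-port ranges.  If, say, ipath_pbufsport were 32 with
 * ipath_ports_extrabuf = 2, ports 1 and 2 would get 33 buffers each
 * and later ports 32, giving port_pio_base values of 0, 33, 66, 98,
 * ... for ports 1, 2, 3, 4, ...
 */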
static int ipath_do_user_init(struct file *fp,
			      const struct ipath_user_info *uinfo)
{
	int ret;
	struct ipath_portdata *pd = port_fp(fp);
	struct ipath_devdata *dd;
	u32 head32;

	/* Subports don't need to initialize anything since master did it. */
	if (subport_fp(fp)) {
		ret = wait_event_interruptible(pd->port_wait,
			!test_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag));
		goto done;
	}

	dd = pd->port_dd;

	if (uinfo->spu_rcvhdrsize) {
		ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
		if (ret)
			goto done;
	}

	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */

	/* some ports may get extra buffers, calculate that here */
	if (pd->port_port <= dd->ipath_ports_extrabuf)
		pd->port_piocnt = dd->ipath_pbufsport + 1;
	else
		pd->port_piocnt = dd->ipath_pbufsport;

	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
	if (pd->port_port <= dd->ipath_ports_extrabuf)
		pd->port_pio_base = (dd->ipath_pbufsport + 1)
			* (pd->port_port - 1);
	else
		pd->port_pio_base = dd->ipath_ports_extrabuf +
			dd->ipath_pbufsport * (pd->port_port - 1);
	pd->port_piobufs = dd->ipath_piobufbase +
		pd->port_pio_base * dd->ipath_palign;
	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
		   " first pio %u\n", pd->port_port, pd->port_piobufs,
		   pd->port_piocnt, pd->port_pio_base);
	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);

	/*
	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
	 * array for the time being.  If pd->port_port exceeds what the
	 * chip supports, we will someday need extra logic here to handle
	 * the overflow through port 0.
	 */
	ret = ipath_create_rcvhdrq(dd, pd);
	if (!ret)
		ret = ipath_create_user_egr(pd);
	if (ret)
		goto done;

	/*
	 * Set the eager head register for this port to the current value
	 * of the tail pointer, since we don't know whether it was updated
	 * on last use of the port.
	 */
	head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
	ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
	pd->port_lastrcvhdrqtail = -1;
	ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
		   pd->port_port, head32);
	pd->port_tidcursor = 0; /* start at beginning after open */

	/* initialize poll variables... */
	pd->port_urgent = 0;
	pd->port_urgent_poll = 0;
	pd->port_hdrqfull_poll = pd->port_hdrqfull;
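
	/*
	 * Editorial note (not from the original source): everything from
	 * here to the end of the function must finish before
	 * IPATH_PORT_MASTER_UNINIT is cleared below, since clearing that
	 * bit is what releases slave subports sleeping in the
	 * wait_event_interruptible() at the top of this function.
	 */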

	/*
	 * Now enable the port for receive.
	 * Some chips DMA the tail register to memory when it changes
	 * (and when the update bit transitions from 0 to 1), so for those
	 * chips we turn tail updates off and then back on.  This will
	 * (very briefly) affect any other open ports, but the duration
	 * is very short, and therefore isn't an issue.  We explicitly set
	 * the in-memory tail copy to 0 beforehand, so we don't have to
	 * wait to be sure the DMA update has happened (the chip resets
	 * head/tail to 0 on the transition to enable).
	 */
	set_bit(dd->ipath_r_portenable_shift + pd->port_port,
		&dd->ipath_rcvctrl);
	if (!(dd->ipath_flags & IPATH_NODMA_RTAIL)) {
		if (pd->port_rcvhdrtail_kvaddr)
			ipath_clear_rcvhdrtail(pd);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl &
				 ~(1ULL << dd->ipath_r_tailupd_shift));
	}
	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
			 dd->ipath_rcvctrl);
	/* Notify any waiting slaves */
	if (pd->port_subport_cnt) {
		clear_bit(IPATH_PORT_MASTER_UNINIT, &pd->port_flag);
		wake_up(&pd->port_wait);
	}
done:
	return ret;
}

/**
 * unlock_expected_tids - unlock any expected TID entries the port still had in use
 * @pd: port
 *
 * We don't actually update the chip here, because the caller does a
 * bulk update with ipath_f_clear_tids.
 */
static void unlock_expected_tids(struct ipath_portdata *pd)
{
	struct ipath_devdata *dd = pd->port_dd;
	int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
	int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;

	ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
		   pd->port_port);
	for (i = port_tidbase; i < maxtid; i++) {
		struct page *ps = dd->ipath_pageshadow[i];

		if (!ps)
			continue;

		dd->ipath_pageshadow[i] = NULL;
		pci_unmap_page(dd->pcidev, dd->ipath_physshadow[i],
			       PAGE_SIZE, PCI_DMA_FROMDEVICE);
		ipath_release_user_pages_on_close(&ps, 1);
		cnt++;
		ipath_stats.sps_pageunlocks++;
	}
	if (cnt)
		ipath_cdbg(VERBOSE, "Port %u unlocked %u expTID entries\n",
			   pd->port_port, cnt);

	if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
		ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
			   (unsigned long long) ipath_stats.sps_pagelocks,
			   (unsigned long long)
			   ipath_stats.sps_pageunlocks);
}
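
/*
 * Editorial note (not from the original source): teardown order in
 * ipath_close() matters.  The port is detached from dd->ipath_pd under
 * ipath_uctxt_lock first, so no interrupt user can find it; only then
 * are the receive-enable and interrupt-available bits cleared, pkeys
 * cleaned up, the rcvhdrq addresses pointed at the dummy page, the PIO
 * buffers disarmed and returned to the kernel, and any still-locked
 * expected-TID pages released.
 */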
static int ipath_close(struct inode *in, struct file *fp)
{
	int ret = 0;
	struct ipath_filedata *fd;
	struct ipath_portdata *pd;
	struct ipath_devdata *dd;
	unsigned long flags;
	unsigned port;
	struct pid *pid;

	ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
		   (long)in->i_rdev, fp->private_data);

	mutex_lock(&ipath_mutex);

	fd = fp->private_data;
	fp->private_data = NULL;
	pd = fd->pd;
	if (!pd) {
		mutex_unlock(&ipath_mutex);
		goto bail;
	}

	dd = pd->port_dd;

	/* drain user sdma queue */
	ipath_user_sdma_queue_drain(dd, fd->pq);
	ipath_user_sdma_queue_destroy(fd->pq);

	if (--pd->port_cnt) {
		pd->active_slaves &= ~(1 << fd->subport);
		put_pid(pd->port_subpid[fd->subport]);
		pd->port_subpid[fd->subport] = NULL;
		mutex_unlock(&ipath_mutex);
		goto bail;
	}
	/* early; no interrupt users after this */
	spin_lock_irqsave(&dd->ipath_uctxt_lock, flags);
	port = pd->port_port;
	dd->ipath_pd[port] = NULL;
	pid = pd->port_pid;
	pd->port_pid = NULL;
	spin_unlock_irqrestore(&dd->ipath_uctxt_lock, flags);

	if (pd->port_rcvwait_to || pd->port_piowait_to
	    || pd->port_rcvnowait || pd->port_pionowait) {
		ipath_cdbg(VERBOSE, "port%u: %u rcv, %u pio wait timeouts; "
			   "%u rcv, %u pio without waiting\n",
			   pd->port_port, pd->port_rcvwait_to,
			   pd->port_piowait_to, pd->port_rcvnowait,
			   pd->port_pionowait);
		pd->port_rcvwait_to = pd->port_piowait_to =
			pd->port_rcvnowait = pd->port_pionowait = 0;
	}
	if (pd->port_flag) {
		ipath_cdbg(PROC, "port %u port_flag set: 0x%lx\n",
			   pd->port_port, pd->port_flag);
		pd->port_flag = 0;
	}

	if (dd->ipath_kregbase) {
		/* atomically clear receive enable port and intr avail. */
		clear_bit(dd->ipath_r_portenable_shift + port,
			  &dd->ipath_rcvctrl);
		clear_bit(pd->port_port + dd->ipath_r_intravail_shift,
			  &dd->ipath_rcvctrl);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
				 dd->ipath_rcvctrl);
		/* and read back from chip to be sure that nothing
		 * else is in flight when we do the rest */
		(void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);

		/* clean up the pkeys for this port user */
		ipath_clean_part_key(pd, dd);
		/*
		 * be paranoid, and never write 0's to these, just use an
		 * unused part of the port 0 tail page.  Of course,
		 * rcvhdraddr points to a large chunk of memory, so this
		 * could still trash things, but at least it won't trash
		 * page 0, and by disabling the port, it should stop "soon",
		 * even if a packet or two is already in flight after we
		 * disabled the port.
		 */
		ipath_write_kreg_port(dd,
			dd->ipath_kregs->kr_rcvhdrtailaddr, port,
			dd->ipath_dummy_hdrq_phys);
		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
			pd->port_port, dd->ipath_dummy_hdrq_phys);

		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
					 pd->port_piocnt, 1);

		dd->ipath_f_clear_tids(dd, pd->port_port);

		if (dd->ipath_pageshadow)
			unlock_expected_tids(pd);
		ipath_stats.sps_ports--;
		ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
			   pd->port_comm, pid_nr(pid),
			   dd->ipath_unit, port);
	}

	put_pid(pid);
	mutex_unlock(&ipath_mutex);
	ipath_free_pddata(dd, pd); /* after releasing the mutex */

bail:
	kfree(fd);
	return ret;
}
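
/*
 * Editorial note (not from the original source): ipath_port_info()
 * versions its reply by size.  For a library too old to know about
 * subports, only the leading fields are copied out and the two
 * trailing u16 fields (num_ports, num_subports) are withheld, so the
 * older, shorter struct layout is preserved.
 */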
static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
			   struct ipath_port_info __user *uinfo)
{
	struct ipath_port_info info;
	int nup;
	int ret;
	size_t sz;

	(void) ipath_count_units(NULL, &nup, NULL);
	info.num_active = nup;
	info.unit = pd->port_dd->ipath_unit;
	info.port = pd->port_port;
	info.subport = subport;
	/* Don't return new fields if old library opened the port. */
	if (ipath_supports_subports(pd->userversion >> 16,
				    pd->userversion & 0xffff)) {
		/* Number of user ports available for this device. */
		info.num_ports = pd->port_dd->ipath_cfgports - 1;
		info.num_subports = pd->port_subport_cnt;
		sz = sizeof(info);
	} else
		sz = sizeof(info) - 2 * sizeof(u16);

	if (copy_to_user(uinfo, &info, sz)) {
		ret = -EFAULT;
		goto bail;
	}
	ret = 0;

bail:
	return ret;
}

static int ipath_get_slave_info(struct ipath_portdata *pd,
				void __user *slave_mask_addr)
{
	int ret = 0;

	if (copy_to_user(slave_mask_addr, &pd->active_slaves, sizeof(u32)))
		ret = -EFAULT;
	return ret;
}

static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
				   u32 __user *inflightp)
{
	const u32 val = ipath_user_sdma_inflight_counter(pq);

	if (put_user(val, inflightp))
		return -EFAULT;

	return 0;
}

static int ipath_sdma_get_complete(struct ipath_devdata *dd,
				   struct ipath_user_sdma_queue *pq,
				   u32 __user *completep)
{
	u32 val;
	int err;

	err = ipath_user_sdma_make_progress(dd, pq);
	if (err < 0)
		return err;

	val = ipath_user_sdma_complete_counter(pq);
	if (put_user(val, completep))
		return -EFAULT;

	return 0;
}
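
/*
 * Editorial note (illustration only, not from the original source):
 * ipath_write() is the driver's command interface.  The fixed-size
 * type field is copied from userspace first, so the handler knows how
 * much of the command union to copy next; on success write() returns
 * the number of bytes actually consumed.  For example, forcing a
 * PIOAvail update takes no payload:
 *
 *	struct ipath_cmd c = { .type = IPATH_CMD_PIOAVAILUPD };
 *	ssize_t n = write(fd, &c, sizeof(c.type));
 *	// n == sizeof(c.type) on success
 */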
static ssize_t ipath_write(struct file *fp, const char __user *data,
			   size_t count, loff_t *off)
{
	const struct ipath_cmd __user *ucmd;
	struct ipath_portdata *pd;
	const void __user *src;
	size_t consumed, copy;
	struct ipath_cmd cmd;
	ssize_t ret = 0;
	void *dest;

	if (count < sizeof(cmd.type)) {
		ret = -EINVAL;
		goto bail;
	}

	ucmd = (const struct ipath_cmd __user *) data;

	if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
		ret = -EFAULT;
		goto bail;
	}

	consumed = sizeof(cmd.type);

	switch (cmd.type) {
	case IPATH_CMD_ASSIGN_PORT:
	case __IPATH_CMD_USER_INIT:
	case IPATH_CMD_USER_INIT:
		copy = sizeof(cmd.cmd.user_info);
		dest = &cmd.cmd.user_info;
		src = &ucmd->cmd.user_info;
		break;
	case IPATH_CMD_RECV_CTRL:
		copy = sizeof(cmd.cmd.recv_ctrl);
		dest = &cmd.cmd.recv_ctrl;
		src = &ucmd->cmd.recv_ctrl;
		break;
	case IPATH_CMD_PORT_INFO:
		copy = sizeof(cmd.cmd.port_info);
		dest = &cmd.cmd.port_info;
		src = &ucmd->cmd.port_info;
		break;
	case IPATH_CMD_TID_UPDATE:
	case IPATH_CMD_TID_FREE:
		copy = sizeof(cmd.cmd.tid_info);
		dest = &cmd.cmd.tid_info;
		src = &ucmd->cmd.tid_info;
		break;
	case IPATH_CMD_SET_PART_KEY:
		copy = sizeof(cmd.cmd.part_key);
		dest = &cmd.cmd.part_key;
		src = &ucmd->cmd.part_key;
		break;
	case __IPATH_CMD_SLAVE_INFO:
		copy = sizeof(cmd.cmd.slave_mask_addr);
		dest = &cmd.cmd.slave_mask_addr;
		src = &ucmd->cmd.slave_mask_addr;
		break;
	case IPATH_CMD_PIOAVAILUPD: /* force an update of PIOAvail reg */
		copy = 0;
		src = NULL;
		dest = NULL;
		break;
	case IPATH_CMD_POLL_TYPE:
		copy = sizeof(cmd.cmd.poll_type);
		dest = &cmd.cmd.poll_type;
		src = &ucmd->cmd.poll_type;
		break;
	case IPATH_CMD_ARMLAUNCH_CTRL:
		copy = sizeof(cmd.cmd.armlaunch_ctrl);
		dest = &cmd.cmd.armlaunch_ctrl;
		src = &ucmd->cmd.armlaunch_ctrl;
		break;
	case IPATH_CMD_SDMA_INFLIGHT:
		copy = sizeof(cmd.cmd.sdma_inflight);
		dest = &cmd.cmd.sdma_inflight;
		src = &ucmd->cmd.sdma_inflight;
		break;
	case IPATH_CMD_SDMA_COMPLETE:
		copy = sizeof(cmd.cmd.sdma_complete);
		dest = &cmd.cmd.sdma_complete;
		src = &ucmd->cmd.sdma_complete;
		break;
	default:
		ret = -EINVAL;
		goto bail;
	}

	if (copy) {
		if ((count - consumed) < copy) {
			ret = -EINVAL;
			goto bail;
		}

		if (copy_from_user(dest, src, copy)) {
			ret = -EFAULT;
			goto bail;
		}

		consumed += copy;
	}

	pd = port_fp(fp);
	if (!pd && cmd.type != __IPATH_CMD_USER_INIT &&
	    cmd.type != IPATH_CMD_ASSIGN_PORT) {
		ret = -EINVAL;
		goto bail;
	}

	switch (cmd.type) {
	case IPATH_CMD_ASSIGN_PORT:
		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
		if (ret)
			goto bail;
		break;
	case __IPATH_CMD_USER_INIT:
		/* backwards compatibility, get port first */
		ret = ipath_assign_port(fp, &cmd.cmd.user_info);
		if (ret)
			goto bail;
		/* and fall through to current version. */
	case IPATH_CMD_USER_INIT:
		ret = ipath_do_user_init(fp, &cmd.cmd.user_info);
		if (ret)
			goto bail;
		ret = ipath_get_base_info(
			fp, (void __user *) (unsigned long)
			cmd.cmd.user_info.spu_base_info,
			cmd.cmd.user_info.spu_base_info_size);
		break;
	case IPATH_CMD_RECV_CTRL:
		ret = ipath_manage_rcvq(pd, subport_fp(fp), cmd.cmd.recv_ctrl);
		break;
	case IPATH_CMD_PORT_INFO:
		ret = ipath_port_info(pd, subport_fp(fp),
				      (struct ipath_port_info __user *)
				      (unsigned long) cmd.cmd.port_info);
		break;
	case IPATH_CMD_TID_UPDATE:
		ret = ipath_tid_update(pd, fp, &cmd.cmd.tid_info);
		break;
	case IPATH_CMD_TID_FREE:
		ret = ipath_tid_free(pd, subport_fp(fp), &cmd.cmd.tid_info);
		break;
	case IPATH_CMD_SET_PART_KEY:
		ret = ipath_set_part_key(pd, cmd.cmd.part_key);
		break;
	case __IPATH_CMD_SLAVE_INFO:
		ret = ipath_get_slave_info(pd,
					   (void __user *) (unsigned long)
					   cmd.cmd.slave_mask_addr);
		break;
	case IPATH_CMD_PIOAVAILUPD:
		ipath_force_pio_avail_update(pd->port_dd);
		break;
	case IPATH_CMD_POLL_TYPE:
		pd->poll_type = cmd.cmd.poll_type;
		break;
	case IPATH_CMD_ARMLAUNCH_CTRL:
		if (cmd.cmd.armlaunch_ctrl)
			ipath_enable_armlaunch(pd->port_dd);
		else
			ipath_disable_armlaunch(pd->port_dd);
		break;
	case IPATH_CMD_SDMA_INFLIGHT:
		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
					      (u32 __user *) (unsigned long)
					      cmd.cmd.sdma_inflight);
		break;
	case IPATH_CMD_SDMA_COMPLETE:
		ret = ipath_sdma_get_complete(pd->port_dd,
					      user_sdma_queue_fp(fp),
					      (u32 __user *) (unsigned long)
					      cmd.cmd.sdma_complete);
		break;
	}

	if (ret >= 0)
		ret = consumed;

bail:
	return ret;
}

static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
			    unsigned long dim, loff_t off)
{
	struct file *filp = iocb->ki_filp;
	struct ipath_filedata *fp = filp->private_data;
	struct ipath_portdata *pd = port_fp(filp);
	struct ipath_user_sdma_queue *pq = fp->pq;

	if (!dim)
		return -EINVAL;

	return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
}

static struct class *ipath_class;
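
/*
 * Editorial note (not from the original source): init_cdev() pairs a
 * character device (cdev_alloc() + cdev_add(), which makes the minor
 * live) with a struct device from device_create(), so udev can create
 * the /dev/ipath* node.  On any failure both are torn down and *cdevp
 * and *devp are left NULL.
 */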
static int init_cdev(int minor, char *name, const struct file_operations *fops,
		     struct cdev **cdevp, struct device **devp)
{
	const dev_t dev = MKDEV(IPATH_MAJOR, minor);
	struct cdev *cdev = NULL;
	struct device *device = NULL;
	int ret;

	cdev = cdev_alloc();
	if (!cdev) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not allocate cdev for minor %d, %s\n",
		       minor, name);
		ret = -ENOMEM;
		goto done;
	}

	cdev->owner = THIS_MODULE;
	cdev->ops = fops;
	kobject_set_name(&cdev->kobj, name);

	ret = cdev_add(cdev, dev, 1);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME
		       ": Could not add cdev for minor %d, %s (err %d)\n",
		       minor, name, -ret);
		goto err_cdev;
	}

	device = device_create(ipath_class, NULL, dev, NULL, name);

	if (IS_ERR(device)) {
		ret = PTR_ERR(device);
		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
		       "device for minor %d, %s (err %d)\n",
		       minor, name, -ret);
		goto err_cdev;
	}

	goto done;

err_cdev:
	cdev_del(cdev);
	cdev = NULL;

done:
	if (ret >= 0) {
		*cdevp = cdev;
		*devp = device;
	} else {
		*cdevp = NULL;
		*devp = NULL;
	}

	return ret;
}

int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
		    struct cdev **cdevp, struct device **devp)
{
	return init_cdev(minor, name, fops, cdevp, devp);
}

static void cleanup_cdev(struct cdev **cdevp,
			 struct device **devp)
{
	struct device *dev = *devp;

	if (dev) {
		device_unregister(dev);
		*devp = NULL;
	}

	if (*cdevp) {
		cdev_del(*cdevp);
		*cdevp = NULL;
	}
}

void ipath_cdev_cleanup(struct cdev **cdevp,
			struct device **devp)
{
	cleanup_cdev(cdevp, devp);
}

static struct cdev *wildcard_cdev;
static struct device *wildcard_dev;

static const dev_t dev = MKDEV(IPATH_MAJOR, 0);

static int user_init(void)
{
	int ret;

	ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
	if (ret < 0) {
		printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
		       "chrdev region (err %d)\n", -ret);
		goto done;
	}

	ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);

	if (IS_ERR(ipath_class)) {
		ret = PTR_ERR(ipath_class);
		printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
		       "device class (err %d)\n", -ret);
		goto bail;
	}

	goto done;
bail:
	unregister_chrdev_region(dev, IPATH_NMINORS);
done:
	return ret;
}

static void user_cleanup(void)
{
	if (ipath_class) {
		class_destroy(ipath_class);
		ipath_class = NULL;
	}

	unregister_chrdev_region(dev, IPATH_NMINORS);
}

static atomic_t user_count = ATOMIC_INIT(0);
static atomic_t user_setup = ATOMIC_INIT(0);
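
/*
 * Editorial note (not from the original source): user_count and
 * user_setup implement first-in/last-out bring-up.  The first unit
 * added also registers the chrdev region, the class, and the wildcard
 * /dev/ipath (minor 0); the last unit removed tears them down again.
 * user_setup keeps the removal path from running that teardown if the
 * initial setup never succeeded.
 */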
int ipath_user_add(struct ipath_devdata *dd)
{
	char name[10];
	int ret;

	if (atomic_inc_return(&user_count) == 1) {
		ret = user_init();
		if (ret < 0) {
			ipath_dev_err(dd, "Unable to set up user support: "
				      "error %d\n", -ret);
			goto bail;
		}
		ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
				&wildcard_dev);
		if (ret < 0) {
			ipath_dev_err(dd, "Could not create wildcard "
				      "minor: error %d\n", -ret);
			goto bail_user;
		}

		atomic_set(&user_setup, 1);
	}

	snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);

	ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
			&dd->user_cdev, &dd->user_dev);
	if (ret < 0)
		ipath_dev_err(dd, "Could not create user minor %d, %s\n",
			      dd->ipath_unit + 1, name);

	goto bail;

bail_user:
	user_cleanup();
bail:
	return ret;
}

void ipath_user_remove(struct ipath_devdata *dd)
{
	cleanup_cdev(&dd->user_cdev, &dd->user_dev);

	if (atomic_dec_return(&user_count) == 0) {
		if (atomic_read(&user_setup) == 0)
			goto bail;

		cleanup_cdev(&wildcard_cdev, &wildcard_dev);
		user_cleanup();

		atomic_set(&user_setup, 0);
	}
bail:
	return;
}