1/*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/malloc.h> 32#include <sys/kernel.h> 33#include <sys/sysctl.h> 34#include <sys/lock.h> 35#include <sys/mutex.h> 36#include <sys/bus.h> 37#include <sys/fcntl.h> 38#include <sys/file.h> 39#include <sys/filio.h> 40 41#include <vm/vm.h> 42#include <vm/pmap.h> 43 44#include <machine/stdarg.h> 45#include <machine/pmap.h> 46 47#include <linux/kobject.h> 48#include <linux/device.h> 49#include <linux/slab.h> 50#include <linux/module.h> 51#include <linux/cdev.h> 52#include <linux/file.h> 53#include <linux/sysfs.h> 54#include <linux/mm.h> 55#include <linux/io.h> 56#include <linux/vmalloc.h> 57 58#include <vm/vm_pager.h> 59 60MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 61 62#include <linux/rbtree.h> 63/* Undo Linux compat changes. */ 64#undef RB_ROOT 65#undef file 66#undef cdev 67#define RB_ROOT(head) (head)->rbh_root 68#undef LIST_HEAD 69/* From sys/queue.h */ 70#define LIST_HEAD(name, type) \ 71struct name { \ 72 struct type *lh_first; /* first element */ \ 73} 74 75struct kobject class_root; 76struct device linux_rootdev; 77struct class miscclass; 78struct list_head pci_drivers; 79struct list_head pci_devices; 80spinlock_t pci_lock; 81 82int 83panic_cmp(struct rb_node *one, struct rb_node *two) 84{ 85 panic("no cmp"); 86} 87 88RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 89 90int 91kobject_set_name(struct kobject *kobj, const char *fmt, ...) 92{ 93 va_list args; 94 int error; 95 96 va_start(args, fmt); 97 error = kobject_set_name_vargs(kobj, fmt, args); 98 va_end(args); 99 100 return (error); 101} 102 103static inline int 104kobject_add_complete(struct kobject *kobj, struct kobject *parent) 105{ 106 struct kobj_type *t; 107 int error; 108 109 kobj->parent = kobject_get(parent); 110 error = sysfs_create_dir(kobj); 111 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 112 struct attribute **attr; 113 t = kobj->ktype; 114 115 for (attr = t->default_attrs; *attr != NULL; attr++) { 116 error = sysfs_create_file(kobj, *attr); 117 if (error) 118 break; 119 } 120 if (error) 121 sysfs_remove_dir(kobj); 122 123 } 124 return (error); 125} 126 127int 128kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 129{ 130 va_list args; 131 int error; 132 133 va_start(args, fmt); 134 error = kobject_set_name_vargs(kobj, fmt, args); 135 va_end(args); 136 if (error) 137 return (error); 138 139 return kobject_add_complete(kobj, parent); 140} 141 142void 143kobject_release(struct kref *kref) 144{ 145 struct kobject *kobj; 146 char *name; 147 148 kobj = container_of(kref, struct kobject, kref); 149 sysfs_remove_dir(kobj); 150 if (kobj->parent) 151 kobject_put(kobj->parent); 152 kobj->parent = NULL; 153 name = kobj->name; 154 if (kobj->ktype && kobj->ktype->release) 155 kobj->ktype->release(kobj); 156 kfree(name); 157} 158 159static void 160kobject_kfree(struct kobject *kobj) 161{ 162 163 kfree(kobj); 164} 165 166struct kobj_type kfree_type = { .release = kobject_kfree }; 167 168struct device * 169device_create(struct class *class, struct device *parent, dev_t devt, 170 void *drvdata, const char *fmt, ...) 171{ 172 struct device *dev; 173 va_list args; 174 175 dev = kzalloc(sizeof(*dev), M_WAITOK); 176 dev->parent = parent; 177 dev->class = class; 178 dev->devt = devt; 179 dev->driver_data = drvdata; 180 va_start(args, fmt); 181 kobject_set_name_vargs(&dev->kobj, fmt, args); 182 va_end(args); 183 device_register(dev); 184 185 return (dev); 186} 187 188int 189kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, 190 struct kobject *parent, const char *fmt, ...) 191{ 192 va_list args; 193 int error; 194 195 kobject_init(kobj, ktype); 196 kobj->ktype = ktype; 197 kobj->parent = parent; 198 kobj->name = NULL; 199 200 va_start(args, fmt); 201 error = kobject_set_name_vargs(kobj, fmt, args); 202 va_end(args); 203 if (error) 204 return (error); 205 return kobject_add_complete(kobj, parent); 206} 207 208static void 209linux_file_dtor(void *cdp) 210{ 211 struct linux_file *filp; 212 213 filp = cdp; 214 filp->f_op->release(filp->f_vnode, filp); 215 vdrop(filp->f_vnode); 216 kfree(filp); 217} 218 219static int 220linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 221{ 222 struct linux_cdev *ldev; 223 struct linux_file *filp; 224 struct file *file; 225 int error; 226 227 file = curthread->td_fpop; 228 ldev = dev->si_drv1; 229 if (ldev == NULL) 230 return (ENODEV); 231 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 232 filp->f_dentry = &filp->f_dentry_store; 233 filp->f_op = ldev->ops; 234 filp->f_flags = file->f_flag; 235 vhold(file->f_vnode); 236 filp->f_vnode = file->f_vnode; 237 if (filp->f_op->open) { 238 error = -filp->f_op->open(file->f_vnode, filp); 239 if (error) { 240 kfree(filp); 241 return (error); 242 } 243 } 244 error = devfs_set_cdevpriv(filp, linux_file_dtor); 245 if (error) { 246 filp->f_op->release(file->f_vnode, filp); 247 kfree(filp); 248 return (error); 249 } 250 251 return 0; 252} 253 254static int 255linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 256{ 257 struct linux_cdev *ldev; 258 struct linux_file *filp; 259 struct file *file; 260 int error; 261 262 file = curthread->td_fpop; 263 ldev = dev->si_drv1; 264 if (ldev == NULL) 265 return (0); 266 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 267 return (error); 268 filp->f_flags = file->f_flag; 269 270 return (0); 271} 272 273static int 274linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 275 struct thread *td) 276{ 277 struct linux_cdev *ldev; 278 struct linux_file *filp; 279 struct file *file; 280 int error; 281 282 file = curthread->td_fpop; 283 ldev = dev->si_drv1; 284 if (ldev == NULL) 285 return (0); 286 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 287 return (error); 288 filp->f_flags = file->f_flag; 289 /* 290 * Linux does not have a generic ioctl copyin/copyout layer. All 291 * linux ioctls must be converted to void ioctls which pass a 292 * pointer to the address of the data. We want the actual user 293 * address so we dereference here. 294 */ 295 data = *(void **)data; 296 if (filp->f_op->unlocked_ioctl) 297 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 298 else 299 error = ENOTTY; 300 301 return (error); 302} 303 304static int 305linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 306{ 307 struct linux_cdev *ldev; 308 struct linux_file *filp; 309 struct file *file; 310 ssize_t bytes; 311 int error; 312 313 file = curthread->td_fpop; 314 ldev = dev->si_drv1; 315 if (ldev == NULL) 316 return (0); 317 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 318 return (error); 319 filp->f_flags = file->f_flag; 320 if (uio->uio_iovcnt != 1) 321 panic("linux_dev_read: uio %p iovcnt %d", 322 uio, uio->uio_iovcnt); 323 if (filp->f_op->read) { 324 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 325 uio->uio_iov->iov_len, &uio->uio_offset); 326 if (bytes >= 0) { 327 uio->uio_iov->iov_base += bytes; 328 uio->uio_iov->iov_len -= bytes; 329 uio->uio_resid -= bytes; 330 } else 331 error = -bytes; 332 } else 333 error = ENXIO; 334 335 return (error); 336} 337 338static int 339linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 340{ 341 struct linux_cdev *ldev; 342 struct linux_file *filp; 343 struct file *file; 344 ssize_t bytes; 345 int error; 346 347 file = curthread->td_fpop; 348 ldev = dev->si_drv1; 349 if (ldev == NULL) 350 return (0); 351 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 352 return (error); 353 filp->f_flags = file->f_flag; 354 if (uio->uio_iovcnt != 1) 355 panic("linux_dev_write: uio %p iovcnt %d", 356 uio, uio->uio_iovcnt); 357 if (filp->f_op->write) { 358 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 359 uio->uio_iov->iov_len, &uio->uio_offset); 360 if (bytes >= 0) { 361 uio->uio_iov->iov_base += bytes; 362 uio->uio_iov->iov_len -= bytes; 363 uio->uio_resid -= bytes; 364 } else 365 error = -bytes; 366 } else 367 error = ENXIO; 368 369 return (error); 370} 371 372static int 373linux_dev_poll(struct cdev *dev, int events, struct thread *td) 374{ 375 struct linux_cdev *ldev; 376 struct linux_file *filp; 377 struct file *file; 378 int revents; 379 int error; 380 381 file = curthread->td_fpop; 382 ldev = dev->si_drv1; 383 if (ldev == NULL) 384 return (0); 385 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 386 return (error); 387 filp->f_flags = file->f_flag; 388 if (filp->f_op->poll) 389 revents = filp->f_op->poll(filp, NULL) & events; 390 else 391 revents = 0; 392 393 return (revents); 394} 395 396static int 397linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 398 int nprot, vm_memattr_t *memattr) 399{ 400 401 /* XXX memattr not honored. */ 402 *paddr = offset; 403 return (0); 404} 405 406static int 407linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 408 vm_size_t size, struct vm_object **object, int nprot) 409{ 410 struct linux_cdev *ldev; 411 struct linux_file *filp; 412 struct file *file; 413 struct vm_area_struct vma; 414 vm_paddr_t paddr; 415 vm_page_t m; 416 int error; 417 418 file = curthread->td_fpop; 419 ldev = dev->si_drv1; 420 if (ldev == NULL) 421 return (ENODEV); 422 if (size != PAGE_SIZE) 423 return (EINVAL); 424 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 425 return (error); 426 filp->f_flags = file->f_flag; 427 vma.vm_start = 0; 428 vma.vm_end = PAGE_SIZE; 429 vma.vm_pgoff = *offset / PAGE_SIZE; 430 vma.vm_pfn = 0; 431 vma.vm_page_prot = 0; 432 if (filp->f_op->mmap) { 433 error = -filp->f_op->mmap(filp, &vma); 434 if (error == 0) { 435 paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; 436 *offset = paddr; 437 m = PHYS_TO_VM_PAGE(paddr); 438 *object = vm_pager_allocate(OBJT_DEVICE, dev, 439 PAGE_SIZE, nprot, *offset, curthread->td_ucred); 440 if (*object == NULL) 441 return (EINVAL); 442 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) 443 pmap_page_set_memattr(m, vma.vm_page_prot); 444 } 445 } else 446 error = ENODEV; 447 448 return (error); 449} 450 451struct cdevsw linuxcdevsw = { 452 .d_version = D_VERSION, 453 .d_flags = D_TRACKCLOSE, 454 .d_open = linux_dev_open, 455 .d_close = linux_dev_close, 456 .d_read = linux_dev_read, 457 .d_write = linux_dev_write, 458 .d_ioctl = linux_dev_ioctl, 459 .d_mmap_single = linux_dev_mmap_single, 460 .d_mmap = linux_dev_mmap, 461 .d_poll = linux_dev_poll, 462}; 463 464static int 465linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 466 int flags, struct thread *td) 467{ 468 struct linux_file *filp; 469 ssize_t bytes; 470 int error; 471 472 error = 0; 473 filp = (struct linux_file *)file->f_data; 474 filp->f_flags = file->f_flag; 475 if (uio->uio_iovcnt != 1) 476 panic("linux_file_read: uio %p iovcnt %d", 477 uio, uio->uio_iovcnt); 478 if (filp->f_op->read) { 479 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 480 uio->uio_iov->iov_len, &uio->uio_offset); 481 if (bytes >= 0) { 482 uio->uio_iov->iov_base += bytes; 483 uio->uio_iov->iov_len -= bytes; 484 uio->uio_resid -= bytes; 485 } else 486 error = -bytes; 487 } else 488 error = ENXIO; 489 490 return (error); 491} 492 493static int 494linux_file_poll(struct file *file, int events, struct ucred *active_cred, 495 struct thread *td) 496{ 497 struct linux_file *filp; 498 int revents; 499 500 filp = (struct linux_file *)file->f_data; 501 filp->f_flags = file->f_flag; 502 if (filp->f_op->poll) 503 revents = filp->f_op->poll(filp, NULL) & events; 504 else 505 revents = 0; 506 507 return (0); 508} 509 510static int 511linux_file_close(struct file *file, struct thread *td) 512{ 513 struct linux_file *filp; 514 int error; 515 516 filp = (struct linux_file *)file->f_data; 517 filp->f_flags = file->f_flag; 518 error = -filp->f_op->release(NULL, filp); 519 funsetown(&filp->f_sigio); 520 kfree(filp); 521 522 return (error); 523} 524 525static int 526linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 527 struct thread *td) 528{ 529 struct linux_file *filp; 530 int error; 531 532 filp = (struct linux_file *)fp->f_data; 533 filp->f_flags = fp->f_flag; 534 error = 0; 535 536 switch (cmd) { 537 case FIONBIO: 538 break; 539 case FIOASYNC: 540 if (filp->f_op->fasync == NULL) 541 break; 542 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 543 break; 544 case FIOSETOWN: 545 error = fsetown(*(int *)data, &filp->f_sigio); 546 if (error == 0) 547 error = filp->f_op->fasync(0, filp, 548 fp->f_flag & FASYNC); 549 break; 550 case FIOGETOWN: 551 *(int *)data = fgetown(&filp->f_sigio); 552 break; 553 default: 554 error = ENOTTY; 555 break; 556 } 557 return (error); 558} 559 560struct fileops linuxfileops = { 561 .fo_read = linux_file_read, 562 .fo_poll = linux_file_poll, 563 .fo_close = linux_file_close, 564 .fo_ioctl = linux_file_ioctl, 565 .fo_chmod = invfo_chmod, 566 .fo_chown = invfo_chown, 567}; 568 569/* 570 * Hash of vmmap addresses. This is infrequently accessed and does not 571 * need to be particularly large. This is done because we must store the 572 * caller's idea of the map size to properly unmap. 573 */ 574struct vmmap { 575 LIST_ENTRY(vmmap) vm_next; 576 void *vm_addr; 577 unsigned long vm_size; 578}; 579 580LIST_HEAD(vmmaphd, vmmap); 581#define VMMAP_HASH_SIZE 64 582#define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 583#define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 584static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 585static struct mtx vmmaplock; 586 587static void 588vmmap_add(void *addr, unsigned long size) 589{ 590 struct vmmap *vmmap; 591 592 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 593 mtx_lock(&vmmaplock); 594 vmmap->vm_size = size; 595 vmmap->vm_addr = addr; 596 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 597 mtx_unlock(&vmmaplock); 598} 599 600static struct vmmap * 601vmmap_remove(void *addr) 602{ 603 struct vmmap *vmmap; 604 605 mtx_lock(&vmmaplock); 606 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 607 if (vmmap->vm_addr == addr) 608 break; 609 if (vmmap) 610 LIST_REMOVE(vmmap, vm_next); 611 mtx_unlock(&vmmaplock); 612 613 return (vmmap); 614} 615 616void * 617_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 618{ 619 void *addr; 620 621 addr = pmap_mapdev_attr(phys_addr, size, attr); 622 if (addr == NULL) 623 return (NULL); 624 vmmap_add(addr, size); 625 626 return (addr); 627} 628 629void 630iounmap(void *addr) 631{ 632 struct vmmap *vmmap; 633 634 vmmap = vmmap_remove(addr); 635 if (vmmap == NULL) 636 return; 637 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 638 kfree(vmmap); 639} 640 641 642void * 643vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 644{ 645 vm_offset_t off; 646 size_t size; 647 648 size = count * PAGE_SIZE; 649 off = kmem_alloc_nofault(kernel_map, size); 650 if (off == 0) 651 return (NULL); 652 vmmap_add((void *)off, size); 653 pmap_qenter(off, pages, count); 654 655 return ((void *)off); 656} 657 658void 659vunmap(void *addr) 660{ 661 struct vmmap *vmmap; 662 663 vmmap = vmmap_remove(addr); 664 if (vmmap == NULL) 665 return; 666 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 667 kmem_free(kernel_map, (vm_offset_t)addr, vmmap->vm_size); 668 kfree(vmmap); 669} 670 671static void 672linux_compat_init(void) 673{ 674 struct sysctl_oid *rootoid; 675 int i; 676 677 rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(), 678 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 679 kobject_init(&class_root, &class_ktype); 680 kobject_set_name(&class_root, "class"); 681 class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 682 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 683 kobject_init(&linux_rootdev.kobj, &dev_ktype); 684 kobject_set_name(&linux_rootdev.kobj, "device"); 685 linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, 686 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 687 "device"); 688 linux_rootdev.bsddev = root_bus; 689 miscclass.name = "misc"; 690 class_register(&miscclass); 691 INIT_LIST_HEAD(&pci_drivers); 692 INIT_LIST_HEAD(&pci_devices); 693 spin_lock_init(&pci_lock); 694 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 695 for (i = 0; i < VMMAP_HASH_SIZE; i++) 696 LIST_INIT(&vmmaphead[i]); 697} 698 699SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 700