linux_compat.c revision 255932
1/*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/malloc.h> 32#include <sys/kernel.h> 33#include <sys/sysctl.h> 34#include <sys/lock.h> 35#include <sys/mutex.h> 36#include <sys/bus.h> 37#include <sys/fcntl.h> 38#include <sys/file.h> 39#include <sys/filio.h> 40#include <sys/rwlock.h> 41 42#include <vm/vm.h> 43#include <vm/pmap.h> 44 45#include <machine/stdarg.h> 46#include <machine/pmap.h> 47 48#include <linux/kobject.h> 49#include <linux/device.h> 50#include <linux/slab.h> 51#include <linux/module.h> 52#include <linux/cdev.h> 53#include <linux/file.h> 54#include <linux/sysfs.h> 55#include <linux/mm.h> 56#include <linux/io.h> 57#include <linux/vmalloc.h> 58 59#include <vm/vm_pager.h> 60 61MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 62 63#include <linux/rbtree.h> 64/* Undo Linux compat changes. */ 65#undef RB_ROOT 66#undef file 67#undef cdev 68#define RB_ROOT(head) (head)->rbh_root 69#undef LIST_HEAD 70/* From sys/queue.h */ 71#define LIST_HEAD(name, type) \ 72struct name { \ 73 struct type *lh_first; /* first element */ \ 74} 75 76struct kobject class_root; 77struct device linux_rootdev; 78struct class miscclass; 79struct list_head pci_drivers; 80struct list_head pci_devices; 81spinlock_t pci_lock; 82 83int 84panic_cmp(struct rb_node *one, struct rb_node *two) 85{ 86 panic("no cmp"); 87} 88 89RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 90 91int 92kobject_set_name(struct kobject *kobj, const char *fmt, ...) 93{ 94 va_list args; 95 int error; 96 97 va_start(args, fmt); 98 error = kobject_set_name_vargs(kobj, fmt, args); 99 va_end(args); 100 101 return (error); 102} 103 104static inline int 105kobject_add_complete(struct kobject *kobj, struct kobject *parent) 106{ 107 struct kobj_type *t; 108 int error; 109 110 kobj->parent = kobject_get(parent); 111 error = sysfs_create_dir(kobj); 112 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 113 struct attribute **attr; 114 t = kobj->ktype; 115 116 for (attr = t->default_attrs; *attr != NULL; attr++) { 117 error = sysfs_create_file(kobj, *attr); 118 if (error) 119 break; 120 } 121 if (error) 122 sysfs_remove_dir(kobj); 123 124 } 125 return (error); 126} 127 128int 129kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 130{ 131 va_list args; 132 int error; 133 134 va_start(args, fmt); 135 error = kobject_set_name_vargs(kobj, fmt, args); 136 va_end(args); 137 if (error) 138 return (error); 139 140 return kobject_add_complete(kobj, parent); 141} 142 143void 144kobject_release(struct kref *kref) 145{ 146 struct kobject *kobj; 147 char *name; 148 149 kobj = container_of(kref, struct kobject, kref); 150 sysfs_remove_dir(kobj); 151 if (kobj->parent) 152 kobject_put(kobj->parent); 153 kobj->parent = NULL; 154 name = kobj->name; 155 if (kobj->ktype && kobj->ktype->release) 156 kobj->ktype->release(kobj); 157 kfree(name); 158} 159 160static void 161kobject_kfree(struct kobject *kobj) 162{ 163 164 kfree(kobj); 165} 166 167struct kobj_type kfree_type = { .release = kobject_kfree }; 168 169struct device * 170device_create(struct class *class, struct device *parent, dev_t devt, 171 void *drvdata, const char *fmt, ...) 172{ 173 struct device *dev; 174 va_list args; 175 176 dev = kzalloc(sizeof(*dev), M_WAITOK); 177 dev->parent = parent; 178 dev->class = class; 179 dev->devt = devt; 180 dev->driver_data = drvdata; 181 va_start(args, fmt); 182 kobject_set_name_vargs(&dev->kobj, fmt, args); 183 va_end(args); 184 device_register(dev); 185 186 return (dev); 187} 188 189int 190kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, 191 struct kobject *parent, const char *fmt, ...) 192{ 193 va_list args; 194 int error; 195 196 kobject_init(kobj, ktype); 197 kobj->ktype = ktype; 198 kobj->parent = parent; 199 kobj->name = NULL; 200 201 va_start(args, fmt); 202 error = kobject_set_name_vargs(kobj, fmt, args); 203 va_end(args); 204 if (error) 205 return (error); 206 return kobject_add_complete(kobj, parent); 207} 208 209static void 210linux_file_dtor(void *cdp) 211{ 212 struct linux_file *filp; 213 214 filp = cdp; 215 filp->f_op->release(filp->f_vnode, filp); 216 vdrop(filp->f_vnode); 217 kfree(filp); 218} 219 220static int 221linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 222{ 223 struct linux_cdev *ldev; 224 struct linux_file *filp; 225 struct file *file; 226 int error; 227 228 file = curthread->td_fpop; 229 ldev = dev->si_drv1; 230 if (ldev == NULL) 231 return (ENODEV); 232 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 233 filp->f_dentry = &filp->f_dentry_store; 234 filp->f_op = ldev->ops; 235 filp->f_flags = file->f_flag; 236 vhold(file->f_vnode); 237 filp->f_vnode = file->f_vnode; 238 if (filp->f_op->open) { 239 error = -filp->f_op->open(file->f_vnode, filp); 240 if (error) { 241 kfree(filp); 242 return (error); 243 } 244 } 245 error = devfs_set_cdevpriv(filp, linux_file_dtor); 246 if (error) { 247 filp->f_op->release(file->f_vnode, filp); 248 kfree(filp); 249 return (error); 250 } 251 252 return 0; 253} 254 255static int 256linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 257{ 258 struct linux_cdev *ldev; 259 struct linux_file *filp; 260 struct file *file; 261 int error; 262 263 file = curthread->td_fpop; 264 ldev = dev->si_drv1; 265 if (ldev == NULL) 266 return (0); 267 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 268 return (error); 269 filp->f_flags = file->f_flag; 270 devfs_clear_cdevpriv(); 271 272 273 return (0); 274} 275 276static int 277linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 278 struct thread *td) 279{ 280 struct linux_cdev *ldev; 281 struct linux_file *filp; 282 struct file *file; 283 int error; 284 285 file = curthread->td_fpop; 286 ldev = dev->si_drv1; 287 if (ldev == NULL) 288 return (0); 289 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 290 return (error); 291 filp->f_flags = file->f_flag; 292 /* 293 * Linux does not have a generic ioctl copyin/copyout layer. All 294 * linux ioctls must be converted to void ioctls which pass a 295 * pointer to the address of the data. We want the actual user 296 * address so we dereference here. 297 */ 298 data = *(void **)data; 299 if (filp->f_op->unlocked_ioctl) 300 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 301 else 302 error = ENOTTY; 303 304 return (error); 305} 306 307static int 308linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 309{ 310 struct linux_cdev *ldev; 311 struct linux_file *filp; 312 struct file *file; 313 ssize_t bytes; 314 int error; 315 316 file = curthread->td_fpop; 317 ldev = dev->si_drv1; 318 if (ldev == NULL) 319 return (0); 320 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 321 return (error); 322 filp->f_flags = file->f_flag; 323 if (uio->uio_iovcnt != 1) 324 panic("linux_dev_read: uio %p iovcnt %d", 325 uio, uio->uio_iovcnt); 326 if (filp->f_op->read) { 327 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 328 uio->uio_iov->iov_len, &uio->uio_offset); 329 if (bytes >= 0) { 330 uio->uio_iov->iov_base += bytes; 331 uio->uio_iov->iov_len -= bytes; 332 uio->uio_resid -= bytes; 333 } else 334 error = -bytes; 335 } else 336 error = ENXIO; 337 338 return (error); 339} 340 341static int 342linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 343{ 344 struct linux_cdev *ldev; 345 struct linux_file *filp; 346 struct file *file; 347 ssize_t bytes; 348 int error; 349 350 file = curthread->td_fpop; 351 ldev = dev->si_drv1; 352 if (ldev == NULL) 353 return (0); 354 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 355 return (error); 356 filp->f_flags = file->f_flag; 357 if (uio->uio_iovcnt != 1) 358 panic("linux_dev_write: uio %p iovcnt %d", 359 uio, uio->uio_iovcnt); 360 if (filp->f_op->write) { 361 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 362 uio->uio_iov->iov_len, &uio->uio_offset); 363 if (bytes >= 0) { 364 uio->uio_iov->iov_base += bytes; 365 uio->uio_iov->iov_len -= bytes; 366 uio->uio_resid -= bytes; 367 } else 368 error = -bytes; 369 } else 370 error = ENXIO; 371 372 return (error); 373} 374 375static int 376linux_dev_poll(struct cdev *dev, int events, struct thread *td) 377{ 378 struct linux_cdev *ldev; 379 struct linux_file *filp; 380 struct file *file; 381 int revents; 382 int error; 383 384 file = curthread->td_fpop; 385 ldev = dev->si_drv1; 386 if (ldev == NULL) 387 return (0); 388 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 389 return (error); 390 filp->f_flags = file->f_flag; 391 if (filp->f_op->poll) 392 revents = filp->f_op->poll(filp, NULL) & events; 393 else 394 revents = 0; 395 396 return (revents); 397} 398 399static int 400linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 401 int nprot, vm_memattr_t *memattr) 402{ 403 404 /* XXX memattr not honored. */ 405 *paddr = offset; 406 return (0); 407} 408 409static int 410linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 411 vm_size_t size, struct vm_object **object, int nprot) 412{ 413 struct linux_cdev *ldev; 414 struct linux_file *filp; 415 struct file *file; 416 struct vm_area_struct vma; 417 vm_paddr_t paddr; 418 vm_page_t m; 419 int error; 420 421 file = curthread->td_fpop; 422 ldev = dev->si_drv1; 423 if (ldev == NULL) 424 return (ENODEV); 425 if (size != PAGE_SIZE) 426 return (EINVAL); 427 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 428 return (error); 429 filp->f_flags = file->f_flag; 430 vma.vm_start = 0; 431 vma.vm_end = PAGE_SIZE; 432 vma.vm_pgoff = *offset / PAGE_SIZE; 433 vma.vm_pfn = 0; 434 vma.vm_page_prot = 0; 435 if (filp->f_op->mmap) { 436 error = -filp->f_op->mmap(filp, &vma); 437 if (error == 0) { 438 paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; 439 *offset = paddr; 440 m = PHYS_TO_VM_PAGE(paddr); 441 *object = vm_pager_allocate(OBJT_DEVICE, dev, 442 PAGE_SIZE, nprot, *offset, curthread->td_ucred); 443 if (*object == NULL) 444 return (EINVAL); 445 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) 446 pmap_page_set_memattr(m, vma.vm_page_prot); 447 } 448 } else 449 error = ENODEV; 450 451 return (error); 452} 453 454struct cdevsw linuxcdevsw = { 455 .d_version = D_VERSION, 456 .d_flags = D_TRACKCLOSE, 457 .d_open = linux_dev_open, 458 .d_close = linux_dev_close, 459 .d_read = linux_dev_read, 460 .d_write = linux_dev_write, 461 .d_ioctl = linux_dev_ioctl, 462 .d_mmap_single = linux_dev_mmap_single, 463 .d_mmap = linux_dev_mmap, 464 .d_poll = linux_dev_poll, 465}; 466 467static int 468linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 469 int flags, struct thread *td) 470{ 471 struct linux_file *filp; 472 ssize_t bytes; 473 int error; 474 475 error = 0; 476 filp = (struct linux_file *)file->f_data; 477 filp->f_flags = file->f_flag; 478 if (uio->uio_iovcnt != 1) 479 panic("linux_file_read: uio %p iovcnt %d", 480 uio, uio->uio_iovcnt); 481 if (filp->f_op->read) { 482 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 483 uio->uio_iov->iov_len, &uio->uio_offset); 484 if (bytes >= 0) { 485 uio->uio_iov->iov_base += bytes; 486 uio->uio_iov->iov_len -= bytes; 487 uio->uio_resid -= bytes; 488 } else 489 error = -bytes; 490 } else 491 error = ENXIO; 492 493 return (error); 494} 495 496static int 497linux_file_poll(struct file *file, int events, struct ucred *active_cred, 498 struct thread *td) 499{ 500 struct linux_file *filp; 501 int revents; 502 503 filp = (struct linux_file *)file->f_data; 504 filp->f_flags = file->f_flag; 505 if (filp->f_op->poll) 506 revents = filp->f_op->poll(filp, NULL) & events; 507 else 508 revents = 0; 509 510 return (0); 511} 512 513static int 514linux_file_close(struct file *file, struct thread *td) 515{ 516 struct linux_file *filp; 517 int error; 518 519 filp = (struct linux_file *)file->f_data; 520 filp->f_flags = file->f_flag; 521 error = -filp->f_op->release(NULL, filp); 522 funsetown(&filp->f_sigio); 523 kfree(filp); 524 525 return (error); 526} 527 528static int 529linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 530 struct thread *td) 531{ 532 struct linux_file *filp; 533 int error; 534 535 filp = (struct linux_file *)fp->f_data; 536 filp->f_flags = fp->f_flag; 537 error = 0; 538 539 switch (cmd) { 540 case FIONBIO: 541 break; 542 case FIOASYNC: 543 if (filp->f_op->fasync == NULL) 544 break; 545 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 546 break; 547 case FIOSETOWN: 548 error = fsetown(*(int *)data, &filp->f_sigio); 549 if (error == 0) 550 error = filp->f_op->fasync(0, filp, 551 fp->f_flag & FASYNC); 552 break; 553 case FIOGETOWN: 554 *(int *)data = fgetown(&filp->f_sigio); 555 break; 556 default: 557 error = ENOTTY; 558 break; 559 } 560 return (error); 561} 562 563struct fileops linuxfileops = { 564 .fo_read = linux_file_read, 565 .fo_poll = linux_file_poll, 566 .fo_close = linux_file_close, 567 .fo_ioctl = linux_file_ioctl, 568 .fo_chmod = invfo_chmod, 569 .fo_chown = invfo_chown, 570 .fo_sendfile = invfo_sendfile, 571}; 572 573/* 574 * Hash of vmmap addresses. This is infrequently accessed and does not 575 * need to be particularly large. This is done because we must store the 576 * caller's idea of the map size to properly unmap. 577 */ 578struct vmmap { 579 LIST_ENTRY(vmmap) vm_next; 580 void *vm_addr; 581 unsigned long vm_size; 582}; 583 584LIST_HEAD(vmmaphd, vmmap); 585#define VMMAP_HASH_SIZE 64 586#define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 587#define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 588static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 589static struct mtx vmmaplock; 590 591static void 592vmmap_add(void *addr, unsigned long size) 593{ 594 struct vmmap *vmmap; 595 596 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 597 mtx_lock(&vmmaplock); 598 vmmap->vm_size = size; 599 vmmap->vm_addr = addr; 600 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 601 mtx_unlock(&vmmaplock); 602} 603 604static struct vmmap * 605vmmap_remove(void *addr) 606{ 607 struct vmmap *vmmap; 608 609 mtx_lock(&vmmaplock); 610 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 611 if (vmmap->vm_addr == addr) 612 break; 613 if (vmmap) 614 LIST_REMOVE(vmmap, vm_next); 615 mtx_unlock(&vmmaplock); 616 617 return (vmmap); 618} 619 620void * 621_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 622{ 623 void *addr; 624 625 addr = pmap_mapdev_attr(phys_addr, size, attr); 626 if (addr == NULL) 627 return (NULL); 628 vmmap_add(addr, size); 629 630 return (addr); 631} 632 633void 634iounmap(void *addr) 635{ 636 struct vmmap *vmmap; 637 638 vmmap = vmmap_remove(addr); 639 if (vmmap == NULL) 640 return; 641 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 642 kfree(vmmap); 643} 644 645 646void * 647vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 648{ 649 vm_offset_t off; 650 size_t size; 651 652 size = count * PAGE_SIZE; 653 off = kva_alloc(size); 654 if (off == 0) 655 return (NULL); 656 vmmap_add((void *)off, size); 657 pmap_qenter(off, pages, count); 658 659 return ((void *)off); 660} 661 662void 663vunmap(void *addr) 664{ 665 struct vmmap *vmmap; 666 667 vmmap = vmmap_remove(addr); 668 if (vmmap == NULL) 669 return; 670 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 671 kva_free((vm_offset_t)addr, vmmap->vm_size); 672 kfree(vmmap); 673} 674 675static void 676linux_compat_init(void) 677{ 678 struct sysctl_oid *rootoid; 679 int i; 680 681 rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(), 682 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 683 kobject_init(&class_root, &class_ktype); 684 kobject_set_name(&class_root, "class"); 685 class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 686 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 687 kobject_init(&linux_rootdev.kobj, &dev_ktype); 688 kobject_set_name(&linux_rootdev.kobj, "device"); 689 linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, 690 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 691 "device"); 692 linux_rootdev.bsddev = root_bus; 693 miscclass.name = "misc"; 694 class_register(&miscclass); 695 INIT_LIST_HEAD(&pci_drivers); 696 INIT_LIST_HEAD(&pci_devices); 697 spin_lock_init(&pci_lock); 698 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 699 for (i = 0; i < VMMAP_HASH_SIZE; i++) 700 LIST_INIT(&vmmaphead[i]); 701} 702 703SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 704