linux_compat.c revision 219820
1/*- 2 * Copyright (c) 2010 Isilon Systems, Inc. 3 * Copyright (c) 2010 iX Systems, Inc. 4 * Copyright (c) 2010 Panasas, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/param.h> 30#include <sys/systm.h> 31#include <sys/malloc.h> 32#include <sys/kernel.h> 33#include <sys/sysctl.h> 34#include <sys/lock.h> 35#include <sys/mutex.h> 36#include <sys/bus.h> 37#include <sys/fcntl.h> 38#include <sys/file.h> 39#include <sys/filio.h> 40 41#include <vm/vm.h> 42#include <vm/pmap.h> 43 44#include <machine/stdarg.h> 45#include <machine/pmap.h> 46 47#include <linux/kobject.h> 48#include <linux/device.h> 49#include <linux/slab.h> 50#include <linux/module.h> 51#include <linux/cdev.h> 52#include <linux/file.h> 53#include <linux/sysfs.h> 54#include <linux/mm.h> 55#include <linux/io.h> 56#include <linux/vmalloc.h> 57 58#include <vm/vm_pager.h> 59 60MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compat"); 61 62#include <linux/rbtree.h> 63/* Undo Linux compat changes. */ 64#undef RB_ROOT 65#undef file 66#undef cdev 67#define RB_ROOT(head) (head)->rbh_root 68#undef LIST_HEAD 69/* From sys/queue.h */ 70#define LIST_HEAD(name, type) \ 71struct name { \ 72 struct type *lh_first; /* first element */ \ 73} 74 75struct kobject class_root; 76struct device linux_rootdev; 77struct class miscclass; 78struct list_head pci_drivers; 79struct list_head pci_devices; 80spinlock_t pci_lock; 81 82int 83panic_cmp(struct rb_node *one, struct rb_node *two) 84{ 85 panic("no cmp"); 86} 87 88RB_GENERATE(linux_root, rb_node, __entry, panic_cmp); 89 90int 91kobject_set_name(struct kobject *kobj, const char *fmt, ...) 92{ 93 va_list args; 94 int error; 95 96 va_start(args, fmt); 97 error = kobject_set_name_vargs(kobj, fmt, args); 98 va_end(args); 99 100 return (error); 101} 102 103static inline int 104kobject_add_complete(struct kobject *kobj, struct kobject *parent) 105{ 106 struct kobj_type *t; 107 int error; 108 109 kobj->parent = kobject_get(parent); 110 error = sysfs_create_dir(kobj); 111 if (error == 0 && kobj->ktype && kobj->ktype->default_attrs) { 112 struct attribute **attr; 113 t = kobj->ktype; 114 115 for (attr = t->default_attrs; *attr != NULL; attr++) { 116 error = sysfs_create_file(kobj, *attr); 117 if (error) 118 break; 119 } 120 if (error) 121 sysfs_remove_dir(kobj); 122 123 } 124 return (error); 125} 126 127int 128kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) 129{ 130 va_list args; 131 int error; 132 133 va_start(args, fmt); 134 error = kobject_set_name_vargs(kobj, fmt, args); 135 va_end(args); 136 if (error) 137 return (error); 138 139 return kobject_add_complete(kobj, parent); 140} 141 142void 143kobject_release(struct kref *kref) 144{ 145 struct kobject *kobj; 146 char *name; 147 148 kobj = container_of(kref, struct kobject, kref); 149 sysfs_remove_dir(kobj); 150 if (kobj->parent) 151 kobject_put(kobj->parent); 152 kobj->parent = NULL; 153 name = kobj->name; 154 if (kobj->ktype && kobj->ktype->release) 155 kobj->ktype->release(kobj); 156 kfree(name); 157} 158 159static void 160kobject_kfree(struct kobject *kobj) 161{ 162 163 kfree(kobj); 164} 165 166struct kobj_type kfree_type = { .release = kobject_kfree }; 167 168struct device * 169device_create(struct class *class, struct device *parent, dev_t devt, 170 void *drvdata, const char *fmt, ...) 171{ 172 struct device *dev; 173 va_list args; 174 175 dev = kzalloc(sizeof(*dev), M_WAITOK); 176 dev->parent = parent; 177 dev->class = class; 178 dev->devt = devt; 179 dev->driver_data = drvdata; 180 va_start(args, fmt); 181 kobject_set_name_vargs(&dev->kobj, fmt, args); 182 va_end(args); 183 device_register(dev); 184 185 return (dev); 186} 187 188int 189kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype, 190 struct kobject *parent, const char *fmt, ...) 191{ 192 va_list args; 193 int error; 194 195 kobject_init(kobj, ktype); 196 kobj->ktype = ktype; 197 kobj->parent = parent; 198 kobj->name = NULL; 199 200 va_start(args, fmt); 201 error = kobject_set_name_vargs(kobj, fmt, args); 202 va_end(args); 203 if (error) 204 return (error); 205 return kobject_add_complete(kobj, parent); 206} 207 208static void 209linux_file_dtor(void *cdp) 210{ 211 struct linux_file *filp; 212 213 filp = cdp; 214 filp->f_op->release(curthread->td_fpop->f_vnode, filp); 215 kfree(filp); 216} 217 218static int 219linux_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) 220{ 221 struct linux_cdev *ldev; 222 struct linux_file *filp; 223 struct file *file; 224 int error; 225 226 file = curthread->td_fpop; 227 ldev = dev->si_drv1; 228 if (ldev == NULL) 229 return (ENODEV); 230 filp = kzalloc(sizeof(*filp), GFP_KERNEL); 231 filp->f_dentry = &filp->f_dentry_store; 232 filp->f_op = ldev->ops; 233 filp->f_flags = file->f_flag; 234 if (filp->f_op->open) { 235 error = -filp->f_op->open(file->f_vnode, filp); 236 if (error) { 237 kfree(filp); 238 return (error); 239 } 240 } 241 error = devfs_set_cdevpriv(filp, linux_file_dtor); 242 if (error) { 243 filp->f_op->release(file->f_vnode, filp); 244 kfree(filp); 245 return (error); 246 } 247 248 return 0; 249} 250 251static int 252linux_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) 253{ 254 struct linux_cdev *ldev; 255 struct linux_file *filp; 256 struct file *file; 257 int error; 258 259 file = curthread->td_fpop; 260 ldev = dev->si_drv1; 261 if (ldev == NULL) 262 return (0); 263 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 264 return (error); 265 filp->f_flags = file->f_flag; 266 devfs_clear_cdevpriv(); 267 268 return (0); 269} 270 271static int 272linux_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, 273 struct thread *td) 274{ 275 struct linux_cdev *ldev; 276 struct linux_file *filp; 277 struct file *file; 278 int error; 279 280 file = curthread->td_fpop; 281 ldev = dev->si_drv1; 282 if (ldev == NULL) 283 return (0); 284 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 285 return (error); 286 filp->f_flags = file->f_flag; 287 /* 288 * Linux does not have a generic ioctl copyin/copyout layer. All 289 * linux ioctls must be converted to void ioctls which pass a 290 * pointer to the address of the data. We want the actual user 291 * address so we dereference here. 292 */ 293 data = *(void **)data; 294 if (filp->f_op->unlocked_ioctl) 295 error = -filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data); 296 else 297 error = ENOTTY; 298 299 return (error); 300} 301 302static int 303linux_dev_read(struct cdev *dev, struct uio *uio, int ioflag) 304{ 305 struct linux_cdev *ldev; 306 struct linux_file *filp; 307 struct file *file; 308 ssize_t bytes; 309 int error; 310 311 file = curthread->td_fpop; 312 ldev = dev->si_drv1; 313 if (ldev == NULL) 314 return (0); 315 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 316 return (error); 317 filp->f_flags = file->f_flag; 318 if (uio->uio_iovcnt != 1) 319 panic("linux_dev_read: uio %p iovcnt %d", 320 uio, uio->uio_iovcnt); 321 if (filp->f_op->read) { 322 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 323 uio->uio_iov->iov_len, &uio->uio_offset); 324 if (bytes >= 0) { 325 uio->uio_iov->iov_base += bytes; 326 uio->uio_iov->iov_len -= bytes; 327 uio->uio_resid -= bytes; 328 } else 329 error = -bytes; 330 } else 331 error = ENXIO; 332 333 return (error); 334} 335 336static int 337linux_dev_write(struct cdev *dev, struct uio *uio, int ioflag) 338{ 339 struct linux_cdev *ldev; 340 struct linux_file *filp; 341 struct file *file; 342 ssize_t bytes; 343 int error; 344 345 file = curthread->td_fpop; 346 ldev = dev->si_drv1; 347 if (ldev == NULL) 348 return (0); 349 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 350 return (error); 351 filp->f_flags = file->f_flag; 352 if (uio->uio_iovcnt != 1) 353 panic("linux_dev_write: uio %p iovcnt %d", 354 uio, uio->uio_iovcnt); 355 if (filp->f_op->write) { 356 bytes = filp->f_op->write(filp, uio->uio_iov->iov_base, 357 uio->uio_iov->iov_len, &uio->uio_offset); 358 if (bytes >= 0) { 359 uio->uio_iov->iov_base += bytes; 360 uio->uio_iov->iov_len -= bytes; 361 uio->uio_resid -= bytes; 362 } else 363 error = -bytes; 364 } else 365 error = ENXIO; 366 367 return (error); 368} 369 370static int 371linux_dev_poll(struct cdev *dev, int events, struct thread *td) 372{ 373 struct linux_cdev *ldev; 374 struct linux_file *filp; 375 struct file *file; 376 int revents; 377 int error; 378 379 file = curthread->td_fpop; 380 ldev = dev->si_drv1; 381 if (ldev == NULL) 382 return (0); 383 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 384 return (error); 385 filp->f_flags = file->f_flag; 386 if (filp->f_op->poll) 387 revents = filp->f_op->poll(filp, NULL) & events; 388 else 389 revents = 0; 390 391 return (revents); 392} 393 394static int 395linux_dev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr, 396 int nprot, vm_memattr_t *memattr) 397{ 398 399 /* XXX memattr not honored. */ 400 *paddr = offset; 401 return (0); 402} 403 404static int 405linux_dev_mmap_single(struct cdev *dev, vm_ooffset_t *offset, 406 vm_size_t size, struct vm_object **object, int nprot) 407{ 408 struct linux_cdev *ldev; 409 struct linux_file *filp; 410 struct file *file; 411 struct vm_area_struct vma; 412 vm_paddr_t paddr; 413 vm_page_t m; 414 int error; 415 416 file = curthread->td_fpop; 417 ldev = dev->si_drv1; 418 if (ldev == NULL) 419 return (ENODEV); 420 if (size != PAGE_SIZE) 421 return (EINVAL); 422 if ((error = devfs_get_cdevpriv((void **)&filp)) != 0) 423 return (error); 424 filp->f_flags = file->f_flag; 425 vma.vm_start = 0; 426 vma.vm_end = PAGE_SIZE; 427 vma.vm_pgoff = *offset / PAGE_SIZE; 428 vma.vm_pfn = 0; 429 vma.vm_page_prot = 0; 430 if (filp->f_op->mmap) { 431 error = -filp->f_op->mmap(filp, &vma); 432 if (error == 0) { 433 paddr = (vm_paddr_t)vma.vm_pfn << PAGE_SHIFT; 434 *offset = paddr; 435 m = PHYS_TO_VM_PAGE(paddr); 436 *object = vm_pager_allocate(OBJT_DEVICE, dev, 437 PAGE_SIZE, nprot, *offset, curthread->td_ucred); 438 if (*object == NULL) 439 return (EINVAL); 440 if (vma.vm_page_prot != VM_MEMATTR_DEFAULT) 441 pmap_page_set_memattr(m, vma.vm_page_prot); 442 } 443 } else 444 error = ENODEV; 445 446 return (error); 447} 448 449struct cdevsw linuxcdevsw = { 450 .d_version = D_VERSION, 451 .d_flags = D_TRACKCLOSE, 452 .d_open = linux_dev_open, 453 .d_close = linux_dev_close, 454 .d_read = linux_dev_read, 455 .d_write = linux_dev_write, 456 .d_ioctl = linux_dev_ioctl, 457 .d_mmap_single = linux_dev_mmap_single, 458 .d_mmap = linux_dev_mmap, 459 .d_poll = linux_dev_poll, 460}; 461 462static int 463linux_file_read(struct file *file, struct uio *uio, struct ucred *active_cred, 464 int flags, struct thread *td) 465{ 466 struct linux_file *filp; 467 ssize_t bytes; 468 int error; 469 470 error = 0; 471 filp = (struct linux_file *)file->f_data; 472 filp->f_flags = file->f_flag; 473 if (uio->uio_iovcnt != 1) 474 panic("linux_file_read: uio %p iovcnt %d", 475 uio, uio->uio_iovcnt); 476 if (filp->f_op->read) { 477 bytes = filp->f_op->read(filp, uio->uio_iov->iov_base, 478 uio->uio_iov->iov_len, &uio->uio_offset); 479 if (bytes >= 0) { 480 uio->uio_iov->iov_base += bytes; 481 uio->uio_iov->iov_len -= bytes; 482 uio->uio_resid -= bytes; 483 } else 484 error = -bytes; 485 } else 486 error = ENXIO; 487 488 return (error); 489} 490 491static int 492linux_file_poll(struct file *file, int events, struct ucred *active_cred, 493 struct thread *td) 494{ 495 struct linux_file *filp; 496 int revents; 497 498 filp = (struct linux_file *)file->f_data; 499 filp->f_flags = file->f_flag; 500 if (filp->f_op->poll) 501 revents = filp->f_op->poll(filp, NULL) & events; 502 else 503 revents = 0; 504 505 return (0); 506} 507 508static int 509linux_file_close(struct file *file, struct thread *td) 510{ 511 struct linux_file *filp; 512 int error; 513 514 filp = (struct linux_file *)file->f_data; 515 filp->f_flags = file->f_flag; 516 error = -filp->f_op->release(NULL, filp); 517 funsetown(&filp->f_sigio); 518 kfree(filp); 519 520 return (error); 521} 522 523static int 524linux_file_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *cred, 525 struct thread *td) 526{ 527 struct linux_file *filp; 528 int error; 529 530 filp = (struct linux_file *)fp->f_data; 531 filp->f_flags = fp->f_flag; 532 error = 0; 533 534 switch (cmd) { 535 case FIONBIO: 536 break; 537 case FIOASYNC: 538 if (filp->f_op->fasync == NULL) 539 break; 540 error = filp->f_op->fasync(0, filp, fp->f_flag & FASYNC); 541 break; 542 case FIOSETOWN: 543 error = fsetown(*(int *)data, &filp->f_sigio); 544 if (error == 0) 545 error = filp->f_op->fasync(0, filp, 546 fp->f_flag & FASYNC); 547 break; 548 case FIOGETOWN: 549 *(int *)data = fgetown(&filp->f_sigio); 550 break; 551 default: 552 error = ENOTTY; 553 break; 554 } 555 return (error); 556} 557 558struct fileops linuxfileops = { 559 .fo_read = linux_file_read, 560 .fo_poll = linux_file_poll, 561 .fo_close = linux_file_close, 562 .fo_ioctl = linux_file_ioctl 563}; 564 565/* 566 * Hash of vmmap addresses. This is infrequently accessed and does not 567 * need to be particularly large. This is done because we must store the 568 * caller's idea of the map size to properly unmap. 569 */ 570struct vmmap { 571 LIST_ENTRY(vmmap) vm_next; 572 void *vm_addr; 573 unsigned long vm_size; 574}; 575 576LIST_HEAD(vmmaphd, vmmap); 577#define VMMAP_HASH_SIZE 64 578#define VMMAP_HASH_MASK (VMMAP_HASH_SIZE - 1) 579#define VM_HASH(addr) ((uintptr_t)(addr) >> PAGE_SHIFT) & VMMAP_HASH_MASK 580static struct vmmaphd vmmaphead[VMMAP_HASH_SIZE]; 581static struct mtx vmmaplock; 582 583static void 584vmmap_add(void *addr, unsigned long size) 585{ 586 struct vmmap *vmmap; 587 588 vmmap = kmalloc(sizeof(*vmmap), GFP_KERNEL); 589 mtx_lock(&vmmaplock); 590 vmmap->vm_size = size; 591 vmmap->vm_addr = addr; 592 LIST_INSERT_HEAD(&vmmaphead[VM_HASH(addr)], vmmap, vm_next); 593 mtx_unlock(&vmmaplock); 594} 595 596static struct vmmap * 597vmmap_remove(void *addr) 598{ 599 struct vmmap *vmmap; 600 601 mtx_lock(&vmmaplock); 602 LIST_FOREACH(vmmap, &vmmaphead[VM_HASH(addr)], vm_next) 603 if (vmmap->vm_addr == addr) 604 break; 605 if (vmmap) 606 LIST_REMOVE(vmmap, vm_next); 607 mtx_unlock(&vmmaplock); 608 609 return (vmmap); 610} 611 612void * 613_ioremap_attr(vm_paddr_t phys_addr, unsigned long size, int attr) 614{ 615 void *addr; 616 617 addr = pmap_mapdev_attr(phys_addr, size, attr); 618 if (addr == NULL) 619 return (NULL); 620 vmmap_add(addr, size); 621 622 return (addr); 623} 624 625void 626iounmap(void *addr) 627{ 628 struct vmmap *vmmap; 629 630 vmmap = vmmap_remove(addr); 631 if (vmmap == NULL) 632 return; 633 pmap_unmapdev((vm_offset_t)addr, vmmap->vm_size); 634 kfree(vmmap); 635} 636 637 638void * 639vmap(struct page **pages, unsigned int count, unsigned long flags, int prot) 640{ 641 vm_offset_t off; 642 size_t size; 643 644 size = count * PAGE_SIZE; 645 off = kmem_alloc_nofault(kernel_map, size); 646 if (off == 0) 647 return (NULL); 648 vmmap_add((void *)off, size); 649 pmap_qenter(off, pages, count); 650 651 return ((void *)off); 652} 653 654void 655vunmap(void *addr) 656{ 657 struct vmmap *vmmap; 658 659 vmmap = vmmap_remove(addr); 660 if (vmmap == NULL) 661 return; 662 pmap_qremove((vm_offset_t)addr, vmmap->vm_size / PAGE_SIZE); 663 kmem_free(kernel_map, (vm_offset_t)addr, vmmap->vm_size); 664 kfree(vmmap); 665} 666 667static void 668linux_compat_init(void) 669{ 670 struct sysctl_oid *rootoid; 671 int i; 672 673 rootoid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(), 674 OID_AUTO, "sys", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "sys"); 675 kobject_init(&class_root, &class_ktype); 676 kobject_set_name(&class_root, "class"); 677 class_root.oidp = SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(rootoid), 678 OID_AUTO, "class", CTLFLAG_RD|CTLFLAG_MPSAFE, NULL, "class"); 679 kobject_init(&linux_rootdev.kobj, &dev_ktype); 680 kobject_set_name(&linux_rootdev.kobj, "device"); 681 linux_rootdev.kobj.oidp = SYSCTL_ADD_NODE(NULL, 682 SYSCTL_CHILDREN(rootoid), OID_AUTO, "device", CTLFLAG_RD, NULL, 683 "device"); 684 linux_rootdev.bsddev = root_bus; 685 miscclass.name = "misc"; 686 class_register(&miscclass); 687 INIT_LIST_HEAD(&pci_drivers); 688 INIT_LIST_HEAD(&pci_devices); 689 spin_lock_init(&pci_lock); 690 mtx_init(&vmmaplock, "IO Map lock", NULL, MTX_DEF); 691 for (i = 0; i < VMMAP_HASH_SIZE; i++) 692 LIST_INIT(&vmmaphead[i]); 693} 694 695SYSINIT(linux_compat, SI_SUB_DRIVERS, SI_ORDER_SECOND, linux_compat_init, NULL); 696