1// SPDX-License-Identifier: GPL-2.0 2/* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7#include <linux/fs.h> 8#include <linux/dax.h> 9#include <linux/pci.h> 10#include <linux/pfn_t.h> 11#include <linux/memremap.h> 12#include <linux/module.h> 13#include <linux/virtio.h> 14#include <linux/virtio_fs.h> 15#include <linux/delay.h> 16#include <linux/fs_context.h> 17#include <linux/fs_parser.h> 18#include <linux/highmem.h> 19#include <linux/cleanup.h> 20#include <linux/uio.h> 21#include "fuse_i.h" 22 23/* Used to help calculate the FUSE connection's max_pages limit for a request's 24 * size. Parts of the struct fuse_req are sliced into scattergather lists in 25 * addition to the pages used, so this can help account for that overhead. 26 */ 27#define FUSE_HEADER_OVERHEAD 4 28 29/* List of virtio-fs device instances and a lock for the list. Also provides 30 * mutual exclusion in device removal and mounting path 31 */ 32static DEFINE_MUTEX(virtio_fs_mutex); 33static LIST_HEAD(virtio_fs_instances); 34 35/* The /sys/fs/virtio_fs/ kset */ 36static struct kset *virtio_fs_kset; 37 38enum { 39 VQ_HIPRIO, 40 VQ_REQUEST 41}; 42 43#define VQ_NAME_LEN 24 44 45/* Per-virtqueue state */ 46struct virtio_fs_vq { 47 spinlock_t lock; 48 struct virtqueue *vq; /* protected by ->lock */ 49 struct work_struct done_work; 50 struct list_head queued_reqs; 51 struct list_head end_reqs; /* End these requests */ 52 struct delayed_work dispatch_work; 53 struct fuse_dev *fud; 54 bool connected; 55 long in_flight; 56 struct completion in_flight_zero; /* No inflight requests */ 57 char name[VQ_NAME_LEN]; 58} ____cacheline_aligned_in_smp; 59 60/* A virtio-fs device instance */ 61struct virtio_fs { 62 struct kobject kobj; 63 struct list_head list; /* on virtio_fs_instances */ 64 char *tag; 65 struct virtio_fs_vq *vqs; 66 unsigned int nvqs; /* number of virtqueues */ 67 unsigned int num_request_queues; /* number of request queues */ 68 struct dax_device *dax_dev; 69 70 /* DAX memory window where file contents are mapped */ 71 void *window_kaddr; 72 phys_addr_t window_phys_addr; 73 size_t window_len; 74}; 75 76struct virtio_fs_forget_req { 77 struct fuse_in_header ih; 78 struct fuse_forget_in arg; 79}; 80 81struct virtio_fs_forget { 82 /* This request can be temporarily queued on virt queue */ 83 struct list_head list; 84 struct virtio_fs_forget_req req; 85}; 86 87struct virtio_fs_req_work { 88 struct fuse_req *req; 89 struct virtio_fs_vq *fsvq; 90 struct work_struct done_work; 91}; 92 93static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 94 struct fuse_req *req, bool in_flight); 95 96static const struct constant_table dax_param_enums[] = { 97 {"always", FUSE_DAX_ALWAYS }, 98 {"never", FUSE_DAX_NEVER }, 99 {"inode", FUSE_DAX_INODE_USER }, 100 {} 101}; 102 103enum { 104 OPT_DAX, 105 OPT_DAX_ENUM, 106}; 107 108static const struct fs_parameter_spec virtio_fs_parameters[] = { 109 fsparam_flag("dax", OPT_DAX), 110 fsparam_enum("dax", OPT_DAX_ENUM, dax_param_enums), 111 {} 112}; 113 114static int virtio_fs_parse_param(struct fs_context *fsc, 115 struct fs_parameter *param) 116{ 117 struct fs_parse_result result; 118 struct fuse_fs_context *ctx = fsc->fs_private; 119 int opt; 120 121 opt = fs_parse(fsc, virtio_fs_parameters, param, &result); 122 if (opt < 0) 123 return opt; 124 125 switch (opt) { 126 case OPT_DAX: 127 ctx->dax_mode = FUSE_DAX_ALWAYS; 128 break; 129 case OPT_DAX_ENUM: 130 ctx->dax_mode = result.uint_32; 131 break; 132 default: 133 return -EINVAL; 134 } 135 136 return 0; 137} 138 139static void virtio_fs_free_fsc(struct fs_context *fsc) 140{ 141 struct fuse_fs_context *ctx = fsc->fs_private; 142 143 kfree(ctx); 144} 145 146static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 147{ 148 struct virtio_fs *fs = vq->vdev->priv; 149 150 return &fs->vqs[vq->index]; 151} 152 153/* Should be called with fsvq->lock held. */ 154static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 155{ 156 fsvq->in_flight++; 157} 158 159/* Should be called with fsvq->lock held. */ 160static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 161{ 162 WARN_ON(fsvq->in_flight <= 0); 163 fsvq->in_flight--; 164 if (!fsvq->in_flight) 165 complete(&fsvq->in_flight_zero); 166} 167 168static ssize_t tag_show(struct kobject *kobj, 169 struct kobj_attribute *attr, char *buf) 170{ 171 struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 172 173 return sysfs_emit(buf, fs->tag); 174} 175 176static struct kobj_attribute virtio_fs_tag_attr = __ATTR_RO(tag); 177 178static struct attribute *virtio_fs_attrs[] = { 179 &virtio_fs_tag_attr.attr, 180 NULL 181}; 182ATTRIBUTE_GROUPS(virtio_fs); 183 184static void virtio_fs_ktype_release(struct kobject *kobj) 185{ 186 struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj); 187 188 kfree(vfs->vqs); 189 kfree(vfs); 190} 191 192static const struct kobj_type virtio_fs_ktype = { 193 .release = virtio_fs_ktype_release, 194 .sysfs_ops = &kobj_sysfs_ops, 195 .default_groups = virtio_fs_groups, 196}; 197 198/* Make sure virtiofs_mutex is held */ 199static void virtio_fs_put(struct virtio_fs *fs) 200{ 201 kobject_put(&fs->kobj); 202} 203 204static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 205{ 206 struct virtio_fs *vfs = fiq->priv; 207 208 mutex_lock(&virtio_fs_mutex); 209 virtio_fs_put(vfs); 210 mutex_unlock(&virtio_fs_mutex); 211} 212 213static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 214{ 215 WARN_ON(fsvq->in_flight < 0); 216 217 /* Wait for in flight requests to finish.*/ 218 spin_lock(&fsvq->lock); 219 if (fsvq->in_flight) { 220 /* We are holding virtio_fs_mutex. There should not be any 221 * waiters waiting for completion. 222 */ 223 reinit_completion(&fsvq->in_flight_zero); 224 spin_unlock(&fsvq->lock); 225 wait_for_completion(&fsvq->in_flight_zero); 226 } else { 227 spin_unlock(&fsvq->lock); 228 } 229 230 flush_work(&fsvq->done_work); 231 flush_delayed_work(&fsvq->dispatch_work); 232} 233 234static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) 235{ 236 struct virtio_fs_vq *fsvq; 237 int i; 238 239 for (i = 0; i < fs->nvqs; i++) { 240 fsvq = &fs->vqs[i]; 241 virtio_fs_drain_queue(fsvq); 242 } 243} 244 245static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 246{ 247 /* Provides mutual exclusion between ->remove and ->kill_sb 248 * paths. We don't want both of these draining queue at the 249 * same time. Current completion logic reinits completion 250 * and that means there should not be any other thread 251 * doing reinit or waiting for completion already. 252 */ 253 mutex_lock(&virtio_fs_mutex); 254 virtio_fs_drain_all_queues_locked(fs); 255 mutex_unlock(&virtio_fs_mutex); 256} 257 258static void virtio_fs_start_all_queues(struct virtio_fs *fs) 259{ 260 struct virtio_fs_vq *fsvq; 261 int i; 262 263 for (i = 0; i < fs->nvqs; i++) { 264 fsvq = &fs->vqs[i]; 265 spin_lock(&fsvq->lock); 266 fsvq->connected = true; 267 spin_unlock(&fsvq->lock); 268 } 269} 270 271/* Add a new instance to the list or return -EEXIST if tag name exists*/ 272static int virtio_fs_add_instance(struct virtio_device *vdev, 273 struct virtio_fs *fs) 274{ 275 struct virtio_fs *fs2; 276 int ret; 277 278 mutex_lock(&virtio_fs_mutex); 279 280 list_for_each_entry(fs2, &virtio_fs_instances, list) { 281 if (strcmp(fs->tag, fs2->tag) == 0) { 282 mutex_unlock(&virtio_fs_mutex); 283 return -EEXIST; 284 } 285 } 286 287 /* Use the virtio_device's index as a unique identifier, there is no 288 * need to allocate our own identifiers because the virtio_fs instance 289 * is only visible to userspace as long as the underlying virtio_device 290 * exists. 291 */ 292 fs->kobj.kset = virtio_fs_kset; 293 ret = kobject_add(&fs->kobj, NULL, "%d", vdev->index); 294 if (ret < 0) { 295 mutex_unlock(&virtio_fs_mutex); 296 return ret; 297 } 298 299 ret = sysfs_create_link(&fs->kobj, &vdev->dev.kobj, "device"); 300 if (ret < 0) { 301 kobject_del(&fs->kobj); 302 mutex_unlock(&virtio_fs_mutex); 303 return ret; 304 } 305 306 list_add_tail(&fs->list, &virtio_fs_instances); 307 308 mutex_unlock(&virtio_fs_mutex); 309 310 kobject_uevent(&fs->kobj, KOBJ_ADD); 311 312 return 0; 313} 314 315/* Return the virtio_fs with a given tag, or NULL */ 316static struct virtio_fs *virtio_fs_find_instance(const char *tag) 317{ 318 struct virtio_fs *fs; 319 320 mutex_lock(&virtio_fs_mutex); 321 322 list_for_each_entry(fs, &virtio_fs_instances, list) { 323 if (strcmp(fs->tag, tag) == 0) { 324 kobject_get(&fs->kobj); 325 goto found; 326 } 327 } 328 329 fs = NULL; /* not found */ 330 331found: 332 mutex_unlock(&virtio_fs_mutex); 333 334 return fs; 335} 336 337static void virtio_fs_free_devs(struct virtio_fs *fs) 338{ 339 unsigned int i; 340 341 for (i = 0; i < fs->nvqs; i++) { 342 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 343 344 if (!fsvq->fud) 345 continue; 346 347 fuse_dev_free(fsvq->fud); 348 fsvq->fud = NULL; 349 } 350} 351 352/* Read filesystem name from virtio config into fs->tag (must kfree()). */ 353static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 354{ 355 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 356 char *end; 357 size_t len; 358 359 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 360 &tag_buf, sizeof(tag_buf)); 361 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 362 if (end == tag_buf) 363 return -EINVAL; /* empty tag */ 364 if (!end) 365 end = &tag_buf[sizeof(tag_buf)]; 366 367 len = end - tag_buf; 368 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 369 if (!fs->tag) 370 return -ENOMEM; 371 memcpy(fs->tag, tag_buf, len); 372 fs->tag[len] = '\0'; 373 374 /* While the VIRTIO specification allows any character, newlines are 375 * awkward on mount(8) command-lines and cause problems in the sysfs 376 * "tag" attr and uevent TAG= properties. Forbid them. 377 */ 378 if (strchr(fs->tag, '\n')) { 379 dev_dbg(&vdev->dev, "refusing virtiofs tag with newline character\n"); 380 return -EINVAL; 381 } 382 383 return 0; 384} 385 386/* Work function for hiprio completion */ 387static void virtio_fs_hiprio_done_work(struct work_struct *work) 388{ 389 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 390 done_work); 391 struct virtqueue *vq = fsvq->vq; 392 393 /* Free completed FUSE_FORGET requests */ 394 spin_lock(&fsvq->lock); 395 do { 396 unsigned int len; 397 void *req; 398 399 virtqueue_disable_cb(vq); 400 401 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 402 kfree(req); 403 dec_in_flight_req(fsvq); 404 } 405 } while (!virtqueue_enable_cb(vq)); 406 spin_unlock(&fsvq->lock); 407} 408 409static void virtio_fs_request_dispatch_work(struct work_struct *work) 410{ 411 struct fuse_req *req; 412 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 413 dispatch_work.work); 414 int ret; 415 416 pr_debug("virtio-fs: worker %s called.\n", __func__); 417 while (1) { 418 spin_lock(&fsvq->lock); 419 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 420 list); 421 if (!req) { 422 spin_unlock(&fsvq->lock); 423 break; 424 } 425 426 list_del_init(&req->list); 427 spin_unlock(&fsvq->lock); 428 fuse_request_end(req); 429 } 430 431 /* Dispatch pending requests */ 432 while (1) { 433 spin_lock(&fsvq->lock); 434 req = list_first_entry_or_null(&fsvq->queued_reqs, 435 struct fuse_req, list); 436 if (!req) { 437 spin_unlock(&fsvq->lock); 438 return; 439 } 440 list_del_init(&req->list); 441 spin_unlock(&fsvq->lock); 442 443 ret = virtio_fs_enqueue_req(fsvq, req, true); 444 if (ret < 0) { 445 if (ret == -ENOMEM || ret == -ENOSPC) { 446 spin_lock(&fsvq->lock); 447 list_add_tail(&req->list, &fsvq->queued_reqs); 448 schedule_delayed_work(&fsvq->dispatch_work, 449 msecs_to_jiffies(1)); 450 spin_unlock(&fsvq->lock); 451 return; 452 } 453 req->out.h.error = ret; 454 spin_lock(&fsvq->lock); 455 dec_in_flight_req(fsvq); 456 spin_unlock(&fsvq->lock); 457 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 458 ret); 459 fuse_request_end(req); 460 } 461 } 462} 463 464/* 465 * Returns 1 if queue is full and sender should wait a bit before sending 466 * next request, 0 otherwise. 467 */ 468static int send_forget_request(struct virtio_fs_vq *fsvq, 469 struct virtio_fs_forget *forget, 470 bool in_flight) 471{ 472 struct scatterlist sg; 473 struct virtqueue *vq; 474 int ret = 0; 475 bool notify; 476 struct virtio_fs_forget_req *req = &forget->req; 477 478 spin_lock(&fsvq->lock); 479 if (!fsvq->connected) { 480 if (in_flight) 481 dec_in_flight_req(fsvq); 482 kfree(forget); 483 goto out; 484 } 485 486 sg_init_one(&sg, req, sizeof(*req)); 487 vq = fsvq->vq; 488 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 489 490 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 491 if (ret < 0) { 492 if (ret == -ENOMEM || ret == -ENOSPC) { 493 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 494 ret); 495 list_add_tail(&forget->list, &fsvq->queued_reqs); 496 schedule_delayed_work(&fsvq->dispatch_work, 497 msecs_to_jiffies(1)); 498 if (!in_flight) 499 inc_in_flight_req(fsvq); 500 /* Queue is full */ 501 ret = 1; 502 } else { 503 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Dropping it.\n", 504 ret); 505 kfree(forget); 506 if (in_flight) 507 dec_in_flight_req(fsvq); 508 } 509 goto out; 510 } 511 512 if (!in_flight) 513 inc_in_flight_req(fsvq); 514 notify = virtqueue_kick_prepare(vq); 515 spin_unlock(&fsvq->lock); 516 517 if (notify) 518 virtqueue_notify(vq); 519 return ret; 520out: 521 spin_unlock(&fsvq->lock); 522 return ret; 523} 524 525static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 526{ 527 struct virtio_fs_forget *forget; 528 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 529 dispatch_work.work); 530 pr_debug("virtio-fs: worker %s called.\n", __func__); 531 while (1) { 532 spin_lock(&fsvq->lock); 533 forget = list_first_entry_or_null(&fsvq->queued_reqs, 534 struct virtio_fs_forget, list); 535 if (!forget) { 536 spin_unlock(&fsvq->lock); 537 return; 538 } 539 540 list_del(&forget->list); 541 spin_unlock(&fsvq->lock); 542 if (send_forget_request(fsvq, forget, true)) 543 return; 544 } 545} 546 547/* Allocate and copy args into req->argbuf */ 548static int copy_args_to_argbuf(struct fuse_req *req) 549{ 550 struct fuse_args *args = req->args; 551 unsigned int offset = 0; 552 unsigned int num_in; 553 unsigned int num_out; 554 unsigned int len; 555 unsigned int i; 556 557 num_in = args->in_numargs - args->in_pages; 558 num_out = args->out_numargs - args->out_pages; 559 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 560 fuse_len_args(num_out, args->out_args); 561 562 req->argbuf = kmalloc(len, GFP_ATOMIC); 563 if (!req->argbuf) 564 return -ENOMEM; 565 566 for (i = 0; i < num_in; i++) { 567 memcpy(req->argbuf + offset, 568 args->in_args[i].value, 569 args->in_args[i].size); 570 offset += args->in_args[i].size; 571 } 572 573 return 0; 574} 575 576/* Copy args out of and free req->argbuf */ 577static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 578{ 579 unsigned int remaining; 580 unsigned int offset; 581 unsigned int num_in; 582 unsigned int num_out; 583 unsigned int i; 584 585 remaining = req->out.h.len - sizeof(req->out.h); 586 num_in = args->in_numargs - args->in_pages; 587 num_out = args->out_numargs - args->out_pages; 588 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 589 590 for (i = 0; i < num_out; i++) { 591 unsigned int argsize = args->out_args[i].size; 592 593 if (args->out_argvar && 594 i == args->out_numargs - 1 && 595 argsize > remaining) { 596 argsize = remaining; 597 } 598 599 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 600 offset += argsize; 601 602 if (i != args->out_numargs - 1) 603 remaining -= argsize; 604 } 605 606 /* Store the actual size of the variable-length arg */ 607 if (args->out_argvar) 608 args->out_args[args->out_numargs - 1].size = remaining; 609 610 kfree(req->argbuf); 611 req->argbuf = NULL; 612} 613 614/* Work function for request completion */ 615static void virtio_fs_request_complete(struct fuse_req *req, 616 struct virtio_fs_vq *fsvq) 617{ 618 struct fuse_pqueue *fpq = &fsvq->fud->pq; 619 struct fuse_args *args; 620 struct fuse_args_pages *ap; 621 unsigned int len, i, thislen; 622 struct page *page; 623 624 /* 625 * TODO verify that server properly follows FUSE protocol 626 * (oh.uniq, oh.len) 627 */ 628 args = req->args; 629 copy_args_from_argbuf(args, req); 630 631 if (args->out_pages && args->page_zeroing) { 632 len = args->out_args[args->out_numargs - 1].size; 633 ap = container_of(args, typeof(*ap), args); 634 for (i = 0; i < ap->num_pages; i++) { 635 thislen = ap->descs[i].length; 636 if (len < thislen) { 637 WARN_ON(ap->descs[i].offset); 638 page = ap->pages[i]; 639 zero_user_segment(page, len, thislen); 640 len = 0; 641 } else { 642 len -= thislen; 643 } 644 } 645 } 646 647 spin_lock(&fpq->lock); 648 clear_bit(FR_SENT, &req->flags); 649 spin_unlock(&fpq->lock); 650 651 fuse_request_end(req); 652 spin_lock(&fsvq->lock); 653 dec_in_flight_req(fsvq); 654 spin_unlock(&fsvq->lock); 655} 656 657static void virtio_fs_complete_req_work(struct work_struct *work) 658{ 659 struct virtio_fs_req_work *w = 660 container_of(work, typeof(*w), done_work); 661 662 virtio_fs_request_complete(w->req, w->fsvq); 663 kfree(w); 664} 665 666static void virtio_fs_requests_done_work(struct work_struct *work) 667{ 668 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 669 done_work); 670 struct fuse_pqueue *fpq = &fsvq->fud->pq; 671 struct virtqueue *vq = fsvq->vq; 672 struct fuse_req *req; 673 struct fuse_req *next; 674 unsigned int len; 675 LIST_HEAD(reqs); 676 677 /* Collect completed requests off the virtqueue */ 678 spin_lock(&fsvq->lock); 679 do { 680 virtqueue_disable_cb(vq); 681 682 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 683 spin_lock(&fpq->lock); 684 list_move_tail(&req->list, &reqs); 685 spin_unlock(&fpq->lock); 686 } 687 } while (!virtqueue_enable_cb(vq)); 688 spin_unlock(&fsvq->lock); 689 690 /* End requests */ 691 list_for_each_entry_safe(req, next, &reqs, list) { 692 list_del_init(&req->list); 693 694 /* blocking async request completes in a worker context */ 695 if (req->args->may_block) { 696 struct virtio_fs_req_work *w; 697 698 w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); 699 INIT_WORK(&w->done_work, virtio_fs_complete_req_work); 700 w->fsvq = fsvq; 701 w->req = req; 702 schedule_work(&w->done_work); 703 } else { 704 virtio_fs_request_complete(req, fsvq); 705 } 706 } 707} 708 709/* Virtqueue interrupt handler */ 710static void virtio_fs_vq_done(struct virtqueue *vq) 711{ 712 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 713 714 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 715 716 schedule_work(&fsvq->done_work); 717} 718 719static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 720 int vq_type) 721{ 722 strscpy(fsvq->name, name, VQ_NAME_LEN); 723 spin_lock_init(&fsvq->lock); 724 INIT_LIST_HEAD(&fsvq->queued_reqs); 725 INIT_LIST_HEAD(&fsvq->end_reqs); 726 init_completion(&fsvq->in_flight_zero); 727 728 if (vq_type == VQ_REQUEST) { 729 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 730 INIT_DELAYED_WORK(&fsvq->dispatch_work, 731 virtio_fs_request_dispatch_work); 732 } else { 733 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 734 INIT_DELAYED_WORK(&fsvq->dispatch_work, 735 virtio_fs_hiprio_dispatch_work); 736 } 737} 738 739/* Initialize virtqueues */ 740static int virtio_fs_setup_vqs(struct virtio_device *vdev, 741 struct virtio_fs *fs) 742{ 743 struct virtqueue **vqs; 744 vq_callback_t **callbacks; 745 const char **names; 746 unsigned int i; 747 int ret = 0; 748 749 virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, 750 &fs->num_request_queues); 751 if (fs->num_request_queues == 0) 752 return -EINVAL; 753 754 fs->nvqs = VQ_REQUEST + fs->num_request_queues; 755 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 756 if (!fs->vqs) 757 return -ENOMEM; 758 759 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 760 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), 761 GFP_KERNEL); 762 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); 763 if (!vqs || !callbacks || !names) { 764 ret = -ENOMEM; 765 goto out; 766 } 767 768 /* Initialize the hiprio/forget request virtqueue */ 769 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 770 virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 771 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 772 773 /* Initialize the requests virtqueues */ 774 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 775 char vq_name[VQ_NAME_LEN]; 776 777 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 778 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 779 callbacks[i] = virtio_fs_vq_done; 780 names[i] = fs->vqs[i].name; 781 } 782 783 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); 784 if (ret < 0) 785 goto out; 786 787 for (i = 0; i < fs->nvqs; i++) 788 fs->vqs[i].vq = vqs[i]; 789 790 virtio_fs_start_all_queues(fs); 791out: 792 kfree(names); 793 kfree(callbacks); 794 kfree(vqs); 795 if (ret) 796 kfree(fs->vqs); 797 return ret; 798} 799 800/* Free virtqueues (device must already be reset) */ 801static void virtio_fs_cleanup_vqs(struct virtio_device *vdev) 802{ 803 vdev->config->del_vqs(vdev); 804} 805 806/* Map a window offset to a page frame number. The window offset will have 807 * been produced by .iomap_begin(), which maps a file offset to a window 808 * offset. 809 */ 810static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 811 long nr_pages, enum dax_access_mode mode, 812 void **kaddr, pfn_t *pfn) 813{ 814 struct virtio_fs *fs = dax_get_private(dax_dev); 815 phys_addr_t offset = PFN_PHYS(pgoff); 816 size_t max_nr_pages = fs->window_len / PAGE_SIZE - pgoff; 817 818 if (kaddr) 819 *kaddr = fs->window_kaddr + offset; 820 if (pfn) 821 *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 822 PFN_DEV | PFN_MAP); 823 return nr_pages > max_nr_pages ? max_nr_pages : nr_pages; 824} 825 826static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 827 pgoff_t pgoff, size_t nr_pages) 828{ 829 long rc; 830 void *kaddr; 831 832 rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, 833 NULL); 834 if (rc < 0) 835 return dax_mem2blk_err(rc); 836 837 memset(kaddr, 0, nr_pages << PAGE_SHIFT); 838 dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 839 return 0; 840} 841 842static const struct dax_operations virtio_fs_dax_ops = { 843 .direct_access = virtio_fs_direct_access, 844 .zero_page_range = virtio_fs_zero_page_range, 845}; 846 847static void virtio_fs_cleanup_dax(void *data) 848{ 849 struct dax_device *dax_dev = data; 850 851 kill_dax(dax_dev); 852 put_dax(dax_dev); 853} 854 855DEFINE_FREE(cleanup_dax, struct dax_dev *, if (!IS_ERR_OR_NULL(_T)) virtio_fs_cleanup_dax(_T)) 856 857static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 858{ 859 struct dax_device *dax_dev __free(cleanup_dax) = NULL; 860 struct virtio_shm_region cache_reg; 861 struct dev_pagemap *pgmap; 862 bool have_cache; 863 864 if (!IS_ENABLED(CONFIG_FUSE_DAX)) 865 return 0; 866 867 dax_dev = alloc_dax(fs, &virtio_fs_dax_ops); 868 if (IS_ERR(dax_dev)) { 869 int rc = PTR_ERR(dax_dev); 870 return rc == -EOPNOTSUPP ? 0 : rc; 871 } 872 873 /* Get cache region */ 874 have_cache = virtio_get_shm_region(vdev, &cache_reg, 875 (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 876 if (!have_cache) { 877 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 878 return 0; 879 } 880 881 if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 882 dev_name(&vdev->dev))) { 883 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 884 cache_reg.addr, cache_reg.len); 885 return -EBUSY; 886 } 887 888 dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 889 cache_reg.addr); 890 891 pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 892 if (!pgmap) 893 return -ENOMEM; 894 895 pgmap->type = MEMORY_DEVICE_FS_DAX; 896 897 /* Ideally we would directly use the PCI BAR resource but 898 * devm_memremap_pages() wants its own copy in pgmap. So 899 * initialize a struct resource from scratch (only the start 900 * and end fields will be used). 901 */ 902 pgmap->range = (struct range) { 903 .start = (phys_addr_t) cache_reg.addr, 904 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 905 }; 906 pgmap->nr_range = 1; 907 908 fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 909 if (IS_ERR(fs->window_kaddr)) 910 return PTR_ERR(fs->window_kaddr); 911 912 fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 913 fs->window_len = (phys_addr_t) cache_reg.len; 914 915 dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 916 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 917 918 fs->dax_dev = no_free_ptr(dax_dev); 919 return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 920 fs->dax_dev); 921} 922 923static int virtio_fs_probe(struct virtio_device *vdev) 924{ 925 struct virtio_fs *fs; 926 int ret; 927 928 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 929 if (!fs) 930 return -ENOMEM; 931 kobject_init(&fs->kobj, &virtio_fs_ktype); 932 vdev->priv = fs; 933 934 ret = virtio_fs_read_tag(vdev, fs); 935 if (ret < 0) 936 goto out; 937 938 ret = virtio_fs_setup_vqs(vdev, fs); 939 if (ret < 0) 940 goto out; 941 942 /* TODO vq affinity */ 943 944 ret = virtio_fs_setup_dax(vdev, fs); 945 if (ret < 0) 946 goto out_vqs; 947 948 /* Bring the device online in case the filesystem is mounted and 949 * requests need to be sent before we return. 950 */ 951 virtio_device_ready(vdev); 952 953 ret = virtio_fs_add_instance(vdev, fs); 954 if (ret < 0) 955 goto out_vqs; 956 957 return 0; 958 959out_vqs: 960 virtio_reset_device(vdev); 961 virtio_fs_cleanup_vqs(vdev); 962 963out: 964 vdev->priv = NULL; 965 kobject_put(&fs->kobj); 966 return ret; 967} 968 969static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 970{ 971 struct virtio_fs_vq *fsvq; 972 int i; 973 974 for (i = 0; i < fs->nvqs; i++) { 975 fsvq = &fs->vqs[i]; 976 spin_lock(&fsvq->lock); 977 fsvq->connected = false; 978 spin_unlock(&fsvq->lock); 979 } 980} 981 982static void virtio_fs_remove(struct virtio_device *vdev) 983{ 984 struct virtio_fs *fs = vdev->priv; 985 986 mutex_lock(&virtio_fs_mutex); 987 /* This device is going away. No one should get new reference */ 988 list_del_init(&fs->list); 989 sysfs_remove_link(&fs->kobj, "device"); 990 kobject_del(&fs->kobj); 991 virtio_fs_stop_all_queues(fs); 992 virtio_fs_drain_all_queues_locked(fs); 993 virtio_reset_device(vdev); 994 virtio_fs_cleanup_vqs(vdev); 995 996 vdev->priv = NULL; 997 /* Put device reference on virtio_fs object */ 998 virtio_fs_put(fs); 999 mutex_unlock(&virtio_fs_mutex); 1000} 1001 1002#ifdef CONFIG_PM_SLEEP 1003static int virtio_fs_freeze(struct virtio_device *vdev) 1004{ 1005 /* TODO need to save state here */ 1006 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 1007 return -EOPNOTSUPP; 1008} 1009 1010static int virtio_fs_restore(struct virtio_device *vdev) 1011{ 1012 /* TODO need to restore state here */ 1013 return 0; 1014} 1015#endif /* CONFIG_PM_SLEEP */ 1016 1017static const struct virtio_device_id id_table[] = { 1018 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 1019 {}, 1020}; 1021 1022static const unsigned int feature_table[] = {}; 1023 1024static struct virtio_driver virtio_fs_driver = { 1025 .driver.name = KBUILD_MODNAME, 1026 .driver.owner = THIS_MODULE, 1027 .id_table = id_table, 1028 .feature_table = feature_table, 1029 .feature_table_size = ARRAY_SIZE(feature_table), 1030 .probe = virtio_fs_probe, 1031 .remove = virtio_fs_remove, 1032#ifdef CONFIG_PM_SLEEP 1033 .freeze = virtio_fs_freeze, 1034 .restore = virtio_fs_restore, 1035#endif 1036}; 1037 1038static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 1039__releases(fiq->lock) 1040{ 1041 struct fuse_forget_link *link; 1042 struct virtio_fs_forget *forget; 1043 struct virtio_fs_forget_req *req; 1044 struct virtio_fs *fs; 1045 struct virtio_fs_vq *fsvq; 1046 u64 unique; 1047 1048 link = fuse_dequeue_forget(fiq, 1, NULL); 1049 unique = fuse_get_unique(fiq); 1050 1051 fs = fiq->priv; 1052 fsvq = &fs->vqs[VQ_HIPRIO]; 1053 spin_unlock(&fiq->lock); 1054 1055 /* Allocate a buffer for the request */ 1056 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 1057 req = &forget->req; 1058 1059 req->ih = (struct fuse_in_header){ 1060 .opcode = FUSE_FORGET, 1061 .nodeid = link->forget_one.nodeid, 1062 .unique = unique, 1063 .len = sizeof(*req), 1064 }; 1065 req->arg = (struct fuse_forget_in){ 1066 .nlookup = link->forget_one.nlookup, 1067 }; 1068 1069 send_forget_request(fsvq, forget, false); 1070 kfree(link); 1071} 1072 1073static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 1074__releases(fiq->lock) 1075{ 1076 /* 1077 * TODO interrupts. 1078 * 1079 * Normal fs operations on a local filesystems aren't interruptible. 1080 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1081 * with shared lock between host and guest. 1082 */ 1083 spin_unlock(&fiq->lock); 1084} 1085 1086/* Count number of scatter-gather elements required */ 1087static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 1088 unsigned int num_pages, 1089 unsigned int total_len) 1090{ 1091 unsigned int i; 1092 unsigned int this_len; 1093 1094 for (i = 0; i < num_pages && total_len; i++) { 1095 this_len = min(page_descs[i].length, total_len); 1096 total_len -= this_len; 1097 } 1098 1099 return i; 1100} 1101 1102/* Return the number of scatter-gather list elements required */ 1103static unsigned int sg_count_fuse_req(struct fuse_req *req) 1104{ 1105 struct fuse_args *args = req->args; 1106 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1107 unsigned int size, total_sgs = 1 /* fuse_in_header */; 1108 1109 if (args->in_numargs - args->in_pages) 1110 total_sgs += 1; 1111 1112 if (args->in_pages) { 1113 size = args->in_args[args->in_numargs - 1].size; 1114 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1115 size); 1116 } 1117 1118 if (!test_bit(FR_ISREPLY, &req->flags)) 1119 return total_sgs; 1120 1121 total_sgs += 1 /* fuse_out_header */; 1122 1123 if (args->out_numargs - args->out_pages) 1124 total_sgs += 1; 1125 1126 if (args->out_pages) { 1127 size = args->out_args[args->out_numargs - 1].size; 1128 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1129 size); 1130 } 1131 1132 return total_sgs; 1133} 1134 1135/* Add pages to scatter-gather list and return number of elements used */ 1136static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 1137 struct page **pages, 1138 struct fuse_page_desc *page_descs, 1139 unsigned int num_pages, 1140 unsigned int total_len) 1141{ 1142 unsigned int i; 1143 unsigned int this_len; 1144 1145 for (i = 0; i < num_pages && total_len; i++) { 1146 sg_init_table(&sg[i], 1); 1147 this_len = min(page_descs[i].length, total_len); 1148 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 1149 total_len -= this_len; 1150 } 1151 1152 return i; 1153} 1154 1155/* Add args to scatter-gather list and return number of elements used */ 1156static unsigned int sg_init_fuse_args(struct scatterlist *sg, 1157 struct fuse_req *req, 1158 struct fuse_arg *args, 1159 unsigned int numargs, 1160 bool argpages, 1161 void *argbuf, 1162 unsigned int *len_used) 1163{ 1164 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 1165 unsigned int total_sgs = 0; 1166 unsigned int len; 1167 1168 len = fuse_len_args(numargs - argpages, args); 1169 if (len) 1170 sg_init_one(&sg[total_sgs++], argbuf, len); 1171 1172 if (argpages) 1173 total_sgs += sg_init_fuse_pages(&sg[total_sgs], 1174 ap->pages, ap->descs, 1175 ap->num_pages, 1176 args[numargs - 1].size); 1177 1178 if (len_used) 1179 *len_used = len; 1180 1181 return total_sgs; 1182} 1183 1184/* Add a request to a virtqueue and kick the device */ 1185static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1186 struct fuse_req *req, bool in_flight) 1187{ 1188 /* requests need at least 4 elements */ 1189 struct scatterlist *stack_sgs[6]; 1190 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 1191 struct scatterlist **sgs = stack_sgs; 1192 struct scatterlist *sg = stack_sg; 1193 struct virtqueue *vq; 1194 struct fuse_args *args = req->args; 1195 unsigned int argbuf_used = 0; 1196 unsigned int out_sgs = 0; 1197 unsigned int in_sgs = 0; 1198 unsigned int total_sgs; 1199 unsigned int i; 1200 int ret; 1201 bool notify; 1202 struct fuse_pqueue *fpq; 1203 1204 /* Does the sglist fit on the stack? */ 1205 total_sgs = sg_count_fuse_req(req); 1206 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1207 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 1208 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 1209 if (!sgs || !sg) { 1210 ret = -ENOMEM; 1211 goto out; 1212 } 1213 } 1214 1215 /* Use a bounce buffer since stack args cannot be mapped */ 1216 ret = copy_args_to_argbuf(req); 1217 if (ret < 0) 1218 goto out; 1219 1220 /* Request elements */ 1221 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 1222 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 1223 (struct fuse_arg *)args->in_args, 1224 args->in_numargs, args->in_pages, 1225 req->argbuf, &argbuf_used); 1226 1227 /* Reply elements */ 1228 if (test_bit(FR_ISREPLY, &req->flags)) { 1229 sg_init_one(&sg[out_sgs + in_sgs++], 1230 &req->out.h, sizeof(req->out.h)); 1231 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 1232 args->out_args, args->out_numargs, 1233 args->out_pages, 1234 req->argbuf + argbuf_used, NULL); 1235 } 1236 1237 WARN_ON(out_sgs + in_sgs != total_sgs); 1238 1239 for (i = 0; i < total_sgs; i++) 1240 sgs[i] = &sg[i]; 1241 1242 spin_lock(&fsvq->lock); 1243 1244 if (!fsvq->connected) { 1245 spin_unlock(&fsvq->lock); 1246 ret = -ENOTCONN; 1247 goto out; 1248 } 1249 1250 vq = fsvq->vq; 1251 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 1252 if (ret < 0) { 1253 spin_unlock(&fsvq->lock); 1254 goto out; 1255 } 1256 1257 /* Request successfully sent. */ 1258 fpq = &fsvq->fud->pq; 1259 spin_lock(&fpq->lock); 1260 list_add_tail(&req->list, fpq->processing); 1261 spin_unlock(&fpq->lock); 1262 set_bit(FR_SENT, &req->flags); 1263 /* matches barrier in request_wait_answer() */ 1264 smp_mb__after_atomic(); 1265 1266 if (!in_flight) 1267 inc_in_flight_req(fsvq); 1268 notify = virtqueue_kick_prepare(vq); 1269 1270 spin_unlock(&fsvq->lock); 1271 1272 if (notify) 1273 virtqueue_notify(vq); 1274 1275out: 1276 if (ret < 0 && req->argbuf) { 1277 kfree(req->argbuf); 1278 req->argbuf = NULL; 1279 } 1280 if (sgs != stack_sgs) { 1281 kfree(sgs); 1282 kfree(sg); 1283 } 1284 1285 return ret; 1286} 1287 1288static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 1289__releases(fiq->lock) 1290{ 1291 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ 1292 struct virtio_fs *fs; 1293 struct fuse_req *req; 1294 struct virtio_fs_vq *fsvq; 1295 int ret; 1296 1297 WARN_ON(list_empty(&fiq->pending)); 1298 req = list_last_entry(&fiq->pending, struct fuse_req, list); 1299 clear_bit(FR_PENDING, &req->flags); 1300 list_del_init(&req->list); 1301 WARN_ON(!list_empty(&fiq->pending)); 1302 spin_unlock(&fiq->lock); 1303 1304 fs = fiq->priv; 1305 1306 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", 1307 __func__, req->in.h.opcode, req->in.h.unique, 1308 req->in.h.nodeid, req->in.h.len, 1309 fuse_len_args(req->args->out_numargs, req->args->out_args)); 1310 1311 fsvq = &fs->vqs[queue_id]; 1312 ret = virtio_fs_enqueue_req(fsvq, req, false); 1313 if (ret < 0) { 1314 if (ret == -ENOMEM || ret == -ENOSPC) { 1315 /* 1316 * Virtqueue full. Retry submission from worker 1317 * context as we might be holding fc->bg_lock. 1318 */ 1319 spin_lock(&fsvq->lock); 1320 list_add_tail(&req->list, &fsvq->queued_reqs); 1321 inc_in_flight_req(fsvq); 1322 schedule_delayed_work(&fsvq->dispatch_work, 1323 msecs_to_jiffies(1)); 1324 spin_unlock(&fsvq->lock); 1325 return; 1326 } 1327 req->out.h.error = ret; 1328 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1329 1330 /* Can't end request in submission context. Use a worker */ 1331 spin_lock(&fsvq->lock); 1332 list_add_tail(&req->list, &fsvq->end_reqs); 1333 schedule_delayed_work(&fsvq->dispatch_work, 0); 1334 spin_unlock(&fsvq->lock); 1335 return; 1336 } 1337} 1338 1339static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1340 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1341 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1342 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1343 .release = virtio_fs_fiq_release, 1344}; 1345 1346static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1347{ 1348 ctx->rootmode = S_IFDIR; 1349 ctx->default_permissions = 1; 1350 ctx->allow_other = 1; 1351 ctx->max_read = UINT_MAX; 1352 ctx->blksize = 512; 1353 ctx->destroy = true; 1354 ctx->no_control = true; 1355 ctx->no_force_umount = true; 1356} 1357 1358static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) 1359{ 1360 struct fuse_mount *fm = get_fuse_mount_super(sb); 1361 struct fuse_conn *fc = fm->fc; 1362 struct virtio_fs *fs = fc->iq.priv; 1363 struct fuse_fs_context *ctx = fsc->fs_private; 1364 unsigned int i; 1365 int err; 1366 1367 virtio_fs_ctx_set_defaults(ctx); 1368 mutex_lock(&virtio_fs_mutex); 1369 1370 /* After holding mutex, make sure virtiofs device is still there. 1371 * Though we are holding a reference to it, drive ->remove might 1372 * still have cleaned up virtual queues. In that case bail out. 1373 */ 1374 err = -EINVAL; 1375 if (list_empty(&fs->list)) { 1376 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1377 goto err; 1378 } 1379 1380 err = -ENOMEM; 1381 /* Allocate fuse_dev for hiprio and notification queues */ 1382 for (i = 0; i < fs->nvqs; i++) { 1383 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1384 1385 fsvq->fud = fuse_dev_alloc(); 1386 if (!fsvq->fud) 1387 goto err_free_fuse_devs; 1388 } 1389 1390 /* virtiofs allocates and installs its own fuse devices */ 1391 ctx->fudptr = NULL; 1392 if (ctx->dax_mode != FUSE_DAX_NEVER) { 1393 if (ctx->dax_mode == FUSE_DAX_ALWAYS && !fs->dax_dev) { 1394 err = -EINVAL; 1395 pr_err("virtio-fs: dax can't be enabled as filesystem" 1396 " device does not support it.\n"); 1397 goto err_free_fuse_devs; 1398 } 1399 ctx->dax_dev = fs->dax_dev; 1400 } 1401 err = fuse_fill_super_common(sb, ctx); 1402 if (err < 0) 1403 goto err_free_fuse_devs; 1404 1405 for (i = 0; i < fs->nvqs; i++) { 1406 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1407 1408 fuse_dev_install(fsvq->fud, fc); 1409 } 1410 1411 /* Previous unmount will stop all queues. Start these again */ 1412 virtio_fs_start_all_queues(fs); 1413 fuse_send_init(fm); 1414 mutex_unlock(&virtio_fs_mutex); 1415 return 0; 1416 1417err_free_fuse_devs: 1418 virtio_fs_free_devs(fs); 1419err: 1420 mutex_unlock(&virtio_fs_mutex); 1421 return err; 1422} 1423 1424static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1425{ 1426 struct fuse_conn *fc = fm->fc; 1427 struct virtio_fs *vfs = fc->iq.priv; 1428 struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1429 1430 /* Stop dax worker. Soon evict_inodes() will be called which 1431 * will free all memory ranges belonging to all inodes. 1432 */ 1433 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1434 fuse_dax_cancel_work(fc); 1435 1436 /* Stop forget queue. Soon destroy will be sent */ 1437 spin_lock(&fsvq->lock); 1438 fsvq->connected = false; 1439 spin_unlock(&fsvq->lock); 1440 virtio_fs_drain_all_queues(vfs); 1441 1442 fuse_conn_destroy(fm); 1443 1444 /* fuse_conn_destroy() must have sent destroy. Stop all queues 1445 * and drain one more time and free fuse devices. Freeing fuse 1446 * devices will drop their reference on fuse_conn and that in 1447 * turn will drop its reference on virtio_fs object. 1448 */ 1449 virtio_fs_stop_all_queues(vfs); 1450 virtio_fs_drain_all_queues(vfs); 1451 virtio_fs_free_devs(vfs); 1452} 1453 1454static void virtio_kill_sb(struct super_block *sb) 1455{ 1456 struct fuse_mount *fm = get_fuse_mount_super(sb); 1457 bool last; 1458 1459 /* If mount failed, we can still be called without any fc */ 1460 if (sb->s_root) { 1461 last = fuse_mount_remove(fm); 1462 if (last) 1463 virtio_fs_conn_destroy(fm); 1464 } 1465 kill_anon_super(sb); 1466 fuse_mount_destroy(fm); 1467} 1468 1469static int virtio_fs_test_super(struct super_block *sb, 1470 struct fs_context *fsc) 1471{ 1472 struct fuse_mount *fsc_fm = fsc->s_fs_info; 1473 struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1474 1475 return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1476} 1477 1478static int virtio_fs_get_tree(struct fs_context *fsc) 1479{ 1480 struct virtio_fs *fs; 1481 struct super_block *sb; 1482 struct fuse_conn *fc = NULL; 1483 struct fuse_mount *fm; 1484 unsigned int virtqueue_size; 1485 int err = -EIO; 1486 1487 /* This gets a reference on virtio_fs object. This ptr gets installed 1488 * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() 1489 * to drop the reference to this object. 1490 */ 1491 fs = virtio_fs_find_instance(fsc->source); 1492 if (!fs) { 1493 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1494 return -EINVAL; 1495 } 1496 1497 virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); 1498 if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) 1499 goto out_err; 1500 1501 err = -ENOMEM; 1502 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1503 if (!fc) 1504 goto out_err; 1505 1506 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1507 if (!fm) 1508 goto out_err; 1509 1510 fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); 1511 fc->release = fuse_free_conn; 1512 fc->delete_stale = true; 1513 fc->auto_submounts = true; 1514 fc->sync_fs = true; 1515 1516 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1517 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, 1518 virtqueue_size - FUSE_HEADER_OVERHEAD); 1519 1520 fsc->s_fs_info = fm; 1521 sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc); 1522 if (fsc->s_fs_info) 1523 fuse_mount_destroy(fm); 1524 if (IS_ERR(sb)) 1525 return PTR_ERR(sb); 1526 1527 if (!sb->s_root) { 1528 err = virtio_fs_fill_super(sb, fsc); 1529 if (err) { 1530 deactivate_locked_super(sb); 1531 return err; 1532 } 1533 1534 sb->s_flags |= SB_ACTIVE; 1535 } 1536 1537 WARN_ON(fsc->root); 1538 fsc->root = dget(sb->s_root); 1539 return 0; 1540 1541out_err: 1542 kfree(fc); 1543 mutex_lock(&virtio_fs_mutex); 1544 virtio_fs_put(fs); 1545 mutex_unlock(&virtio_fs_mutex); 1546 return err; 1547} 1548 1549static const struct fs_context_operations virtio_fs_context_ops = { 1550 .free = virtio_fs_free_fsc, 1551 .parse_param = virtio_fs_parse_param, 1552 .get_tree = virtio_fs_get_tree, 1553}; 1554 1555static int virtio_fs_init_fs_context(struct fs_context *fsc) 1556{ 1557 struct fuse_fs_context *ctx; 1558 1559 if (fsc->purpose == FS_CONTEXT_FOR_SUBMOUNT) 1560 return fuse_init_fs_context_submount(fsc); 1561 1562 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1563 if (!ctx) 1564 return -ENOMEM; 1565 fsc->fs_private = ctx; 1566 fsc->ops = &virtio_fs_context_ops; 1567 return 0; 1568} 1569 1570static struct file_system_type virtio_fs_type = { 1571 .owner = THIS_MODULE, 1572 .name = "virtiofs", 1573 .init_fs_context = virtio_fs_init_fs_context, 1574 .kill_sb = virtio_kill_sb, 1575}; 1576 1577static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) 1578{ 1579 const struct virtio_fs *fs = container_of(kobj, struct virtio_fs, kobj); 1580 1581 add_uevent_var(env, "TAG=%s", fs->tag); 1582 return 0; 1583} 1584 1585static const struct kset_uevent_ops virtio_fs_uevent_ops = { 1586 .uevent = virtio_fs_uevent, 1587}; 1588 1589static int __init virtio_fs_sysfs_init(void) 1590{ 1591 virtio_fs_kset = kset_create_and_add("virtiofs", &virtio_fs_uevent_ops, 1592 fs_kobj); 1593 if (!virtio_fs_kset) 1594 return -ENOMEM; 1595 return 0; 1596} 1597 1598static void virtio_fs_sysfs_exit(void) 1599{ 1600 kset_unregister(virtio_fs_kset); 1601 virtio_fs_kset = NULL; 1602} 1603 1604static int __init virtio_fs_init(void) 1605{ 1606 int ret; 1607 1608 ret = virtio_fs_sysfs_init(); 1609 if (ret < 0) 1610 return ret; 1611 1612 ret = register_virtio_driver(&virtio_fs_driver); 1613 if (ret < 0) 1614 goto sysfs_exit; 1615 1616 ret = register_filesystem(&virtio_fs_type); 1617 if (ret < 0) 1618 goto unregister_virtio_driver; 1619 1620 return 0; 1621 1622unregister_virtio_driver: 1623 unregister_virtio_driver(&virtio_fs_driver); 1624sysfs_exit: 1625 virtio_fs_sysfs_exit(); 1626 return ret; 1627} 1628module_init(virtio_fs_init); 1629 1630static void __exit virtio_fs_exit(void) 1631{ 1632 unregister_filesystem(&virtio_fs_type); 1633 unregister_virtio_driver(&virtio_fs_driver); 1634 virtio_fs_sysfs_exit(); 1635} 1636module_exit(virtio_fs_exit); 1637 1638MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1639MODULE_DESCRIPTION("Virtio Filesystem"); 1640MODULE_LICENSE("GPL"); 1641MODULE_ALIAS_FS(KBUILD_MODNAME); 1642MODULE_DEVICE_TABLE(virtio, id_table); 1643