// SPDX-License-Identifier: GPL-2.0-only /* * VDPA simulator for block device. * * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. * Copyright (c) 2021, Red Hat Inc. All rights reserved. * */ #include #include #include #include #include #include #include #include #include "vdpa_sim.h" #define DRV_VERSION "0.1" #define DRV_AUTHOR "Max Gurtovoy " #define DRV_DESC "vDPA Device Simulator for block device" #define DRV_LICENSE "GPL v2" #define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \ (1ULL << VIRTIO_BLK_F_FLUSH) | \ (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \ (1ULL << VIRTIO_BLK_F_SEG_MAX) | \ (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \ (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \ (1ULL << VIRTIO_BLK_F_MQ) | \ (1ULL << VIRTIO_BLK_F_DISCARD) | \ (1ULL << VIRTIO_BLK_F_WRITE_ZEROES)) #define VDPASIM_BLK_CAPACITY 0x40000 #define VDPASIM_BLK_SIZE_MAX 0x1000 #define VDPASIM_BLK_SEG_MAX 32 #define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX /* 1 virtqueue, 1 address space, 1 virtqueue group */ #define VDPASIM_BLK_VQ_NUM 1 #define VDPASIM_BLK_AS_NUM 1 #define VDPASIM_BLK_GROUP_NUM 1 struct vdpasim_blk { struct vdpasim vdpasim; void *buffer; bool shared_backend; }; static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim) { return container_of(vdpasim, struct vdpasim_blk, vdpasim); } static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim"; static bool shared_backend; module_param(shared_backend, bool, 0444); MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices"); static void *shared_buffer; /* mutex to synchronize shared_buffer access */ static DEFINE_MUTEX(shared_buffer_mutex); static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk) { if (blk->shared_backend) mutex_lock(&shared_buffer_mutex); } static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk) { if (blk->shared_backend) mutex_unlock(&shared_buffer_mutex); } static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector, u64 num_sectors, u64 max_sectors) { if (start_sector > VDPASIM_BLK_CAPACITY) { dev_dbg(&vdpasim->vdpa.dev, "starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n", start_sector, VDPASIM_BLK_CAPACITY); } if (num_sectors > max_sectors) { dev_dbg(&vdpasim->vdpa.dev, "number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n", num_sectors, max_sectors); return false; } if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) { dev_dbg(&vdpasim->vdpa.dev, "request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n", start_sector, num_sectors, VDPASIM_BLK_CAPACITY); return false; } return true; } /* Returns 'true' if the request is handled (with or without an I/O error) * and the status is correctly written in the last byte of the 'in iov', * 'false' otherwise. */ static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim, struct vdpasim_virtqueue *vq) { struct vdpasim_blk *blk = sim_to_blk(vdpasim); size_t pushed = 0, to_pull, to_push; struct virtio_blk_outhdr hdr; bool handled = false; ssize_t bytes; loff_t offset; u64 sector; u8 status; u32 type; int ret; ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov, &vq->head, GFP_ATOMIC); if (ret != 1) return false; if (vq->out_iov.used < 1 || vq->in_iov.used < 1) { dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n", vq->out_iov.used, vq->in_iov.used); goto err; } if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) { dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n"); goto err; } /* The last byte is the status and we checked if the last iov has * enough room for it. */ to_push = vringh_kiov_length(&vq->in_iov) - 1; to_pull = vringh_kiov_length(&vq->out_iov); bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr, sizeof(hdr)); if (bytes != sizeof(hdr)) { dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n"); goto err; } to_pull -= bytes; type = vdpasim32_to_cpu(vdpasim, hdr.type); sector = vdpasim64_to_cpu(vdpasim, hdr.sector); offset = sector << SECTOR_SHIFT; status = VIRTIO_BLK_S_OK; if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT && sector != 0) { dev_dbg(&vdpasim->vdpa.dev, "sector must be 0 for %u request - sector: 0x%llx\n", type, sector); status = VIRTIO_BLK_S_IOERR; goto err_status; } switch (type) { case VIRTIO_BLK_T_IN: if (!vdpasim_blk_check_range(vdpasim, sector, to_push >> SECTOR_SHIFT, VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) { status = VIRTIO_BLK_S_IOERR; break; } vdpasim_blk_buffer_lock(blk); bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, blk->buffer + offset, to_push); vdpasim_blk_buffer_unlock(blk); if (bytes < 0) { dev_dbg(&vdpasim->vdpa.dev, "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", bytes, offset, to_push); status = VIRTIO_BLK_S_IOERR; break; } pushed += bytes; break; case VIRTIO_BLK_T_OUT: if (!vdpasim_blk_check_range(vdpasim, sector, to_pull >> SECTOR_SHIFT, VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) { status = VIRTIO_BLK_S_IOERR; break; } vdpasim_blk_buffer_lock(blk); bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, blk->buffer + offset, to_pull); vdpasim_blk_buffer_unlock(blk); if (bytes < 0) { dev_dbg(&vdpasim->vdpa.dev, "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", bytes, offset, to_pull); status = VIRTIO_BLK_S_IOERR; break; } break; case VIRTIO_BLK_T_GET_ID: bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, vdpasim_blk_id, VIRTIO_BLK_ID_BYTES); if (bytes < 0) { dev_dbg(&vdpasim->vdpa.dev, "vringh_iov_push_iotlb() error: %zd\n", bytes); status = VIRTIO_BLK_S_IOERR; break; } pushed += bytes; break; case VIRTIO_BLK_T_FLUSH: /* nothing to do */ break; case VIRTIO_BLK_T_DISCARD: case VIRTIO_BLK_T_WRITE_ZEROES: { struct virtio_blk_discard_write_zeroes range; u32 num_sectors, flags; if (to_pull != sizeof(range)) { dev_dbg(&vdpasim->vdpa.dev, "discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n", to_pull, sizeof(range)); status = VIRTIO_BLK_S_IOERR; break; } bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range, to_pull); if (bytes < 0) { dev_dbg(&vdpasim->vdpa.dev, "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n", bytes, offset, to_pull); status = VIRTIO_BLK_S_IOERR; break; } sector = le64_to_cpu(range.sector); offset = sector << SECTOR_SHIFT; num_sectors = le32_to_cpu(range.num_sectors); flags = le32_to_cpu(range.flags); if (type == VIRTIO_BLK_T_DISCARD && flags != 0) { dev_dbg(&vdpasim->vdpa.dev, "discard unexpected flags set - flags: 0x%x\n", flags); status = VIRTIO_BLK_S_UNSUPP; break; } if (type == VIRTIO_BLK_T_WRITE_ZEROES && flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) { dev_dbg(&vdpasim->vdpa.dev, "write_zeroes unexpected flags set - flags: 0x%x\n", flags); status = VIRTIO_BLK_S_UNSUPP; break; } if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors, VDPASIM_BLK_DWZ_MAX_SECTORS)) { status = VIRTIO_BLK_S_IOERR; break; } if (type == VIRTIO_BLK_T_WRITE_ZEROES) { vdpasim_blk_buffer_lock(blk); memset(blk->buffer + offset, 0, num_sectors << SECTOR_SHIFT); vdpasim_blk_buffer_unlock(blk); } break; } default: dev_dbg(&vdpasim->vdpa.dev, "Unsupported request type %d\n", type); status = VIRTIO_BLK_S_IOERR; break; } err_status: /* If some operations fail, we need to skip the remaining bytes * to put the status in the last byte */ if (to_push - pushed > 0) vringh_kiov_advance(&vq->in_iov, to_push - pushed); /* Last byte is the status */ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1); if (bytes != 1) goto err; pushed += bytes; /* Make sure data is wrote before advancing index */ smp_wmb(); handled = true; err: vringh_complete_iotlb(&vq->vring, vq->head, pushed); return handled; } static void vdpasim_blk_work(struct vdpasim *vdpasim) { bool reschedule = false; int i; mutex_lock(&vdpasim->mutex); if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) goto out; if (!vdpasim->running) goto out; for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[i]; int reqs = 0; if (!vq->ready) continue; while (vdpasim_blk_handle_req(vdpasim, vq)) { /* Make sure used is visible before rasing the interrupt. */ smp_wmb(); local_bh_disable(); if (vringh_need_notify_iotlb(&vq->vring) > 0) vringh_notify(&vq->vring); local_bh_enable(); if (++reqs > 4) { reschedule = true; break; } } } out: mutex_unlock(&vdpasim->mutex); if (reschedule) vdpasim_schedule_work(vdpasim); } static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config) { struct virtio_blk_config *blk_config = config; memset(config, 0, sizeof(struct virtio_blk_config)); blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY); blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX); blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX); blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM); blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1); blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1); blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); /* VIRTIO_BLK_F_DISCARD */ blk_config->discard_sector_alignment = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE); blk_config->max_discard_sectors = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS); blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1); /* VIRTIO_BLK_F_WRITE_ZEROES */ blk_config->max_write_zeroes_sectors = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS); blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1); } static void vdpasim_blk_free(struct vdpasim *vdpasim) { struct vdpasim_blk *blk = sim_to_blk(vdpasim); if (!blk->shared_backend) kvfree(blk->buffer); } static void vdpasim_blk_mgmtdev_release(struct device *dev) { } static struct device vdpasim_blk_mgmtdev = { .init_name = "vdpasim_blk", .release = vdpasim_blk_mgmtdev_release, }; static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, const struct vdpa_dev_set_config *config) { struct vdpasim_dev_attr dev_attr = {}; struct vdpasim_blk *blk; struct vdpasim *simdev; int ret; dev_attr.mgmt_dev = mdev; dev_attr.name = name; dev_attr.id = VIRTIO_ID_BLOCK; dev_attr.supported_features = VDPASIM_BLK_FEATURES; dev_attr.nvqs = VDPASIM_BLK_VQ_NUM; dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM; dev_attr.nas = VDPASIM_BLK_AS_NUM; dev_attr.alloc_size = sizeof(struct vdpasim_blk); dev_attr.config_size = sizeof(struct virtio_blk_config); dev_attr.get_config = vdpasim_blk_get_config; dev_attr.work_fn = vdpasim_blk_work; dev_attr.free = vdpasim_blk_free; simdev = vdpasim_create(&dev_attr, config); if (IS_ERR(simdev)) return PTR_ERR(simdev); blk = sim_to_blk(simdev); blk->shared_backend = shared_backend; if (blk->shared_backend) { blk->buffer = shared_buffer; } else { blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!blk->buffer) { ret = -ENOMEM; goto put_dev; } } ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM); if (ret) goto put_dev; return 0; put_dev: put_device(&simdev->vdpa.dev); return ret; } static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev) { struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa); _vdpa_unregister_device(&simdev->vdpa); } static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = { .dev_add = vdpasim_blk_dev_add, .dev_del = vdpasim_blk_dev_del }; static struct virtio_device_id id_table[] = { { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, { 0 }, }; static struct vdpa_mgmt_dev mgmt_dev = { .device = &vdpasim_blk_mgmtdev, .id_table = id_table, .ops = &vdpasim_blk_mgmtdev_ops, }; static int __init vdpasim_blk_init(void) { int ret; ret = device_register(&vdpasim_blk_mgmtdev); if (ret) { put_device(&vdpasim_blk_mgmtdev); return ret; } ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) goto parent_err; if (shared_backend) { shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT, GFP_KERNEL); if (!shared_buffer) { ret = -ENOMEM; goto mgmt_dev_err; } } return 0; mgmt_dev_err: vdpa_mgmtdev_unregister(&mgmt_dev); parent_err: device_unregister(&vdpasim_blk_mgmtdev); return ret; } static void __exit vdpasim_blk_exit(void) { kvfree(shared_buffer); vdpa_mgmtdev_unregister(&mgmt_dev); device_unregister(&vdpasim_blk_mgmtdev); } module_init(vdpasim_blk_init) module_exit(vdpasim_blk_exit) MODULE_VERSION(DRV_VERSION); MODULE_LICENSE(DRV_LICENSE); MODULE_AUTHOR(DRV_AUTHOR); MODULE_DESCRIPTION(DRV_DESC);