// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDPA simulator for block device.
 *
 * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
 * Copyright (c) 2021, Red Hat Inc. All rights reserved.
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_blk.h>

#include "vdpa_sim.h"

#define DRV_VERSION  "0.1"
#define DRV_AUTHOR   "Max Gurtovoy <mgurtovoy@nvidia.com>"
#define DRV_DESC     "vDPA Device Simulator for block device"
#define DRV_LICENSE  "GPL v2"

#define VDPASIM_BLK_FEATURES	(VDPASIM_FEATURES | \
				 (1ULL << VIRTIO_BLK_F_FLUSH)    | \
				 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
				 (1ULL << VIRTIO_BLK_F_SEG_MAX)  | \
				 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
				 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
				 (1ULL << VIRTIO_BLK_F_MQ)       | \
				 (1ULL << VIRTIO_BLK_F_DISCARD)  | \
				 (1ULL << VIRTIO_BLK_F_WRITE_ZEROES))

#define VDPASIM_BLK_CAPACITY	0x40000
#define VDPASIM_BLK_SIZE_MAX	0x1000
#define VDPASIM_BLK_SEG_MAX	32
#define VDPASIM_BLK_DWZ_MAX_SECTORS UINT_MAX

/* 1 virtqueue, 1 address space, 1 virtqueue group */
#define VDPASIM_BLK_VQ_NUM	1
#define VDPASIM_BLK_AS_NUM	1
#define VDPASIM_BLK_GROUP_NUM	1

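/* State of a simulated block device.
 * @vdpasim: embedded core simulator state
 * @buffer: backing storage, either private to this device or the
 *          module-wide shared buffer (see the shared_backend parameter)
 * @shared_backend: true if @buffer points to the shared buffer
 */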
struct vdpasim_blk {
	struct vdpasim vdpasim;
	void *buffer;
	bool shared_backend;
};

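/* Get the block device state embedding the given core simulator state. */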
static struct vdpasim_blk *sim_to_blk(struct vdpasim *vdpasim)
{
	return container_of(vdpasim, struct vdpasim_blk, vdpasim);
}

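/* Device ID string returned to the driver for VIRTIO_BLK_T_GET_ID requests. */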
static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";

static bool shared_backend;
module_param(shared_backend, bool, 0444);
MODULE_PARM_DESC(shared_backend, "Enable the shared backend between virtio-blk devices");

static void *shared_buffer;
/* mutex to synchronize shared_buffer access */
static DEFINE_MUTEX(shared_buffer_mutex);

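/* Take the buffer mutex only with the shared backend: a private buffer is
 * only touched by its own device, while the shared one can be accessed by
 * several devices concurrently.
 */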
static void vdpasim_blk_buffer_lock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_lock(&shared_buffer_mutex);
}

static void vdpasim_blk_buffer_unlock(struct vdpasim_blk *blk)
{
	if (blk->shared_backend)
		mutex_unlock(&shared_buffer_mutex);
}

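/* Returns 'true' if the range [start_sector, start_sector + num_sectors)
 * fits in the device capacity and num_sectors does not exceed the maximum
 * allowed in a single request, 'false' otherwise.
 */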
static bool vdpasim_blk_check_range(struct vdpasim *vdpasim, u64 start_sector,
				    u64 num_sectors, u64 max_sectors)
{
	if (start_sector > VDPASIM_BLK_CAPACITY) {
		dev_dbg(&vdpasim->vdpa.dev,
			"starting sector exceeds the capacity - start: 0x%llx capacity: 0x%x\n",
			start_sector, VDPASIM_BLK_CAPACITY);
		return false;
	}

	if (num_sectors > max_sectors) {
		dev_dbg(&vdpasim->vdpa.dev,
			"number of sectors exceeds the max allowed in a request - num: 0x%llx max: 0x%llx\n",
			num_sectors, max_sectors);
		return false;
	}

	if (num_sectors > VDPASIM_BLK_CAPACITY - start_sector) {
		dev_dbg(&vdpasim->vdpa.dev,
			"request exceeds the capacity - start: 0x%llx num: 0x%llx capacity: 0x%x\n",
			start_sector, num_sectors, VDPASIM_BLK_CAPACITY);
		return false;
	}

	return true;
}


/* Returns 'true' if the request is handled (with or without an I/O error)
 * and the status is correctly written in the last byte of the 'in iov',
 * 'false' otherwise.
 */
static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
				   struct vdpasim_virtqueue *vq)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);
	size_t pushed = 0, to_pull, to_push;
	struct virtio_blk_outhdr hdr;
	bool handled = false;
	ssize_t bytes;
	loff_t offset;
	u64 sector;
	u8 status;
	u32 type;
	int ret;

	ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
				   &vq->head, GFP_ATOMIC);
	if (ret != 1)
		return false;

	if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
			vq->out_iov.used, vq->in_iov.used);
		goto err;
	}

	if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
		dev_dbg(&vdpasim->vdpa.dev, "request in header too short\n");
		goto err;
	}

	/* The last byte is the status and we checked if the last iov has
	 * enough room for it.
	 */
	to_push = vringh_kiov_length(&vq->in_iov) - 1;

	to_pull = vringh_kiov_length(&vq->out_iov);

	bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
				      sizeof(hdr));
	if (bytes != sizeof(hdr)) {
		dev_dbg(&vdpasim->vdpa.dev, "request out header too short\n");
		goto err;
	}

	to_pull -= bytes;

	type = vdpasim32_to_cpu(vdpasim, hdr.type);
	sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
	offset = sector << SECTOR_SHIFT;
	status = VIRTIO_BLK_S_OK;

	if (type != VIRTIO_BLK_T_IN && type != VIRTIO_BLK_T_OUT &&
	    sector != 0) {
		dev_dbg(&vdpasim->vdpa.dev,
			"sector must be 0 for %u request - sector: 0x%llx\n",
			type, sector);
		status = VIRTIO_BLK_S_IOERR;
		goto err_status;
	}

	switch (type) {
	case VIRTIO_BLK_T_IN:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_push >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      blk->buffer + offset, to_push);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_push);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_OUT:
		if (!vdpasim_blk_check_range(vdpasim, sector,
					     to_pull >> SECTOR_SHIFT,
					     VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		vdpasim_blk_buffer_lock(blk);
		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
					      blk->buffer + offset, to_pull);
		vdpasim_blk_buffer_unlock(blk);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}
		break;

	case VIRTIO_BLK_T_GET_ID:
		bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
					      vdpasim_blk_id,
					      VIRTIO_BLK_ID_BYTES);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_push_iotlb() error: %zd\n", bytes);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		pushed += bytes;
		break;

	case VIRTIO_BLK_T_FLUSH:
		/* nothing to do */
		break;

	case VIRTIO_BLK_T_DISCARD:
	case VIRTIO_BLK_T_WRITE_ZEROES: {
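		/* Both request types carry a single
		 * virtio_blk_discard_write_zeroes segment in the out iov
		 * describing the range to operate on.
		 */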
		struct virtio_blk_discard_write_zeroes range;
		u32 num_sectors, flags;

		if (to_pull != sizeof(range)) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard/write_zeroes header len: 0x%zx [expected: 0x%zx]\n",
				to_pull, sizeof(range));
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &range,
					      to_pull);
		if (bytes < 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
				bytes, offset, to_pull);
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		sector = le64_to_cpu(range.sector);
		offset = sector << SECTOR_SHIFT;
		num_sectors = le32_to_cpu(range.num_sectors);
		flags = le32_to_cpu(range.flags);

		if (type == VIRTIO_BLK_T_DISCARD && flags != 0) {
			dev_dbg(&vdpasim->vdpa.dev,
				"discard unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES &&
		    flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			dev_dbg(&vdpasim->vdpa.dev,
				"write_zeroes unexpected flags set - flags: 0x%x\n",
				flags);
			status = VIRTIO_BLK_S_UNSUPP;
			break;
		}

		if (!vdpasim_blk_check_range(vdpasim, sector, num_sectors,
					     VDPASIM_BLK_DWZ_MAX_SECTORS)) {
			status = VIRTIO_BLK_S_IOERR;
			break;
		}

		if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
			vdpasim_blk_buffer_lock(blk);
			memset(blk->buffer + offset, 0,
			       num_sectors << SECTOR_SHIFT);
			vdpasim_blk_buffer_unlock(blk);
		}

		break;
	}
	default:
		dev_dbg(&vdpasim->vdpa.dev,
			"Unsupported request type %d\n", type);
		status = VIRTIO_BLK_S_IOERR;
		break;
	}

err_status:
	/* If some operations fail, we need to skip the remaining bytes
	 * to put the status in the last byte
	 */
	if (to_push - pushed > 0)
		vringh_kiov_advance(&vq->in_iov, to_push - pushed);

	/* Last byte is the status */
	bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
	if (bytes != 1)
		goto err;

	pushed += bytes;

	/* Make sure data is written before advancing index */
	smp_wmb();

	handled = true;

err:
	vringh_complete_iotlb(&vq->vring, vq->head, pushed);

	return handled;
}

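/* Device work function: process pending requests on every ready virtqueue
 * while the device is running. At most 5 requests per queue are handled in
 * one pass; if more remain, the work is rescheduled so other work on the
 * CPU is not starved.
 */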
static void vdpasim_blk_work(struct vdpasim *vdpasim)
{
	bool reschedule = false;
	int i;

	mutex_lock(&vdpasim->mutex);

	if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto out;

	if (!vdpasim->running)
		goto out;

	for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
		struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
		int reqs = 0;

		if (!vq->ready)
			continue;

		while (vdpasim_blk_handle_req(vdpasim, vq)) {
			/* Make sure used is visible before raising the interrupt. */
			smp_wmb();

			local_bh_disable();
			if (vringh_need_notify_iotlb(&vq->vring) > 0)
				vringh_notify(&vq->vring);
			local_bh_enable();

			if (++reqs > 4) {
				reschedule = true;
				break;
			}
		}
	}
out:
	mutex_unlock(&vdpasim->mutex);

	if (reschedule)
		vdpasim_schedule_work(vdpasim);
}

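/* Fill the virtio-blk config space read by the driver. */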
static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
{
	struct virtio_blk_config *blk_config = config;

	memset(config, 0, sizeof(struct virtio_blk_config));

	blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
	blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
	blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
	blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
	blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
	blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
	blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	/* VIRTIO_BLK_F_DISCARD */
	blk_config->discard_sector_alignment =
		cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
	blk_config->max_discard_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_discard_seg = cpu_to_vdpasim32(vdpasim, 1);
	/* VIRTIO_BLK_F_WRITE_ZEROES */
	blk_config->max_write_zeroes_sectors =
		cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_DWZ_MAX_SECTORS);
	blk_config->max_write_zeroes_seg = cpu_to_vdpasim32(vdpasim, 1);
}

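/* Free the backing buffer on device release; the shared buffer is owned by
 * the module and freed only at module exit.
 */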
static void vdpasim_blk_free(struct vdpasim *vdpasim)
{
	struct vdpasim_blk *blk = sim_to_blk(vdpasim);

	if (!blk->shared_backend)
		kvfree(blk->buffer);
}

static void vdpasim_blk_mgmtdev_release(struct device *dev)
{
}

static struct device vdpasim_blk_mgmtdev = {
	.init_name = "vdpasim_blk",
	.release = vdpasim_blk_mgmtdev_release,
};

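/* Management device .dev_add operation: allocate the simulator, set up the
 * backing buffer and register the new vDPA device. With the iproute2 'vdpa'
 * tool this is triggered by, e.g.:
 *
 *   # vdpa dev add mgmtdev vdpasim_blk name blk0
 */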
static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
			       const struct vdpa_dev_set_config *config)
{
	struct vdpasim_dev_attr dev_attr = {};
	struct vdpasim_blk *blk;
	struct vdpasim *simdev;
	int ret;

	dev_attr.mgmt_dev = mdev;
	dev_attr.name = name;
	dev_attr.id = VIRTIO_ID_BLOCK;
	dev_attr.supported_features = VDPASIM_BLK_FEATURES;
	dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
	dev_attr.ngroups = VDPASIM_BLK_GROUP_NUM;
	dev_attr.nas = VDPASIM_BLK_AS_NUM;
	dev_attr.alloc_size = sizeof(struct vdpasim_blk);
	dev_attr.config_size = sizeof(struct virtio_blk_config);
	dev_attr.get_config = vdpasim_blk_get_config;
	dev_attr.work_fn = vdpasim_blk_work;
	dev_attr.free = vdpasim_blk_free;

	simdev = vdpasim_create(&dev_attr, config);
	if (IS_ERR(simdev))
		return PTR_ERR(simdev);

	blk = sim_to_blk(simdev);
	blk->shared_backend = shared_backend;

	if (blk->shared_backend) {
		blk->buffer = shared_buffer;
	} else {
		blk->buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
				       GFP_KERNEL);
		if (!blk->buffer) {
			ret = -ENOMEM;
			goto put_dev;
		}
	}

	ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
	if (ret)
		goto put_dev;

	return 0;

put_dev:
	put_device(&simdev->vdpa.dev);
	return ret;
}

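/* Management device .dev_del operation: unregister the device; its
 * resources are released through vdpasim_blk_free() when the last
 * reference is dropped.
 */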
static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
				struct vdpa_device *dev)
{
	struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);

	_vdpa_unregister_device(&simdev->vdpa);
}

static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
	.dev_add = vdpasim_blk_dev_add,
	.dev_del = vdpasim_blk_dev_del
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vdpasim_blk_mgmtdev,
	.id_table = id_table,
	.ops = &vdpasim_blk_mgmtdev_ops,
};

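/* Register the management device at module load. If the shared backend is
 * enabled, the backing buffer is allocated once here and shared by all the
 * devices created afterwards.
 */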
static int __init vdpasim_blk_init(void)
{
	int ret;

	ret = device_register(&vdpasim_blk_mgmtdev);
	if (ret) {
		put_device(&vdpasim_blk_mgmtdev);
		return ret;
	}

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto parent_err;

	if (shared_backend) {
		shared_buffer = kvzalloc(VDPASIM_BLK_CAPACITY << SECTOR_SHIFT,
					 GFP_KERNEL);
		if (!shared_buffer) {
			ret = -ENOMEM;
			goto mgmt_dev_err;
		}
	}

	return 0;
mgmt_dev_err:
	vdpa_mgmtdev_unregister(&mgmt_dev);
parent_err:
	device_unregister(&vdpasim_blk_mgmtdev);
	return ret;
}

static void __exit vdpasim_blk_exit(void)
{
	kvfree(shared_buffer);
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vdpasim_blk_mgmtdev);
}

module_init(vdpasim_blk_init)
module_exit(vdpasim_blk_exit)

MODULE_VERSION(DRV_VERSION);
MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);