// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved
 */

#include <linux/device.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/vfio_pci_core.h>
#include <linux/virtio_pci.h>
#include <linux/virtio_net.h>
#include <linux/virtio_pci_admin.h>

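/*
 * Per-VF state: the vfio-pci core device plus what is needed to present a
 * transitional virtio-net device, i.e. a virtual legacy I/O BAR0 backed by
 * 'bar0_virtual_buf', the mapped queue notification address, and shadow
 * copies of the PCI command register and BAR0 as written by the user.
 */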
struct virtiovf_pci_core_device {
	struct vfio_pci_core_device core_device;
	u8 *bar0_virtual_buf;
	/* synchronize access to the virtual buf */
	struct mutex bar_mutex;
	void __iomem *notify_addr;
	u64 notify_offset;
	__le32 pci_base_addr_0;
	__le16 pci_cmd;
	u8 bar0_virtual_buf_size;
	u8 notify_bar;
};

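/*
 * Translate an access to the virtual legacy I/O BAR0 into a virtio admin
 * legacy I/O command. Offsets below the device-specific configuration
 * (whose start depends on whether MSI-X is enabled) target the common
 * configuration area; the rest target the device configuration area.
 */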
static int
virtiovf_issue_legacy_rw_cmd(struct virtiovf_pci_core_device *virtvdev,
			     loff_t pos, char __user *buf,
			     size_t count, bool read)
{
	bool msix_enabled =
		(virtvdev->core_device.irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
	struct pci_dev *pdev = virtvdev->core_device.pdev;
	u8 *bar0_buf = virtvdev->bar0_virtual_buf;
	bool common;
	u8 offset;
	int ret;

	common = pos < VIRTIO_PCI_CONFIG_OFF(msix_enabled);
	/* offset within the relevant configuration area */
	offset = common ? pos : pos - VIRTIO_PCI_CONFIG_OFF(msix_enabled);
	mutex_lock(&virtvdev->bar_mutex);
	if (read) {
		if (common)
			ret = virtio_pci_admin_legacy_common_io_read(pdev, offset,
					count, bar0_buf + pos);
		else
			ret = virtio_pci_admin_legacy_device_io_read(pdev, offset,
					count, bar0_buf + pos);
		if (ret)
			goto out;
		if (copy_to_user(buf, bar0_buf + pos, count))
			ret = -EFAULT;
	} else {
		if (copy_from_user(bar0_buf + pos, buf, count)) {
			ret = -EFAULT;
			goto out;
		}

		if (common)
			ret = virtio_pci_admin_legacy_common_io_write(pdev, offset,
					count, bar0_buf + pos);
		else
			ret = virtio_pci_admin_legacy_device_io_write(pdev, offset,
					count, bar0_buf + pos);
	}
out:
	mutex_unlock(&virtvdev->bar_mutex);
	return ret;
}

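/*
 * Emulate a read/write of the legacy I/O BAR0. Queue notifications are
 * forwarded to the device's notification area mapped at open time; all
 * other offsets are serviced through admin legacy I/O commands. Accesses
 * are rejected while I/O decoding is disabled in the shadowed command
 * register.
 */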
static int
virtiovf_pci_bar0_rw(struct virtiovf_pci_core_device *virtvdev,
		     loff_t pos, char __user *buf,
		     size_t count, bool read)
{
	struct vfio_pci_core_device *core_device = &virtvdev->core_device;
	struct pci_dev *pdev = core_device->pdev;
	u16 queue_notify;
	int ret;

	if (!(le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO))
		return -EIO;

	if (pos + count > virtvdev->bar0_virtual_buf_size)
		return -EINVAL;

	ret = pm_runtime_resume_and_get(&pdev->dev);
	if (ret) {
		pci_info_ratelimited(pdev, "runtime resume failed %d\n", ret);
		return -EIO;
	}

	switch (pos) {
	case VIRTIO_PCI_QUEUE_NOTIFY:
		if (count != sizeof(queue_notify)) {
			ret = -EINVAL;
			goto end;
		}
		if (read) {
			ret = vfio_pci_core_ioread16(core_device, true, &queue_notify,
						     virtvdev->notify_addr);
			if (ret)
				goto end;
			if (copy_to_user(buf, &queue_notify,
					 sizeof(queue_notify))) {
				ret = -EFAULT;
				goto end;
			}
		} else {
			if (copy_from_user(&queue_notify, buf, count)) {
				ret = -EFAULT;
				goto end;
			}
			ret = vfio_pci_core_iowrite16(core_device, true, queue_notify,
						      virtvdev->notify_addr);
		}
		break;
	default:
		ret = virtiovf_issue_legacy_rw_cmd(virtvdev, pos, buf, count,
						   read);
	}

end:
	pm_runtime_put(&pdev->dev);
	return ret ? ret : count;
}

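/*
 * Overlay the config space fields that make the VF appear as a transitional
 * virtio-net device on top of the core read: the transitional device ID,
 * revision 0, an I/O BAR0, the legacy subsystem IDs and the shadowed I/O
 * enable bit in the command register.
 */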
static ssize_t virtiovf_pci_read_config(struct vfio_device *core_vdev,
					char __user *buf, size_t count,
					loff_t *ppos)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	size_t register_offset;
	loff_t copy_offset;
	size_t copy_count;
	__le32 val32;
	__le16 val16;
	u8 val8;
	int ret;

	ret = vfio_pci_core_read(core_vdev, buf, count, ppos);
	if (ret < 0)
		return ret;

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_DEVICE_ID,
						sizeof(val16), &copy_offset,
						&copy_count, &register_offset)) {
		val16 = cpu_to_le16(VIRTIO_TRANS_ID_NET);
		if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset, copy_count))
			return -EFAULT;
	}

	if ((le16_to_cpu(virtvdev->pci_cmd) & PCI_COMMAND_IO) &&
	    vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND,
						sizeof(val16), &copy_offset,
						&copy_count, &register_offset)) {
		if (copy_from_user((void *)&val16 + register_offset, buf + copy_offset,
				   copy_count))
			return -EFAULT;
		val16 |= cpu_to_le16(PCI_COMMAND_IO);
		if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset,
				 copy_count))
			return -EFAULT;
	}

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_REVISION_ID,
						sizeof(val8), &copy_offset,
						&copy_count, &register_offset)) {
		/* Transitional devices need to have revision 0 */
		val8 = 0;
		if (copy_to_user(buf + copy_offset, &val8, copy_count))
			return -EFAULT;
	}

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0,
						sizeof(val32), &copy_offset,
						&copy_count, &register_offset)) {
		u32 bar_mask = ~(virtvdev->bar0_virtual_buf_size - 1);
		u32 pci_base_addr_0 = le32_to_cpu(virtvdev->pci_base_addr_0);

		val32 = cpu_to_le32((pci_base_addr_0 & bar_mask) | PCI_BASE_ADDRESS_SPACE_IO);
		if (copy_to_user(buf + copy_offset, (void *)&val32 + register_offset, copy_count))
			return -EFAULT;
	}

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_ID,
						sizeof(val16), &copy_offset,
						&copy_count, &register_offset)) {
		/*
		 * Transitional devices use the PCI subsystem device id as
		 * the virtio device id, just as the legacy driver always did.
		 */
		val16 = cpu_to_le16(VIRTIO_ID_NET);
		if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset,
				 copy_count))
			return -EFAULT;
	}

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_SUBSYSTEM_VENDOR_ID,
						sizeof(val16), &copy_offset,
						&copy_count, &register_offset)) {
		val16 = cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET);
		if (copy_to_user(buf + copy_offset, (void *)&val16 + register_offset,
				 copy_count))
			return -EFAULT;
	}

	return count;
}

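/* Route config space and BAR0 reads to the emulation paths above. */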
static ssize_t
virtiovf_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
		       size_t count, loff_t *ppos)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (!count)
		return 0;

	if (index == VFIO_PCI_CONFIG_REGION_INDEX)
		return virtiovf_pci_read_config(core_vdev, buf, count, ppos);

	if (index == VFIO_PCI_BAR0_REGION_INDEX)
		return virtiovf_pci_bar0_rw(virtvdev, pos, buf, count, true);

	return vfio_pci_core_read(core_vdev, buf, count, ppos);
}

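/*
 * Shadow user writes to the command register and to BAR0 before letting the
 * core handle the write; the shadow copies feed the config read emulation
 * and the BAR0 access checks.
 */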
static ssize_t virtiovf_pci_write_config(struct vfio_device *core_vdev,
					 const char __user *buf, size_t count,
					 loff_t *ppos)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	size_t register_offset;
	loff_t copy_offset;
	size_t copy_count;

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_COMMAND,
						sizeof(virtvdev->pci_cmd),
						&copy_offset, &copy_count,
						&register_offset)) {
		if (copy_from_user((void *)&virtvdev->pci_cmd + register_offset,
				   buf + copy_offset,
				   copy_count))
			return -EFAULT;
	}

	if (vfio_pci_core_range_intersect_range(pos, count, PCI_BASE_ADDRESS_0,
						sizeof(virtvdev->pci_base_addr_0),
						&copy_offset, &copy_count,
						&register_offset)) {
		if (copy_from_user((void *)&virtvdev->pci_base_addr_0 + register_offset,
				   buf + copy_offset,
				   copy_count))
			return -EFAULT;
	}

	return vfio_pci_core_write(core_vdev, buf, count, ppos);
}

static ssize_t
virtiovf_pci_core_write(struct vfio_device *core_vdev, const char __user *buf,
			size_t count, loff_t *ppos)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;

	if (!count)
		return 0;

	if (index == VFIO_PCI_CONFIG_REGION_INDEX)
		return virtiovf_pci_write_config(core_vdev, buf, count, ppos);

	if (index == VFIO_PCI_BAR0_REGION_INDEX)
		return virtiovf_pci_bar0_rw(virtvdev, pos, (char __user *)buf, count, false);

	return vfio_pci_core_write(core_vdev, buf, count, ppos);
}

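/*
 * Report the emulated BAR0 as a read/write region sized to the virtual
 * buffer; all other region queries are handled by the vfio-pci core.
 */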
static int
virtiovf_pci_ioctl_get_region_info(struct vfio_device *core_vdev,
				   unsigned int cmd, unsigned long arg)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	unsigned long minsz = offsetofend(struct vfio_region_info, offset);
	void __user *uarg = (void __user *)arg;
	struct vfio_region_info info = {};

	if (copy_from_user(&info, uarg, minsz))
		return -EFAULT;

	if (info.argsz < minsz)
		return -EINVAL;

	switch (info.index) {
	case VFIO_PCI_BAR0_REGION_INDEX:
		info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
		info.size = virtvdev->bar0_virtual_buf_size;
		info.flags = VFIO_REGION_INFO_FLAG_READ |
			     VFIO_REGION_INFO_FLAG_WRITE;
		return copy_to_user(uarg, &info, minsz) ? -EFAULT : 0;
	default:
		return vfio_pci_core_ioctl(core_vdev, cmd, arg);
	}
}

static long
virtiovf_vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
			     unsigned long arg)
{
	switch (cmd) {
	case VFIO_DEVICE_GET_REGION_INFO:
		return virtiovf_pci_ioctl_get_region_info(core_vdev, cmd, arg);
	default:
		return vfio_pci_core_ioctl(core_vdev, cmd, arg);
	}
}

static int
virtiovf_set_notify_addr(struct virtiovf_pci_core_device *virtvdev)
{
	struct vfio_pci_core_device *core_device = &virtvdev->core_device;
	int ret;

	/*
	 * Set up the BAR where the 'notify' area lives so that vfio can use
	 * it as well. This lets us mmap it only once and reuse it whenever
	 * needed.
	 */
	ret = vfio_pci_core_setup_barmap(core_device,
					 virtvdev->notify_bar);
	if (ret)
		return ret;

	virtvdev->notify_addr = core_device->barmap[virtvdev->notify_bar] +
			virtvdev->notify_offset;
	return 0;
}

static int virtiovf_pci_open_device(struct vfio_device *core_vdev)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	struct vfio_pci_core_device *vdev = &virtvdev->core_device;
	int ret;

	ret = vfio_pci_core_enable(vdev);
	if (ret)
		return ret;

	if (virtvdev->bar0_virtual_buf) {
		/*
		 * Upon close_device(), vfio_pci_core_disable() is called and
		 * tears down all previous mmaps, so the valid life cycle of
		 * the 'notify' address is per open/close.
		 */
		ret = virtiovf_set_notify_addr(virtvdev);
		if (ret) {
			vfio_pci_core_disable(vdev);
			return ret;
		}
	}

	vfio_pci_core_finish_enable(vdev);
	return 0;
}

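/*
 * Only virtio-net is supported so far; its legacy device-specific
 * configuration extends up to and including the 'status' field.
 */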
static int virtiovf_get_device_config_size(unsigned short device)
{
	/* Network card */
	return offsetofend(struct virtio_net_config, status);
}

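/*
 * Query where the device exposes its legacy queue notification area
 * (BAR number and offset) so that it can be mapped at open time.
 */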
static int virtiovf_read_notify_info(struct virtiovf_pci_core_device *virtvdev)
{
	u64 offset;
	int ret;
	u8 bar;

	ret = virtio_pci_admin_legacy_io_notify_info(virtvdev->core_device.pdev,
				VIRTIO_ADMIN_CMD_NOTIFY_INFO_FLAGS_OWNER_MEM,
				&bar, &offset);
	if (ret)
		return ret;

	virtvdev->notify_bar = bar;
	virtvdev->notify_offset = offset;
	return 0;
}

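/*
 * Read the notify info and size the virtual legacy BAR0 as the legacy
 * common configuration plus the device-specific configuration, then
 * allocate the buffer backing it.
 */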
static int virtiovf_pci_init_device(struct vfio_device *core_vdev)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);
	struct pci_dev *pdev;
	int ret;

	ret = vfio_pci_core_init_dev(core_vdev);
	if (ret)
		return ret;

	pdev = virtvdev->core_device.pdev;
	ret = virtiovf_read_notify_info(virtvdev);
	if (ret)
		return ret;

	virtvdev->bar0_virtual_buf_size = VIRTIO_PCI_CONFIG_OFF(true) +
				virtiovf_get_device_config_size(pdev->device);
	BUILD_BUG_ON(!is_power_of_2(virtvdev->bar0_virtual_buf_size));
	virtvdev->bar0_virtual_buf = kzalloc(virtvdev->bar0_virtual_buf_size,
					     GFP_KERNEL);
	if (!virtvdev->bar0_virtual_buf)
		return -ENOMEM;
	mutex_init(&virtvdev->bar_mutex);
	return 0;
}

static void virtiovf_pci_core_release_dev(struct vfio_device *core_vdev)
{
	struct virtiovf_pci_core_device *virtvdev = container_of(
		core_vdev, struct virtiovf_pci_core_device, core_device.vdev);

	kfree(virtvdev->bar0_virtual_buf);
	vfio_pci_core_release_dev(core_vdev);
}

static const struct vfio_device_ops virtiovf_vfio_pci_tran_ops = {
	.name = "virtio-vfio-pci-trans",
	.init = virtiovf_pci_init_device,
	.release = virtiovf_pci_core_release_dev,
	.open_device = virtiovf_pci_open_device,
	.close_device = vfio_pci_core_close_device,
	.ioctl = virtiovf_vfio_pci_core_ioctl,
	.device_feature = vfio_pci_core_ioctl_feature,
	.read = virtiovf_pci_core_read,
	.write = virtiovf_pci_core_write,
	.mmap = vfio_pci_core_mmap,
	.request = vfio_pci_core_request,
	.match = vfio_pci_core_match,
	.bind_iommufd = vfio_iommufd_physical_bind,
	.unbind_iommufd = vfio_iommufd_physical_unbind,
	.attach_ioas = vfio_iommufd_physical_attach_ioas,
	.detach_ioas = vfio_iommufd_physical_detach_ioas,
};

static const struct vfio_device_ops virtiovf_vfio_pci_ops = {
	.name = "virtio-vfio-pci",
	.init = vfio_pci_core_init_dev,
	.release = vfio_pci_core_release_dev,
	.open_device = virtiovf_pci_open_device,
	.close_device = vfio_pci_core_close_device,
	.ioctl = vfio_pci_core_ioctl,
	.device_feature = vfio_pci_core_ioctl_feature,
	.read = vfio_pci_core_read,
	.write = vfio_pci_core_write,
	.mmap = vfio_pci_core_mmap,
	.request = vfio_pci_core_request,
	.match = vfio_pci_core_match,
	.bind_iommufd = vfio_iommufd_physical_bind,
	.unbind_iommufd = vfio_iommufd_physical_unbind,
	.attach_ioas = vfio_iommufd_physical_attach_ioas,
	.detach_ioas = vfio_iommufd_physical_detach_ioas,
};

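/* Check whether the VF exposes a BAR0 of its own. */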
static bool virtiovf_bar0_exists(struct pci_dev *pdev)
{
	struct resource *res = pdev->resource;

	return res->flags;
}

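/*
 * Use the transitional ops, which add the legacy I/O BAR0 and config space
 * emulation, only for VFs that support the legacy admin I/O commands and do
 * not already expose a BAR0; otherwise the device is handled as a plain
 * vfio-pci device.
 */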
static int virtiovf_pci_probe(struct pci_dev *pdev,
			      const struct pci_device_id *id)
{
	const struct vfio_device_ops *ops = &virtiovf_vfio_pci_ops;
	struct virtiovf_pci_core_device *virtvdev;
	int ret;

	if (pdev->is_virtfn && virtio_pci_admin_has_legacy_io(pdev) &&
	    !virtiovf_bar0_exists(pdev))
		ops = &virtiovf_vfio_pci_tran_ops;

	virtvdev = vfio_alloc_device(virtiovf_pci_core_device, core_device.vdev,
				     &pdev->dev, ops);
	if (IS_ERR(virtvdev))
		return PTR_ERR(virtvdev);

	dev_set_drvdata(&pdev->dev, &virtvdev->core_device);
	ret = vfio_pci_core_register_device(&virtvdev->core_device);
	if (ret)
		goto out;
	return 0;
out:
	vfio_put_device(&virtvdev->core_device.vdev);
	return ret;
}

static void virtiovf_pci_remove(struct pci_dev *pdev)
{
	struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);

	vfio_pci_core_unregister_device(&virtvdev->core_device);
	vfio_put_device(&virtvdev->core_device.vdev);
}

static const struct pci_device_id virtiovf_pci_table[] = {
	/* Only virtio-net is supported/tested so far */
	{ PCI_DRIVER_OVERRIDE_DEVICE_VFIO(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1041) },
	{}
};

MODULE_DEVICE_TABLE(pci, virtiovf_pci_table);

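/* The device was reset, so the shadowed command register is stale; clear it. */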
static void virtiovf_pci_aer_reset_done(struct pci_dev *pdev)
{
	struct virtiovf_pci_core_device *virtvdev = dev_get_drvdata(&pdev->dev);

	virtvdev->pci_cmd = 0;
}

static const struct pci_error_handlers virtiovf_err_handlers = {
	.reset_done = virtiovf_pci_aer_reset_done,
	.error_detected = vfio_pci_core_aer_err_detected,
};

static struct pci_driver virtiovf_pci_driver = {
	.name = KBUILD_MODNAME,
	.id_table = virtiovf_pci_table,
	.probe = virtiovf_pci_probe,
	.remove = virtiovf_pci_remove,
	.err_handler = &virtiovf_err_handlers,
	.driver_managed_dma = true,
};

module_pci_driver(virtiovf_pci_driver);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yishai Hadas <yishaih@nvidia.com>");
MODULE_DESCRIPTION(
	"VIRTIO VFIO PCI - User Level meta-driver for VIRTIO NET devices");