1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2023, Klara Inc. 23 */ 24 25#ifdef CONFIG_COMPAT 26#include <linux/compat.h> 27#endif 28#include <linux/fs.h> 29#ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE 30#include <linux/splice.h> 31#endif 32#include <sys/file.h> 33#include <sys/zfs_znode.h> 34#include <sys/zfs_vnops.h> 35#include <sys/zfeature.h> 36 37/* 38 * Clone part of a file via block cloning. 39 * 40 * Note that we are not required to update file offsets; the kernel will take 41 * care of that depending on how it was called. 
42 */ 43static ssize_t 44zpl_clone_file_range_impl(struct file *src_file, loff_t src_off, 45 struct file *dst_file, loff_t dst_off, size_t len) 46{ 47 struct inode *src_i = file_inode(src_file); 48 struct inode *dst_i = file_inode(dst_file); 49 uint64_t src_off_o = (uint64_t)src_off; 50 uint64_t dst_off_o = (uint64_t)dst_off; 51 uint64_t len_o = (uint64_t)len; 52 cred_t *cr = CRED(); 53 fstrans_cookie_t cookie; 54 int err; 55 56 if (!zfs_bclone_enabled) 57 return (-EOPNOTSUPP); 58 59 if (!spa_feature_is_enabled( 60 dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING)) 61 return (-EOPNOTSUPP); 62 63 if (src_i != dst_i) 64 spl_inode_lock_shared(src_i); 65 spl_inode_lock(dst_i); 66 67 crhold(cr); 68 cookie = spl_fstrans_mark(); 69 70 err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i), 71 &dst_off_o, &len_o, cr); 72 73 spl_fstrans_unmark(cookie); 74 crfree(cr); 75 76 spl_inode_unlock(dst_i); 77 if (src_i != dst_i) 78 spl_inode_unlock_shared(src_i); 79 80 if (err < 0) 81 return (err); 82 83 return ((ssize_t)len_o); 84} 85 86#if defined(HAVE_VFS_COPY_FILE_RANGE) || \ 87 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) 88/* 89 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file 90 * to dst_off in dst_file. We are permitted to do this however we like, so we 91 * try to just clone the blocks, and if we can't support it, fall back to the 92 * kernel's generic byte copy function. 93 */ 94ssize_t 95zpl_copy_file_range(struct file *src_file, loff_t src_off, 96 struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags) 97{ 98 ssize_t ret; 99 100 /* Flags is reserved for future extensions and must be zero. */ 101 if (flags != 0) 102 return (-EINVAL); 103 104 /* Try to do it via zfs_clone_range() and allow shortening. 
*/ 105 ret = zpl_clone_file_range_impl(src_file, src_off, 106 dst_file, dst_off, len); 107 108#if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE) 109 /* 110 * Since Linux 5.3 the filesystem driver is responsible for executing 111 * an appropriate fallback, and a generic fallback function is provided. 112 */ 113 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV || 114 ret == -EAGAIN) 115 ret = generic_copy_file_range(src_file, src_off, dst_file, 116 dst_off, len, flags); 117#elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE) 118 /* 119 * Since 6.8 the fallback function is called splice_copy_file_range 120 * and has a slightly different signature. 121 */ 122 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV || 123 ret == -EAGAIN) 124 ret = splice_copy_file_range(src_file, src_off, dst_file, 125 dst_off, len); 126#else 127 /* 128 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal 129 * to the kernel that it should fallback to a content copy. 130 */ 131 if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN) 132 ret = -EOPNOTSUPP; 133#endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */ 134 135 return (ret); 136} 137#endif /* HAVE_VFS_COPY_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ 138 139#ifdef HAVE_VFS_REMAP_FILE_RANGE 140/* 141 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE. 142 * 143 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except 144 * that they must clone - they cannot fall back to copying. FICLONE is exactly 145 * FICLONERANGE, for the entire file. We don't need to try to tell them apart; 146 * the kernel will sort that out for us. 147 * 148 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the 149 * range in both files and if they're the same, arrange for them to be backed 150 * by the same storage. 151 * 152 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range 153 * if we want. 
It's designed for filesystems that may need to shorten the 154 * length for alignment, EOF, or any other requirement. ZFS may shorten the 155 * request when there is outstanding dirty data which hasn't been written. 156 */ 157loff_t 158zpl_remap_file_range(struct file *src_file, loff_t src_off, 159 struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags) 160{ 161 if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN)) 162 return (-EINVAL); 163 164 /* No support for dedup yet */ 165 if (flags & REMAP_FILE_DEDUP) 166 return (-EOPNOTSUPP); 167 168 /* Zero length means to clone everything to the end of the file */ 169 if (len == 0) 170 len = i_size_read(file_inode(src_file)) - src_off; 171 172 ssize_t ret = zpl_clone_file_range_impl(src_file, src_off, 173 dst_file, dst_off, len); 174 175 if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len) 176 ret = -EINVAL; 177 178 return (ret); 179} 180#endif /* HAVE_VFS_REMAP_FILE_RANGE */ 181 182#if defined(HAVE_VFS_CLONE_FILE_RANGE) || \ 183 defined(HAVE_VFS_FILE_OPERATIONS_EXTEND) 184/* 185 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20. 186 */ 187int 188zpl_clone_file_range(struct file *src_file, loff_t src_off, 189 struct file *dst_file, loff_t dst_off, uint64_t len) 190{ 191 /* Zero length means to clone everything to the end of the file */ 192 if (len == 0) 193 len = i_size_read(file_inode(src_file)) - src_off; 194 195 /* The entire length must be cloned or this is an error. */ 196 ssize_t ret = zpl_clone_file_range_impl(src_file, src_off, 197 dst_file, dst_off, len); 198 199 if (ret >= 0 && ret != len) 200 ret = -EINVAL; 201 202 return (ret); 203} 204#endif /* HAVE_VFS_CLONE_FILE_RANGE || HAVE_VFS_FILE_OPERATIONS_EXTEND */ 205 206#ifdef HAVE_VFS_DEDUPE_FILE_RANGE 207/* 208 * Entry point for FIDEDUPERANGE, before Linux 4.20. 
209 */ 210int 211zpl_dedupe_file_range(struct file *src_file, loff_t src_off, 212 struct file *dst_file, loff_t dst_off, uint64_t len) 213{ 214 /* No support for dedup yet */ 215 return (-EOPNOTSUPP); 216} 217#endif /* HAVE_VFS_DEDUPE_FILE_RANGE */ 218 219/* Entry point for FICLONE, before Linux 4.5. */ 220long 221zpl_ioctl_ficlone(struct file *dst_file, void *arg) 222{ 223 unsigned long sfd = (unsigned long)arg; 224 225 struct file *src_file = fget(sfd); 226 if (src_file == NULL) 227 return (-EBADF); 228 229 if (dst_file->f_op != src_file->f_op) { 230 fput(src_file); 231 return (-EXDEV); 232 } 233 234 size_t len = i_size_read(file_inode(src_file)); 235 236 ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len); 237 238 fput(src_file); 239 240 if (ret < 0) { 241 if (ret == -EOPNOTSUPP) 242 return (-ENOTTY); 243 return (ret); 244 } 245 246 if (ret != len) 247 return (-EINVAL); 248 249 return (0); 250} 251 252/* Entry point for FICLONERANGE, before Linux 4.5. */ 253long 254zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg) 255{ 256 zfs_ioc_compat_file_clone_range_t fcr; 257 258 if (copy_from_user(&fcr, arg, sizeof (fcr))) 259 return (-EFAULT); 260 261 struct file *src_file = fget(fcr.fcr_src_fd); 262 if (src_file == NULL) 263 return (-EBADF); 264 265 if (dst_file->f_op != src_file->f_op) { 266 fput(src_file); 267 return (-EXDEV); 268 } 269 270 size_t len = fcr.fcr_src_length; 271 if (len == 0) 272 len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset; 273 274 ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset, 275 dst_file, fcr.fcr_dest_offset, len); 276 277 fput(src_file); 278 279 if (ret < 0) { 280 if (ret == -EOPNOTSUPP) 281 return (-ENOTTY); 282 return (ret); 283 } 284 285 if (ret != len) 286 return (-EINVAL); 287 288 return (0); 289} 290 291/* Entry point for FIDEDUPERANGE, before Linux 4.5. 
*/
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
{
	/* Neither argument is examined. */
	(void) filp;
	(void) arg;

	/* Dedup is not implemented, so the ioctl is reported as unknown. */
	return (-ENOTTY);
}