1/* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21/* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, 2015 by Delphix. All rights reserved. 24 */ 25 26#include <sys/zfs_context.h> 27#include <sys/spa.h> 28#include <sys/vdev_file.h> 29#include <sys/vdev_impl.h> 30#include <sys/zio.h> 31#include <sys/fs/zfs.h> 32#include <sys/fm/fs/zfs.h> 33 34/* 35 * Virtual device vector for files. 36 */ 37 38static void 39vdev_file_hold(vdev_t *vd) 40{ 41 ASSERT(vd->vdev_path != NULL); 42} 43 44static void 45vdev_file_rele(vdev_t *vd) 46{ 47 ASSERT(vd->vdev_path != NULL); 48} 49 50static int 51vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, 52 uint64_t *logical_ashift, uint64_t *physical_ashift) 53{ 54 vdev_file_t *vf; 55 vnode_t *vp; 56 vattr_t vattr; 57 int error; 58 59 /* 60 * We must have a pathname, and it must be absolute. 61 */ 62 if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') { 63 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL; 64 return (SET_ERROR(EINVAL)); 65 } 66 67 /* 68 * Reopen the device if it's not currently open. Otherwise, 69 * just update the physical size of the device. 70 */ 71 if (vd->vdev_tsd != NULL) { 72 ASSERT(vd->vdev_reopening); 73 vf = vd->vdev_tsd; 74 vp = vf->vf_vnode; 75 goto skip_open; 76 } 77 78 vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP); 79 80 /* 81 * We always open the files from the root of the global zone, even if 82 * we're in a local zone. If the user has gotten to this point, the 83 * administrator has already decided that the pool should be available 84 * to local zone users, so the underlying devices should be as well. 85 */ 86 ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/'); 87 error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE, 88 spa_mode(vd->vdev_spa) | FOFFMAX, 0, &vp, 0, 0, rootdir, -1); 89 90 if (error) { 91 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 92 kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); 93 vd->vdev_tsd = NULL; 94 return (error); 95 } 96 97 vf->vf_vnode = vp; 98 99#ifdef _KERNEL 100 /* 101 * Make sure it's a regular file. 102 */ 103 if (vp->v_type != VREG) { 104 (void) VOP_CLOSE(vp, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL); 105 kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); 106 vd->vdev_tsd = NULL; 107 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 108 return (SET_ERROR(ENODEV)); 109 } 110#endif /* _KERNEL */ 111 112skip_open: 113 /* 114 * Determine the physical size of the file. 115 */ 116 vattr.va_mask = AT_SIZE; 117#ifdef __FreeBSD__ 118 vn_lock(vp, LK_SHARED | LK_RETRY); 119 error = VOP_GETATTR(vp, &vattr, kcred); 120 VOP_UNLOCK(vp, 0); 121#endif 122#ifdef __NetBSD__ 123 error = VOP_GETATTR(vp, &vattr, 0, kcred, NULL); 124#endif 125 if (error) { 126 (void) VOP_CLOSE(vp, spa_mode(vd->vdev_spa), 1, 0, kcred, NULL); 127 vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED; 128 kmem_free(vd->vdev_tsd, sizeof (vdev_file_t)); 129 vd->vdev_tsd = NULL; 130 return (error); 131 } 132 133 vd->vdev_notrim = B_TRUE; 134 135 *max_psize = *psize = vattr.va_size; 136 *logical_ashift = SPA_MINBLOCKSHIFT; 137 *physical_ashift = SPA_MINBLOCKSHIFT; 138 139 return (0); 140} 141 142static void 143vdev_file_close(vdev_t *vd) 144{ 145 vdev_file_t *vf = vd->vdev_tsd; 146 147 if (vd->vdev_reopening || vf == NULL) 148 return; 149 150 if (vf->vf_vnode != NULL) { 151 (void) VOP_CLOSE(vf->vf_vnode, spa_mode(vd->vdev_spa), 1, 0, 152 kcred, NULL); 153 } 154 155 vd->vdev_delayed_close = B_FALSE; 156 kmem_free(vf, sizeof (vdev_file_t)); 157 vd->vdev_tsd = NULL; 158} 159 160static void 161vdev_file_io_start(zio_t *zio) 162{ 163 vdev_t *vd = zio->io_vd; 164 vdev_file_t *vf; 165 vnode_t *vp; 166 ssize_t resid; 167 168 if (!vdev_readable(vd)) { 169 zio->io_error = SET_ERROR(ENXIO); 170 zio_interrupt(zio); 171 return; 172 } 173 174 vf = vd->vdev_tsd; 175 vp = vf->vf_vnode; 176 177 if (zio->io_type == ZIO_TYPE_IOCTL) { 178 switch (zio->io_cmd) { 179 case DKIOCFLUSHWRITECACHE: 180 zio->io_error = VOP_FSYNC(vp, FSYNC | FDSYNC, 181 kcred, NULL); 182 break; 183 default: 184 zio->io_error = SET_ERROR(ENOTSUP); 185 } 186 187 zio_execute(zio); 188 return; 189 } 190 191 ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE); 192 zio->io_target_timestamp = zio_handle_io_delay(zio); 193 194 zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ? 195 UIO_READ : UIO_WRITE, vp, zio->io_data, zio->io_size, 196 zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 197 198 if (resid != 0 && zio->io_error == 0) 199 zio->io_error = ENOSPC; 200 201 zio_delay_interrupt(zio); 202 203#ifdef illumos 204 VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, bp, 205 TQ_SLEEP), !=, 0); 206#endif 207} 208 209/* ARGSUSED */ 210static void 211vdev_file_io_done(zio_t *zio) 212{ 213} 214 215vdev_ops_t vdev_file_ops = { 216 vdev_file_open, 217 vdev_file_close, 218 vdev_default_asize, 219 vdev_file_io_start, 220 vdev_file_io_done, 221 NULL, 222 vdev_file_hold, 223 vdev_file_rele, 224 VDEV_TYPE_FILE, /* name of this vdev type */ 225 B_TRUE /* leaf vdev */ 226}; 227 228/* 229 * From userland we access disks just like files. 230 */ 231#ifndef _KERNEL 232 233vdev_ops_t vdev_disk_ops = { 234 vdev_file_open, 235 vdev_file_close, 236 vdev_default_asize, 237 vdev_file_io_start, 238 vdev_file_io_done, 239 NULL, 240 vdev_file_hold, 241 vdev_file_rele, 242 VDEV_TYPE_DISK, /* name of this vdev type */ 243 B_TRUE /* leaf vdev */ 244}; 245 246#endif 247