zfs_replay.c revision 169884
1132718Skan/* 2132718Skan * CDDL HEADER START 3132718Skan * 4132718Skan * The contents of this file are subject to the terms of the 5132718Skan * Common Development and Distribution License (the "License"). 6132718Skan * You may not use this file except in compliance with the License. 7132718Skan * 8132718Skan * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9132718Skan * or http://www.opensolaris.org/os/licensing. 10132718Skan * See the License for the specific language governing permissions 11132718Skan * and limitations under the License. 12132718Skan * 13132718Skan * When distributing Covered Code, include this CDDL HEADER in each 14132718Skan * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15132718Skan * If applicable, add the following below this CDDL HEADER, with the 16132718Skan * fields enclosed by brackets "[]" replaced with your own identifying 17132718Skan * information: Portions Copyright [yyyy] [name of copyright owner] 18132718Skan * 19132718Skan * CDDL HEADER END 20132718Skan */ 21132718Skan/* 22132718Skan * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23132718Skan * Use is subject to license terms. 24132718Skan */ 25132718Skan 26132718Skan#pragma ident "%Z%%M% %I% %E% SMI" 27132718Skan 28132718Skan#include <sys/types.h> 29132718Skan#include <sys/param.h> 30132718Skan#include <sys/systm.h> 31132718Skan#include <sys/sysmacros.h> 32132718Skan#include <sys/cmn_err.h> 33132718Skan#include <sys/kmem.h> 34132718Skan#include <sys/file.h> 35132718Skan#include <sys/fcntl.h> 36132718Skan#include <sys/vfs.h> 37132718Skan#include <sys/fs/zfs.h> 38132718Skan#include <sys/zfs_znode.h> 39132718Skan#include <sys/zfs_dir.h> 40132718Skan#include <sys/zfs_acl.h> 41132718Skan#include <sys/spa.h> 42132718Skan#include <sys/zil.h> 43132718Skan#include <sys/byteorder.h> 44132718Skan#include <sys/stat.h> 45132718Skan#include <sys/acl.h> 46132718Skan#include <sys/atomic.h> 47132718Skan#include <sys/cred.h> 48132718Skan#include <sys/namei.h> 49132718Skan 50132718Skan/* 51132718Skan * Functions to replay ZFS intent log (ZIL) records 52132718Skan * The functions are called through a function vector (zfs_replay_vector) 53132718Skan * which is indexed by the transaction type. 54132718Skan */ 55132718Skan 56132718Skanstatic void 57132718Skanzfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode, 58132718Skan uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid) 59132718Skan{ 60132718Skan VATTR_NULL(vap); 61132718Skan vap->va_mask = (uint_t)mask; 62132718Skan vap->va_type = IFTOVT(mode); 63132718Skan vap->va_mode = mode & MODEMASK; 64132718Skan vap->va_uid = (uid_t)uid; 65132718Skan vap->va_gid = (gid_t)gid; 66132718Skan vap->va_rdev = zfs_cmpldev(rdev); 67132718Skan vap->va_nodeid = nodeid; 68132718Skan} 69132718Skan 70132718Skan/* ARGSUSED */ 71132718Skanstatic int 72132718Skanzfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap) 73132718Skan{ 74132718Skan return (ENOTSUP); 75132718Skan} 76132718Skan 77132718Skanstatic int 78132718Skanzfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap) 79132718Skan{ 80132718Skan char *name = (char *)(lr + 1); /* name follows lr_create_t */ 81132718Skan char *link; /* symlink content follows name */ 82132718Skan znode_t *dzp; 83132718Skan vnode_t *vp = NULL; 84132718Skan vattr_t va; 85132718Skan struct componentname cn; 86132718Skan int error; 87132718Skan 88132718Skan if (byteswap) 89132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 90132718Skan 91132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 92132718Skan return (error); 93132718Skan 94132718Skan zfs_init_vattr(&va, AT_TYPE | AT_MODE | AT_UID | AT_GID, 95132718Skan lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid); 96132718Skan 97132718Skan /* 98132718Skan * All forms of zfs create (create, mkdir, mkxattrdir, symlink) 99132718Skan * eventually end up in zfs_mknode(), which assigns the object's 100132718Skan * creation time and generation number. The generic VOP_CREATE() 101132718Skan * doesn't have either concept, so we smuggle the values inside 102132718Skan * the vattr's otherwise unused va_ctime and va_nblocks fields. 103132718Skan */ 104132718Skan ZFS_TIME_DECODE(&va.va_ctime, lr->lr_crtime); 105132718Skan va.va_nblocks = lr->lr_gen; 106132718Skan 107132718Skan cn.cn_nameptr = name; 108132718Skan cn.cn_cred = kcred; 109132718Skan cn.cn_thread = curthread; 110132718Skan cn.cn_flags = SAVENAME; 111132718Skan 112132718Skan vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY, curthread); 113132718Skan switch ((int)lr->lr_common.lrc_txtype) { 114132718Skan case TX_CREATE: 115132718Skan error = VOP_CREATE(ZTOV(dzp), &vp, &cn, &va); 116132718Skan break; 117132718Skan case TX_MKDIR: 118132718Skan error = VOP_MKDIR(ZTOV(dzp), &vp, &cn, &va); 119132718Skan break; 120132718Skan case TX_MKXATTR: 121132718Skan error = zfs_make_xattrdir(dzp, &va, &vp, kcred); 122132718Skan break; 123132718Skan case TX_SYMLINK: 124132718Skan link = name + strlen(name) + 1; 125132718Skan error = VOP_SYMLINK(ZTOV(dzp), &vp, &cn, &va, link); 126132718Skan break; 127132718Skan default: 128132718Skan error = ENOTSUP; 129132718Skan } 130132718Skan VOP_UNLOCK(ZTOV(dzp), 0, curthread); 131132718Skan 132132718Skan if (error == 0 && vp != NULL) { 133132718Skan VOP_UNLOCK(vp, 0, curthread); 134132718Skan VN_RELE(vp); 135132718Skan } 136132718Skan 137132718Skan VN_RELE(ZTOV(dzp)); 138132718Skan 139132718Skan return (error); 140132718Skan} 141132718Skan 142132718Skanstatic int 143132718Skanzfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap) 144132718Skan{ 145132718Skan char *name = (char *)(lr + 1); /* name follows lr_remove_t */ 146132718Skan znode_t *dzp; 147132718Skan struct componentname cn; 148132718Skan vnode_t *vp; 149132718Skan int error; 150132718Skan 151132718Skan if (byteswap) 152132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 153132718Skan 154132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 155132718Skan return (error); 156132718Skan 157132718Skan cn.cn_nameptr = name; 158132718Skan cn.cn_namelen = strlen(name); 159132718Skan cn.cn_nameiop = DELETE; 160132718Skan cn.cn_flags = ISLASTCN | SAVENAME; 161132718Skan cn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 162132718Skan cn.cn_cred = kcred; 163132718Skan cn.cn_thread = curthread; 164132718Skan vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY, curthread); 165132718Skan error = VOP_LOOKUP(ZTOV(dzp), &vp, &cn); 166132718Skan if (error != 0) { 167132718Skan VOP_UNLOCK(ZTOV(dzp), 0, curthread); 168132718Skan goto fail; 169132718Skan } 170132718Skan 171132718Skan switch ((int)lr->lr_common.lrc_txtype) { 172132718Skan case TX_REMOVE: 173132718Skan error = VOP_REMOVE(ZTOV(dzp), vp, &cn); 174132718Skan break; 175132718Skan case TX_RMDIR: 176132718Skan error = VOP_RMDIR(ZTOV(dzp), vp, &cn); 177132718Skan break; 178132718Skan default: 179132718Skan error = ENOTSUP; 180132718Skan } 181132718Skan vput(vp); 182132718Skan VOP_UNLOCK(ZTOV(dzp), 0, curthread); 183132718Skanfail: 184132718Skan VN_RELE(ZTOV(dzp)); 185132718Skan 186132718Skan return (error); 187132718Skan} 188132718Skan 189132718Skanstatic int 190132718Skanzfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap) 191132718Skan{ 192132718Skan char *name = (char *)(lr + 1); /* name follows lr_link_t */ 193132718Skan znode_t *dzp, *zp; 194132718Skan struct componentname cn; 195132718Skan int error; 196132718Skan 197132718Skan if (byteswap) 198132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 199132718Skan 200132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0) 201132718Skan return (error); 202132718Skan 203132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) { 204132718Skan VN_RELE(ZTOV(dzp)); 205132718Skan return (error); 206132718Skan } 207132718Skan 208132718Skan cn.cn_nameptr = name; 209132718Skan cn.cn_cred = kcred; 210132718Skan cn.cn_thread = curthread; 211132718Skan cn.cn_flags = SAVENAME; 212132718Skan 213132718Skan vn_lock(ZTOV(dzp), LK_EXCLUSIVE | LK_RETRY, curthread); 214132718Skan vn_lock(ZTOV(zp), LK_EXCLUSIVE | LK_RETRY, curthread); 215132718Skan error = VOP_LINK(ZTOV(dzp), ZTOV(zp), &cn); 216132718Skan VOP_UNLOCK(ZTOV(zp), 0, curthread); 217132718Skan VOP_UNLOCK(ZTOV(dzp), 0, curthread); 218132718Skan 219132718Skan VN_RELE(ZTOV(zp)); 220132718Skan VN_RELE(ZTOV(dzp)); 221132718Skan 222132718Skan return (error); 223132718Skan} 224132718Skan 225132718Skanstatic int 226132718Skanzfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap) 227132718Skan{ 228132718Skan char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */ 229132718Skan char *tname = sname + strlen(sname) + 1; 230132718Skan znode_t *sdzp, *tdzp; 231132718Skan struct componentname scn, tcn; 232132718Skan vnode_t *svp, *tvp; 233132718Skan kthread_t *td = curthread; 234132718Skan int error; 235132718Skan 236132718Skan if (byteswap) 237132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 238132718Skan 239132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0) 240132718Skan return (error); 241132718Skan 242132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) { 243132718Skan VN_RELE(ZTOV(sdzp)); 244132718Skan return (error); 245132718Skan } 246132718Skan 247132718Skan svp = tvp = NULL; 248132718Skan 249132718Skan scn.cn_nameptr = sname; 250132718Skan scn.cn_namelen = strlen(sname); 251132718Skan scn.cn_nameiop = DELETE; 252132718Skan scn.cn_flags = ISLASTCN | SAVENAME; 253132718Skan scn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 254132718Skan scn.cn_cred = kcred; 255132718Skan scn.cn_thread = td; 256132718Skan vn_lock(ZTOV(sdzp), LK_EXCLUSIVE | LK_RETRY, td); 257132718Skan error = VOP_LOOKUP(ZTOV(sdzp), &svp, &scn); 258132718Skan VOP_UNLOCK(ZTOV(sdzp), 0, td); 259132718Skan if (error != 0) 260132718Skan goto fail; 261132718Skan VOP_UNLOCK(svp, 0, td); 262132718Skan 263132718Skan tcn.cn_nameptr = tname; 264132718Skan tcn.cn_namelen = strlen(tname); 265132718Skan tcn.cn_nameiop = RENAME; 266132718Skan tcn.cn_flags = ISLASTCN | SAVENAME; 267132718Skan tcn.cn_lkflags = LK_EXCLUSIVE | LK_RETRY; 268132718Skan tcn.cn_cred = kcred; 269132718Skan tcn.cn_thread = td; 270132718Skan vn_lock(ZTOV(tdzp), LK_EXCLUSIVE | LK_RETRY, td); 271132718Skan error = VOP_LOOKUP(ZTOV(tdzp), &tvp, &tcn); 272132718Skan if (error == EJUSTRETURN) 273132718Skan tvp = NULL; 274132718Skan else if (error != 0) { 275132718Skan VOP_UNLOCK(ZTOV(tdzp), 0, td); 276132718Skan goto fail; 277132718Skan } 278132718Skan 279132718Skan error = VOP_RENAME(ZTOV(sdzp), svp, &scn, ZTOV(tdzp), tvp, &tcn); 280132718Skan return (error); 281132718Skanfail: 282132718Skan if (svp != NULL) 283132718Skan vrele(svp); 284132718Skan if (tvp != NULL) 285132718Skan vrele(tvp); 286132718Skan VN_RELE(ZTOV(tdzp)); 287132718Skan VN_RELE(ZTOV(sdzp)); 288132718Skan 289132718Skan return (error); 290132718Skan} 291132718Skan 292132718Skanstatic int 293132718Skanzfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap) 294132718Skan{ 295132718Skan char *data = (char *)(lr + 1); /* data follows lr_write_t */ 296132718Skan znode_t *zp; 297132718Skan int error; 298132718Skan ssize_t resid; 299132718Skan 300132718Skan if (byteswap) 301132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 302132718Skan 303132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { 304132718Skan /* 305132718Skan * As we can log writes out of order, it's possible the 306132718Skan * file has been removed. In this case just drop the write 307132718Skan * and return success. 308132718Skan */ 309132718Skan if (error == ENOENT) 310132718Skan error = 0; 311132718Skan return (error); 312132718Skan } 313132718Skan 314132718Skan error = vn_rdwr(UIO_WRITE, ZTOV(zp), data, lr->lr_length, 315132718Skan lr->lr_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid); 316132718Skan 317132718Skan VN_RELE(ZTOV(zp)); 318132718Skan 319132718Skan return (error); 320132718Skan} 321132718Skan 322132718Skanstatic int 323132718Skanzfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap) 324132718Skan{ 325132718Skan 326132718Skan ZFS_LOG(0, "Unexpected code path, report to pjd@FreeBSD.org"); 327132718Skan return (EOPNOTSUPP); 328132718Skan} 329132718Skan 330132718Skanstatic int 331132718Skanzfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap) 332132718Skan{ 333132718Skan znode_t *zp; 334132718Skan vattr_t va; 335132718Skan vnode_t *vp; 336132718Skan int error; 337132718Skan 338132718Skan if (byteswap) 339132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 340132718Skan 341132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { 342132718Skan /* 343132718Skan * As we can log setattrs out of order, it's possible the 344132718Skan * file has been removed. In this case just drop the setattr 345132718Skan * and return success. 346132718Skan */ 347132718Skan if (error == ENOENT) 348132718Skan error = 0; 349132718Skan return (error); 350132718Skan } 351132718Skan 352132718Skan zfs_init_vattr(&va, lr->lr_mask, lr->lr_mode, 353132718Skan lr->lr_uid, lr->lr_gid, 0, lr->lr_foid); 354132718Skan 355132718Skan va.va_size = lr->lr_size; 356132718Skan ZFS_TIME_DECODE(&va.va_atime, lr->lr_atime); 357132718Skan ZFS_TIME_DECODE(&va.va_mtime, lr->lr_mtime); 358132718Skan 359132718Skan vp = ZTOV(zp); 360132718Skan vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); 361132718Skan error = VOP_SETATTR(vp, &va, kcred, curthread); 362132718Skan VOP_UNLOCK(vp, 0, curthread); 363132718Skan VN_RELE(vp); 364132718Skan 365132718Skan return (error); 366132718Skan} 367132718Skan 368132718Skanstatic int 369132718Skanzfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap) 370132718Skan{ 371132718Skan ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */ 372132718Skan#ifdef TODO 373132718Skan vsecattr_t vsa; 374132718Skan#endif 375132718Skan znode_t *zp; 376132718Skan int error; 377132718Skan 378132718Skan if (byteswap) { 379132718Skan byteswap_uint64_array(lr, sizeof (*lr)); 380132718Skan zfs_ace_byteswap(ace, lr->lr_aclcnt); 381132718Skan } 382132718Skan 383132718Skan if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) { 384132718Skan /* 385132718Skan * As we can log acls out of order, it's possible the 386132718Skan * file has been removed. In this case just drop the acl 387132718Skan * and return success. 388132718Skan */ 389132718Skan if (error == ENOENT) 390132718Skan error = 0; 391132718Skan return (error); 392132718Skan } 393132718Skan 394132718Skan#ifdef TODO 395132718Skan bzero(&vsa, sizeof (vsa)); 396132718Skan vsa.vsa_mask = VSA_ACE | VSA_ACECNT; 397132718Skan vsa.vsa_aclcnt = lr->lr_aclcnt; 398132718Skan vsa.vsa_aclentp = ace; 399132718Skan 400132718Skan error = VOP_SETSECATTR(ZTOV(zp), &vsa, 0, kcred); 401132718Skan#else 402132718Skan error = EOPNOTSUPP; 403132718Skan#endif 404132718Skan 405132718Skan VN_RELE(ZTOV(zp)); 406132718Skan 407132718Skan return (error); 408132718Skan} 409132718Skan 410132718Skan/* 411132718Skan * Callback vectors for replaying records 412132718Skan */ 413132718Skanzil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE] = { 414132718Skan zfs_replay_error, /* 0 no such transaction type */ 415132718Skan zfs_replay_create, /* TX_CREATE */ 416132718Skan zfs_replay_create, /* TX_MKDIR */ 417132718Skan zfs_replay_create, /* TX_MKXATTR */ 418132718Skan zfs_replay_create, /* TX_SYMLINK */ 419132718Skan zfs_replay_remove, /* TX_REMOVE */ 420132718Skan zfs_replay_remove, /* TX_RMDIR */ 421132718Skan zfs_replay_link, /* TX_LINK */ 422132718Skan zfs_replay_rename, /* TX_RENAME */ 423132718Skan zfs_replay_write, /* TX_WRITE */ 424132718Skan zfs_replay_truncate, /* TX_TRUNCATE */ 425132718Skan zfs_replay_setattr, /* TX_SETATTR */ 426132718Skan zfs_replay_acl, /* TX_ACL */ 427132718Skan}; 428132718Skan