zfs_log.c revision 6514:852c82a1989c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/file.h>
#include <sys/vfs.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_dir.h>
#include <sys/zil.h>
#include <sys/zil_impl.h>
#include <sys/byteorder.h>
#include <sys/policy.h>
#include <sys/stat.h>
#include <sys/mode.h>
#include <sys/acl.h>
#include <sys/dmu.h>
#include <sys/spa.h>
#include <sys/zfs_fuid.h>
#include <sys/ddi.h>

/*
 * All the functions in this file are used to construct the log entries
 * to record transactions. They allocate an intent log transaction
 * structure (itx_t) and save within it all the information necessary to
 * possibly replay the transaction. The itx is then assigned a sequence
 * number and inserted in the in-memory list anchored in the zilog.
57132718Skan */ 58132718Skan 59132718Skanint 60132718Skanzfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) 61132718Skan{ 62132718Skan int isxvattr = (vap->va_mask & AT_XVATTR); 63132718Skan switch (type) { 64132718Skan case Z_FILE: 65132718Skan if (vsecp == NULL && !isxvattr) 66132718Skan return (TX_CREATE); 67132718Skan if (vsecp && isxvattr) 68132718Skan return (TX_CREATE_ACL_ATTR); 69132718Skan if (vsecp) 70132718Skan return (TX_CREATE_ACL); 71132718Skan else 72132718Skan return (TX_CREATE_ATTR); 73132718Skan /*NOTREACHED*/ 74132718Skan case Z_DIR: 75132718Skan if (vsecp == NULL && !isxvattr) 76132718Skan return (TX_MKDIR); 77132718Skan if (vsecp && isxvattr) 78132718Skan return (TX_MKDIR_ACL_ATTR); 79132718Skan if (vsecp) 80132718Skan return (TX_MKDIR_ACL); 81132718Skan else 82132718Skan return (TX_MKDIR_ATTR); 83132718Skan case Z_XATTRDIR: 84132718Skan return (TX_MKXATTR); 85132718Skan } 86132718Skan ASSERT(0); 87132718Skan return (TX_MAX_TYPE); 88132718Skan} 89132718Skan 90132718Skan/* 91132718Skan * build up the log data necessary for logging xvattr_t 92132718Skan * First lr_attr_t is initialized. following the lr_attr_t 93132718Skan * is the mapsize and attribute bitmap copied from the xvattr_t. 94132718Skan * Following the bitmap and bitmapsize two 64 bit words are reserved 95132718Skan * for the create time which may be set. Following the create time 96132718Skan * records a single 64 bit integer which has the bits to set on 97132718Skan * replay for the xvattr. 
98132718Skan */ 99132718Skanstatic void 100132718Skanzfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) 101132718Skan{ 102132718Skan uint32_t *bitmap; 103132718Skan uint64_t *attrs; 104132718Skan uint64_t *crtime; 105132718Skan xoptattr_t *xoap; 106132718Skan void *scanstamp; 107132718Skan int i; 108132718Skan 109132718Skan xoap = xva_getxoptattr(xvap); 110132718Skan ASSERT(xoap); 111132718Skan 112132718Skan lrattr->lr_attr_masksize = xvap->xva_mapsize; 113132718Skan bitmap = &lrattr->lr_attr_bitmap; 114132718Skan for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) { 115132718Skan *bitmap = xvap->xva_reqattrmap[i]; 116132718Skan } 117132718Skan 118132718Skan /* Now pack the attributes up in a single uint64_t */ 119132718Skan attrs = (uint64_t *)bitmap; 120132718Skan crtime = attrs + 1; 121132718Skan scanstamp = (caddr_t)(crtime + 2); 122132718Skan *attrs = 0; 123132718Skan if (XVA_ISSET_REQ(xvap, XAT_READONLY)) 124132718Skan *attrs |= (xoap->xoa_readonly == 0) ? 0 : 125132718Skan XAT0_READONLY; 126132718Skan if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) 127132718Skan *attrs |= (xoap->xoa_hidden == 0) ? 0 : 128132718Skan XAT0_HIDDEN; 129132718Skan if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) 130132718Skan *attrs |= (xoap->xoa_system == 0) ? 0 : 131132718Skan XAT0_SYSTEM; 132132718Skan if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) 133132718Skan *attrs |= (xoap->xoa_archive == 0) ? 0 : 134132718Skan XAT0_ARCHIVE; 135132718Skan if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) 136132718Skan *attrs |= (xoap->xoa_immutable == 0) ? 0 : 137132718Skan XAT0_IMMUTABLE; 138132718Skan if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) 139132718Skan *attrs |= (xoap->xoa_nounlink == 0) ? 0 : 140132718Skan XAT0_NOUNLINK; 141132718Skan if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) 142132718Skan *attrs |= (xoap->xoa_appendonly == 0) ? 0 : 143132718Skan XAT0_APPENDONLY; 144132718Skan if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) 145132718Skan *attrs |= (xoap->xoa_opaque == 0) ? 
0 : 146132718Skan XAT0_APPENDONLY; 147132718Skan if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) 148132718Skan *attrs |= (xoap->xoa_nodump == 0) ? 0 : 149132718Skan XAT0_NODUMP; 150132718Skan if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) 151132718Skan *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 : 152132718Skan XAT0_AV_QUARANTINED; 153132718Skan if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) 154132718Skan *attrs |= (xoap->xoa_av_modified == 0) ? 0 : 155132718Skan XAT0_AV_MODIFIED; 156132718Skan if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 157132718Skan ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime); 158132718Skan if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 159132718Skan bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); 160132718Skan} 161132718Skan 162132718Skanstatic void * 163132718Skanzfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start) 164132718Skan{ 165132718Skan zfs_fuid_t *zfuid; 166132718Skan uint64_t *fuidloc = start; 167132718Skan 168132718Skan /* First copy in the ACE FUIDs */ 169132718Skan for (zfuid = list_head(&fuidp->z_fuids); zfuid; 170132718Skan zfuid = list_next(&fuidp->z_fuids, zfuid)) { 171132718Skan *fuidloc++ = zfuid->z_logfuid; 172132718Skan } 173132718Skan return (fuidloc); 174132718Skan} 175132718Skan 176132718Skan 177132718Skanstatic void * 178132718Skanzfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) 179132718Skan{ 180132718Skan zfs_fuid_domain_t *zdomain; 181132718Skan 182132718Skan /* now copy in the domain info, if any */ 183132718Skan if (fuidp->z_domain_str_sz != 0) { 184132718Skan for (zdomain = list_head(&fuidp->z_domains); zdomain; 185132718Skan zdomain = list_next(&fuidp->z_domains, zdomain)) { 186132718Skan bcopy((void *)zdomain->z_domain, start, 187132718Skan strlen(zdomain->z_domain) + 1); 188132718Skan start = (caddr_t)start + 189132718Skan strlen(zdomain->z_domain) + 1; 190132718Skan } 191132718Skan } 192132718Skan return (start); 193132718Skan} 194132718Skan 195132718Skan/* 196132718Skan * zfs_log_create() is used to handle 
TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, 197132718Skan * TX_MKDIR_ATTR and TX_MKXATTR 198132718Skan * transactions. 199132718Skan * 200132718Skan * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID 201132718Skan * domain information appended prior to the name. In this case the 202132718Skan * uid/gid in the log record will be a log centric FUID. 203132718Skan * 204132718Skan * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that 205132718Skan * may contain attributes, ACL and optional fuid information. 206132718Skan * 207132718Skan * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify 208132718Skan * and ACL and normal users/groups in the ACEs. 209132718Skan * 210132718Skan * There may be an optional xvattr attribute information similar 211132718Skan * to zfs_log_setattr. 212132718Skan * 213132718Skan * Also, after the file name "domain" strings may be appended. 214132718Skan */ 215132718Skanvoid 216132718Skanzfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 217132718Skan znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp, 218132718Skan zfs_fuid_info_t *fuidp, vattr_t *vap) 219132718Skan{ 220132718Skan itx_t *itx; 221132718Skan uint64_t seq; 222132718Skan lr_create_t *lr; 223132718Skan lr_acl_create_t *lracl; 224132718Skan size_t aclsize; 225132718Skan size_t xvatsize = 0; 226132718Skan size_t txsize; 227132718Skan xvattr_t *xvap = (xvattr_t *)vap; 228132718Skan void *end; 229132718Skan size_t lrsize; 230132718Skan size_t namesize = strlen(name) + 1; 231132718Skan size_t fuidsz = 0; 232132718Skan 233132718Skan if (zilog == NULL) 234132718Skan return; 235132718Skan 236132718Skan /* 237132718Skan * If we have FUIDs present then add in space for 238132718Skan * domains and ACE fuid's if any. 
239132718Skan */ 240132718Skan if (fuidp) { 241132718Skan fuidsz += fuidp->z_domain_str_sz; 242132718Skan fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t); 243132718Skan } 244132718Skan 245132718Skan if (vap->va_mask & AT_XVATTR) 246132718Skan xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize); 247132718Skan 248132718Skan if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || 249132718Skan (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR || 250132718Skan (int)txtype == TX_MKXATTR) { 251132718Skan txsize = sizeof (*lr) + namesize + fuidsz + xvatsize; 252132718Skan lrsize = sizeof (*lr); 253132718Skan } else { 254132718Skan aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0; 255132718Skan txsize = 256132718Skan sizeof (lr_acl_create_t) + namesize + fuidsz + 257132718Skan ZIL_ACE_LENGTH(aclsize) + xvatsize; 258132718Skan lrsize = sizeof (lr_acl_create_t); 259132718Skan } 260132718Skan 261132718Skan itx = zil_itx_create(txtype, txsize); 262132718Skan 263132718Skan lr = (lr_create_t *)&itx->itx_lr; 264132718Skan lr->lr_doid = dzp->z_id; 265132718Skan lr->lr_foid = zp->z_id; 266132718Skan lr->lr_mode = zp->z_phys->zp_mode; 267132718Skan if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) { 268132718Skan lr->lr_uid = (uint64_t)zp->z_phys->zp_uid; 269132718Skan } else { 270132718Skan lr->lr_uid = fuidp->z_fuid_owner; 271132718Skan } 272132718Skan if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) { 273132718Skan lr->lr_gid = (uint64_t)zp->z_phys->zp_gid; 274132718Skan } else { 275132718Skan lr->lr_gid = fuidp->z_fuid_group; 276132718Skan } 277132718Skan lr->lr_gen = zp->z_phys->zp_gen; 278132718Skan lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; 279132718Skan lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; 280132718Skan lr->lr_rdev = zp->z_phys->zp_rdev; 281132718Skan 282132718Skan /* 283132718Skan * Fill in xvattr info if any 284132718Skan */ 285132718Skan if (vap->va_mask & AT_XVATTR) { 286132718Skan zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); 287132718Skan end = (caddr_t)lr + lrsize 
+ xvatsize; 288132718Skan } else { 289132718Skan end = (caddr_t)lr + lrsize; 290132718Skan } 291132718Skan 292132718Skan /* Now fill in any ACL info */ 293132718Skan 294132718Skan if (vsecp) { 295132718Skan lracl = (lr_acl_create_t *)&itx->itx_lr; 296132718Skan lracl->lr_aclcnt = vsecp->vsa_aclcnt; 297132718Skan lracl->lr_acl_bytes = aclsize; 298132718Skan lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; 299132718Skan lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; 300132718Skan if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS) 301132718Skan lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; 302132718Skan else 303132718Skan lracl->lr_acl_flags = 0; 304132718Skan 305132718Skan bcopy(vsecp->vsa_aclentp, end, aclsize); 306132718Skan end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize); 307132718Skan } 308132718Skan 309132718Skan /* drop in FUID info */ 310132718Skan if (fuidp) { 311132718Skan end = zfs_log_fuid_ids(fuidp, end); 312132718Skan end = zfs_log_fuid_domains(fuidp, end); 313132718Skan } 314132718Skan /* 315132718Skan * Now place file name in log record 316132718Skan */ 317132718Skan bcopy(name, end, namesize); 318132718Skan 319132718Skan seq = zil_itx_assign(zilog, itx, tx); 320132718Skan dzp->z_last_itx = seq; 321132718Skan zp->z_last_itx = seq; 322132718Skan} 323132718Skan 324132718Skan/* 325132718Skan * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions. 
326132718Skan */ 327132718Skanvoid 328132718Skanzfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 329132718Skan znode_t *dzp, char *name) 330132718Skan{ 331132718Skan itx_t *itx; 332132718Skan uint64_t seq; 333132718Skan lr_remove_t *lr; 334132718Skan size_t namesize = strlen(name) + 1; 335132718Skan 336132718Skan if (zilog == NULL) 337132718Skan return; 338132718Skan 339132718Skan itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 340132718Skan lr = (lr_remove_t *)&itx->itx_lr; 341132718Skan lr->lr_doid = dzp->z_id; 342132718Skan bcopy(name, (char *)(lr + 1), namesize); 343132718Skan 344132718Skan seq = zil_itx_assign(zilog, itx, tx); 345132718Skan dzp->z_last_itx = seq; 346132718Skan} 347132718Skan 348132718Skan/* 349132718Skan * zfs_log_link() handles TX_LINK transactions. 350132718Skan */ 351132718Skanvoid 352132718Skanzfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 353132718Skan znode_t *dzp, znode_t *zp, char *name) 354132718Skan{ 355132718Skan itx_t *itx; 356132718Skan uint64_t seq; 357132718Skan lr_link_t *lr; 358132718Skan size_t namesize = strlen(name) + 1; 359132718Skan 360132718Skan if (zilog == NULL) 361132718Skan return; 362132718Skan 363132718Skan itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 364132718Skan lr = (lr_link_t *)&itx->itx_lr; 365132718Skan lr->lr_doid = dzp->z_id; 366132718Skan lr->lr_link_obj = zp->z_id; 367132718Skan bcopy(name, (char *)(lr + 1), namesize); 368132718Skan 369132718Skan seq = zil_itx_assign(zilog, itx, tx); 370132718Skan dzp->z_last_itx = seq; 371132718Skan zp->z_last_itx = seq; 372132718Skan} 373132718Skan 374132718Skan/* 375132718Skan * zfs_log_symlink() handles TX_SYMLINK transactions. 
376132718Skan */ 377132718Skanvoid 378132718Skanzfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 379132718Skan znode_t *dzp, znode_t *zp, char *name, char *link) 380132718Skan{ 381132718Skan itx_t *itx; 382132718Skan uint64_t seq; 383132718Skan lr_create_t *lr; 384132718Skan size_t namesize = strlen(name) + 1; 385132718Skan size_t linksize = strlen(link) + 1; 386132718Skan 387132718Skan if (zilog == NULL) 388132718Skan return; 389132718Skan 390132718Skan itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); 391132718Skan lr = (lr_create_t *)&itx->itx_lr; 392132718Skan lr->lr_doid = dzp->z_id; 393132718Skan lr->lr_foid = zp->z_id; 394132718Skan lr->lr_mode = zp->z_phys->zp_mode; 395132718Skan lr->lr_uid = zp->z_phys->zp_uid; 396132718Skan lr->lr_gid = zp->z_phys->zp_gid; 397132718Skan lr->lr_gen = zp->z_phys->zp_gen; 398132718Skan lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; 399132718Skan lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; 400132718Skan bcopy(name, (char *)(lr + 1), namesize); 401132718Skan bcopy(link, (char *)(lr + 1) + namesize, linksize); 402132718Skan 403132718Skan seq = zil_itx_assign(zilog, itx, tx); 404132718Skan dzp->z_last_itx = seq; 405132718Skan zp->z_last_itx = seq; 406132718Skan} 407132718Skan 408132718Skan/* 409132718Skan * zfs_log_rename() handles TX_RENAME transactions. 
410132718Skan */ 411132718Skanvoid 412132718Skanzfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 413132718Skan znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) 414132718Skan{ 415132718Skan itx_t *itx; 416132718Skan uint64_t seq; 417132718Skan lr_rename_t *lr; 418132718Skan size_t snamesize = strlen(sname) + 1; 419132718Skan size_t dnamesize = strlen(dname) + 1; 420132718Skan 421132718Skan if (zilog == NULL) 422132718Skan return; 423132718Skan 424132718Skan itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); 425132718Skan lr = (lr_rename_t *)&itx->itx_lr; 426132718Skan lr->lr_sdoid = sdzp->z_id; 427132718Skan lr->lr_tdoid = tdzp->z_id; 428132718Skan bcopy(sname, (char *)(lr + 1), snamesize); 429132718Skan bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize); 430132718Skan 431132718Skan seq = zil_itx_assign(zilog, itx, tx); 432132718Skan sdzp->z_last_itx = seq; 433132718Skan tdzp->z_last_itx = seq; 434132718Skan szp->z_last_itx = seq; 435132718Skan} 436132718Skan 437132718Skan/* 438132718Skan * zfs_log_write() handles TX_WRITE transactions. 
439132718Skan */ 440132718Skanssize_t zfs_immediate_write_sz = 32768; 441132718Skan 442132718Skan#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \ 443132718Skan sizeof (lr_write_t)) 444132718Skan 445132718Skanvoid 446132718Skanzfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, 447132718Skan znode_t *zp, offset_t off, ssize_t resid, int ioflag) 448132718Skan{ 449132718Skan itx_wr_state_t write_state; 450132718Skan boolean_t slogging; 451132718Skan uintptr_t fsync_cnt; 452132718Skan 453132718Skan if (zilog == NULL || zp->z_unlinked) 454132718Skan return; 455132718Skan 456132718Skan /* 457132718Skan * Writes are handled in three different ways: 458132718Skan * 459132718Skan * WR_INDIRECT: 460132718Skan * If the write is greater than zfs_immediate_write_sz and there are 461132718Skan * no separate logs in this pool then later *if* we need to log the 462132718Skan * write then dmu_sync() is used to immediately write the block and 463132718Skan * its block pointer is put in the log record. 464132718Skan * WR_COPIED: 465132718Skan * If we know we'll immediately be committing the 466132718Skan * transaction (FSYNC or FDSYNC), the we allocate a larger 467132718Skan * log record here for the data and copy the data in. 468132718Skan * WR_NEED_COPY: 469132718Skan * Otherwise we don't allocate a buffer, and *if* we need to 470132718Skan * flush the write later then a buffer is allocated and 471132718Skan * we retrieve the data using the dmu. 
472132718Skan */ 473132718Skan slogging = spa_has_slogs(zilog->zl_spa); 474132718Skan if (resid > zfs_immediate_write_sz && !slogging) 475132718Skan write_state = WR_INDIRECT; 476132718Skan else if (ioflag & (FSYNC | FDSYNC)) 477132718Skan write_state = WR_COPIED; 478132718Skan else 479132718Skan write_state = WR_NEED_COPY; 480132718Skan 481132718Skan if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) { 482132718Skan (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1)); 483132718Skan } 484132718Skan 485132718Skan while (resid) { 486132718Skan itx_t *itx; 487132718Skan lr_write_t *lr; 488132718Skan ssize_t len; 489132718Skan 490132718Skan /* 491132718Skan * If there are slogs and the write would overflow the largest 492132718Skan * block, then because we don't want to use the main pool 493132718Skan * to dmu_sync, we have to split the write. 494132718Skan */ 495132718Skan if (slogging && resid > ZIL_MAX_LOG_DATA) 496132718Skan len = SPA_MAXBLOCKSIZE >> 1; 497132718Skan else 498132718Skan len = resid; 499132718Skan 500132718Skan itx = zil_itx_create(txtype, sizeof (*lr) + 501132718Skan (write_state == WR_COPIED ? 
len : 0)); 502132718Skan lr = (lr_write_t *)&itx->itx_lr; 503132718Skan if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os, 504132718Skan zp->z_id, off, len, lr + 1) != 0) { 505132718Skan kmem_free(itx, offsetof(itx_t, itx_lr) + 506132718Skan itx->itx_lr.lrc_reclen); 507132718Skan itx = zil_itx_create(txtype, sizeof (*lr)); 508132718Skan lr = (lr_write_t *)&itx->itx_lr; 509132718Skan write_state = WR_NEED_COPY; 510132718Skan } 511132718Skan 512132718Skan itx->itx_wr_state = write_state; 513132718Skan if (write_state == WR_NEED_COPY) 514132718Skan itx->itx_sod += len; 515132718Skan lr->lr_foid = zp->z_id; 516132718Skan lr->lr_offset = off; 517132718Skan lr->lr_length = len; 518132718Skan lr->lr_blkoff = 0; 519132718Skan BP_ZERO(&lr->lr_blkptr); 520132718Skan 521132718Skan itx->itx_private = zp->z_zfsvfs; 522132718Skan 523132718Skan if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) || 524132718Skan (ioflag & (FSYNC | FDSYNC))) 525132718Skan itx->itx_sync = B_TRUE; 526132718Skan else 527132718Skan itx->itx_sync = B_FALSE; 528132718Skan 529132718Skan zp->z_last_itx = zil_itx_assign(zilog, itx, tx); 530132718Skan 531132718Skan off += len; 532132718Skan resid -= len; 533132718Skan } 534132718Skan} 535132718Skan 536132718Skan/* 537132718Skan * zfs_log_truncate() handles TX_TRUNCATE transactions. 
538132718Skan */ 539132718Skanvoid 540132718Skanzfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, 541132718Skan znode_t *zp, uint64_t off, uint64_t len) 542132718Skan{ 543132718Skan itx_t *itx; 544132718Skan uint64_t seq; 545132718Skan lr_truncate_t *lr; 546132718Skan 547132718Skan if (zilog == NULL || zp->z_unlinked) 548132718Skan return; 549132718Skan 550132718Skan itx = zil_itx_create(txtype, sizeof (*lr)); 551132718Skan lr = (lr_truncate_t *)&itx->itx_lr; 552132718Skan lr->lr_foid = zp->z_id; 553132718Skan lr->lr_offset = off; 554132718Skan lr->lr_length = len; 555132718Skan 556132718Skan itx->itx_sync = (zp->z_sync_cnt != 0); 557132718Skan seq = zil_itx_assign(zilog, itx, tx); 558132718Skan zp->z_last_itx = seq; 559132718Skan} 560132718Skan 561132718Skan/* 562132718Skan * zfs_log_setattr() handles TX_SETATTR transactions. 563132718Skan */ 564132718Skanvoid 565132718Skanzfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, 566132718Skan znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) 567132718Skan{ 568132718Skan itx_t *itx; 569132718Skan uint64_t seq; 570132718Skan lr_setattr_t *lr; 571132718Skan xvattr_t *xvap = (xvattr_t *)vap; 572132718Skan size_t recsize = sizeof (lr_setattr_t); 573132718Skan void *start; 574132718Skan 575132718Skan 576132718Skan if (zilog == NULL || zp->z_unlinked) 577132718Skan return; 578132718Skan 579132718Skan /* 580132718Skan * If XVATTR set, then log record size needs to allow 581132718Skan * for lr_attr_t + xvattr mask, mapsize and create time 582132718Skan * plus actual attribute values 583132718Skan */ 584132718Skan if (vap->va_mask & AT_XVATTR) 585132718Skan recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize); 586132718Skan 587132718Skan if (fuidp) 588132718Skan recsize += fuidp->z_domain_str_sz; 589132718Skan 590132718Skan itx = zil_itx_create(txtype, recsize); 591132718Skan lr = (lr_setattr_t *)&itx->itx_lr; 592132718Skan lr->lr_foid = zp->z_id; 593132718Skan lr->lr_mask = 
(uint64_t)mask_applied; 594132718Skan lr->lr_mode = (uint64_t)vap->va_mode; 595132718Skan if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid)) 596132718Skan lr->lr_uid = fuidp->z_fuid_owner; 597132718Skan else 598132718Skan lr->lr_uid = (uint64_t)vap->va_uid; 599132718Skan 600132718Skan if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid)) 601132718Skan lr->lr_gid = fuidp->z_fuid_group; 602132718Skan else 603132718Skan lr->lr_gid = (uint64_t)vap->va_gid; 604132718Skan 605132718Skan lr->lr_size = (uint64_t)vap->va_size; 606132718Skan ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); 607132718Skan ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); 608132718Skan start = (lr_setattr_t *)(lr + 1); 609132718Skan if (vap->va_mask & AT_XVATTR) { 610132718Skan zfs_log_xvattr((lr_attr_t *)start, xvap); 611132718Skan start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); 612132718Skan } 613132718Skan 614132718Skan /* 615132718Skan * Now stick on domain information if any on end 616132718Skan */ 617132718Skan 618132718Skan if (fuidp) 619132718Skan (void) zfs_log_fuid_domains(fuidp, start); 620132718Skan 621132718Skan itx->itx_sync = (zp->z_sync_cnt != 0); 622132718Skan seq = zil_itx_assign(zilog, itx, tx); 623132718Skan zp->z_last_itx = seq; 624132718Skan} 625132718Skan 626132718Skan/* 627132718Skan * zfs_log_acl() handles TX_ACL transactions. 628132718Skan */ 629132718Skanvoid 630132718Skanzfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, 631132718Skan vsecattr_t *vsecp, zfs_fuid_info_t *fuidp) 632132718Skan{ 633132718Skan itx_t *itx; 634132718Skan uint64_t seq; 635132718Skan lr_acl_v0_t *lrv0; 636132718Skan lr_acl_t *lr; 637132718Skan int txtype; 638132718Skan int lrsize; 639132718Skan size_t txsize; 640132718Skan size_t aclbytes = vsecp->vsa_aclentsz; 641132718Skan 642132718Skan if (zilog == NULL || zp->z_unlinked) 643132718Skan return; 644132718Skan 645132718Skan txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ? 
646132718Skan TX_ACL_V0 : TX_ACL; 647132718Skan 648132718Skan if (txtype == TX_ACL) 649132718Skan lrsize = sizeof (*lr); 650132718Skan else 651132718Skan lrsize = sizeof (*lrv0); 652132718Skan 653132718Skan txsize = lrsize + 654132718Skan ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) + 655132718Skan (fuidp ? fuidp->z_domain_str_sz : 0) + 656132718Skan sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0); 657132718Skan 658132718Skan itx = zil_itx_create(txtype, txsize); 659132718Skan 660132718Skan lr = (lr_acl_t *)&itx->itx_lr; 661132718Skan lr->lr_foid = zp->z_id; 662132718Skan if (txtype == TX_ACL) { 663132718Skan lr->lr_acl_bytes = aclbytes; 664132718Skan lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; 665132718Skan lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; 666132718Skan if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) 667132718Skan lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; 668132718Skan else 669132718Skan lr->lr_acl_flags = 0; 670132718Skan } 671132718Skan lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt; 672132718Skan 673132718Skan if (txtype == TX_ACL_V0) { 674132718Skan lrv0 = (lr_acl_v0_t *)lr; 675132718Skan bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes); 676132718Skan } else { 677132718Skan void *start = (ace_t *)(lr + 1); 678132718Skan 679132718Skan bcopy(vsecp->vsa_aclentp, start, aclbytes); 680132718Skan 681132718Skan start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes); 682132718Skan 683132718Skan if (fuidp) { 684132718Skan start = zfs_log_fuid_ids(fuidp, start); 685132718Skan (void) zfs_log_fuid_domains(fuidp, start); 686132718Skan } 687132718Skan } 688132718Skan 689132718Skan itx->itx_sync = (zp->z_sync_cnt != 0); 690132718Skan seq = zil_itx_assign(zilog, itx, tx); 691132718Skan zp->z_last_itx = seq; 692132718Skan} 693132718Skan