zfs_log.c revision 185029
1168404Spjd/* 2168404Spjd * CDDL HEADER START 3168404Spjd * 4168404Spjd * The contents of this file are subject to the terms of the 5168404Spjd * Common Development and Distribution License (the "License"). 6168404Spjd * You may not use this file except in compliance with the License. 7168404Spjd * 8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9168404Spjd * or http://www.opensolaris.org/os/licensing. 10168404Spjd * See the License for the specific language governing permissions 11168404Spjd * and limitations under the License. 12168404Spjd * 13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each 14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15168404Spjd * If applicable, add the following below this CDDL HEADER, with the 16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18168404Spjd * 19168404Spjd * CDDL HEADER END 20168404Spjd */ 21168404Spjd/* 22185029Spjd * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23168404Spjd * Use is subject to license terms. 24168404Spjd */ 25168404Spjd 26168404Spjd#include <sys/types.h> 27168404Spjd#include <sys/param.h> 28168404Spjd#include <sys/systm.h> 29168404Spjd#include <sys/sysmacros.h> 30168404Spjd#include <sys/cmn_err.h> 31168404Spjd#include <sys/kmem.h> 32168404Spjd#include <sys/file.h> 33168404Spjd#include <sys/vfs.h> 34168404Spjd#include <sys/zfs_znode.h> 35168404Spjd#include <sys/zfs_dir.h> 36168404Spjd#include <sys/zil.h> 37185029Spjd#include <sys/zil_impl.h> 38168404Spjd#include <sys/byteorder.h> 39168962Spjd#include <sys/policy.h> 40168404Spjd#include <sys/stat.h> 41168404Spjd#include <sys/acl.h> 42168404Spjd#include <sys/dmu.h> 43168404Spjd#include <sys/spa.h> 44185029Spjd#include <sys/zfs_fuid.h> 45168404Spjd 46168404Spjd/* 47168404Spjd * All the functions in this file are used to construct the log entries 48185029Spjd * to record transactions. They allocate * an intent log transaction 49168404Spjd * structure (itx_t) and save within it all the information necessary to 50168404Spjd * possibly replay the transaction. The itx is then assigned a sequence 51168404Spjd * number and inserted in the in-memory list anchored in the zilog. 52168404Spjd */ 53168404Spjd 54185029Spjdint 55185029Spjdzfs_log_create_txtype(zil_create_t type, vsecattr_t *vsecp, vattr_t *vap) 56185029Spjd{ 57185029Spjd int isxvattr = (vap->va_mask & AT_XVATTR); 58185029Spjd switch (type) { 59185029Spjd case Z_FILE: 60185029Spjd if (vsecp == NULL && !isxvattr) 61185029Spjd return (TX_CREATE); 62185029Spjd if (vsecp && isxvattr) 63185029Spjd return (TX_CREATE_ACL_ATTR); 64185029Spjd if (vsecp) 65185029Spjd return (TX_CREATE_ACL); 66185029Spjd else 67185029Spjd return (TX_CREATE_ATTR); 68185029Spjd /*NOTREACHED*/ 69185029Spjd case Z_DIR: 70185029Spjd if (vsecp == NULL && !isxvattr) 71185029Spjd return (TX_MKDIR); 72185029Spjd if (vsecp && isxvattr) 73185029Spjd return (TX_MKDIR_ACL_ATTR); 74185029Spjd if (vsecp) 75185029Spjd return (TX_MKDIR_ACL); 76185029Spjd else 77185029Spjd return (TX_MKDIR_ATTR); 78185029Spjd case Z_XATTRDIR: 79185029Spjd return (TX_MKXATTR); 80185029Spjd } 81185029Spjd ASSERT(0); 82185029Spjd return (TX_MAX_TYPE); 83185029Spjd} 84185029Spjd 85168404Spjd/* 86185029Spjd * build up the log data necessary for logging xvattr_t 87185029Spjd * First lr_attr_t is initialized. following the lr_attr_t 88185029Spjd * is the mapsize and attribute bitmap copied from the xvattr_t. 89185029Spjd * Following the bitmap and bitmapsize two 64 bit words are reserved 90185029Spjd * for the create time which may be set. Following the create time 91185029Spjd * records a single 64 bit integer which has the bits to set on 92185029Spjd * replay for the xvattr. 93185029Spjd */ 94185029Spjdstatic void 95185029Spjdzfs_log_xvattr(lr_attr_t *lrattr, xvattr_t *xvap) 96185029Spjd{ 97185029Spjd uint32_t *bitmap; 98185029Spjd uint64_t *attrs; 99185029Spjd uint64_t *crtime; 100185029Spjd xoptattr_t *xoap; 101185029Spjd void *scanstamp; 102185029Spjd int i; 103185029Spjd 104185029Spjd xoap = xva_getxoptattr(xvap); 105185029Spjd ASSERT(xoap); 106185029Spjd 107185029Spjd lrattr->lr_attr_masksize = xvap->xva_mapsize; 108185029Spjd bitmap = &lrattr->lr_attr_bitmap; 109185029Spjd for (i = 0; i != xvap->xva_mapsize; i++, bitmap++) { 110185029Spjd *bitmap = xvap->xva_reqattrmap[i]; 111185029Spjd } 112185029Spjd 113185029Spjd /* Now pack the attributes up in a single uint64_t */ 114185029Spjd attrs = (uint64_t *)bitmap; 115185029Spjd crtime = attrs + 1; 116185029Spjd scanstamp = (caddr_t)(crtime + 2); 117185029Spjd *attrs = 0; 118185029Spjd if (XVA_ISSET_REQ(xvap, XAT_READONLY)) 119185029Spjd *attrs |= (xoap->xoa_readonly == 0) ? 0 : 120185029Spjd XAT0_READONLY; 121185029Spjd if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) 122185029Spjd *attrs |= (xoap->xoa_hidden == 0) ? 0 : 123185029Spjd XAT0_HIDDEN; 124185029Spjd if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) 125185029Spjd *attrs |= (xoap->xoa_system == 0) ? 0 : 126185029Spjd XAT0_SYSTEM; 127185029Spjd if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) 128185029Spjd *attrs |= (xoap->xoa_archive == 0) ? 0 : 129185029Spjd XAT0_ARCHIVE; 130185029Spjd if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) 131185029Spjd *attrs |= (xoap->xoa_immutable == 0) ? 0 : 132185029Spjd XAT0_IMMUTABLE; 133185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) 134185029Spjd *attrs |= (xoap->xoa_nounlink == 0) ? 0 : 135185029Spjd XAT0_NOUNLINK; 136185029Spjd if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) 137185029Spjd *attrs |= (xoap->xoa_appendonly == 0) ? 0 : 138185029Spjd XAT0_APPENDONLY; 139185029Spjd if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) 140185029Spjd *attrs |= (xoap->xoa_opaque == 0) ? 0 : 141185029Spjd XAT0_APPENDONLY; 142185029Spjd if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) 143185029Spjd *attrs |= (xoap->xoa_nodump == 0) ? 0 : 144185029Spjd XAT0_NODUMP; 145185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) 146185029Spjd *attrs |= (xoap->xoa_av_quarantined == 0) ? 0 : 147185029Spjd XAT0_AV_QUARANTINED; 148185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) 149185029Spjd *attrs |= (xoap->xoa_av_modified == 0) ? 0 : 150185029Spjd XAT0_AV_MODIFIED; 151185029Spjd if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) 152185029Spjd ZFS_TIME_ENCODE(&xoap->xoa_createtime, crtime); 153185029Spjd if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) 154185029Spjd bcopy(xoap->xoa_av_scanstamp, scanstamp, AV_SCANSTAMP_SZ); 155185029Spjd} 156185029Spjd 157185029Spjdstatic void * 158185029Spjdzfs_log_fuid_ids(zfs_fuid_info_t *fuidp, void *start) 159185029Spjd{ 160185029Spjd zfs_fuid_t *zfuid; 161185029Spjd uint64_t *fuidloc = start; 162185029Spjd 163185029Spjd /* First copy in the ACE FUIDs */ 164185029Spjd for (zfuid = list_head(&fuidp->z_fuids); zfuid; 165185029Spjd zfuid = list_next(&fuidp->z_fuids, zfuid)) { 166185029Spjd *fuidloc++ = zfuid->z_logfuid; 167185029Spjd } 168185029Spjd return (fuidloc); 169185029Spjd} 170185029Spjd 171185029Spjd 172185029Spjdstatic void * 173185029Spjdzfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start) 174185029Spjd{ 175185029Spjd zfs_fuid_domain_t *zdomain; 176185029Spjd 177185029Spjd /* now copy in the domain info, if any */ 178185029Spjd if (fuidp->z_domain_str_sz != 0) { 179185029Spjd for (zdomain = list_head(&fuidp->z_domains); zdomain; 180185029Spjd zdomain = list_next(&fuidp->z_domains, zdomain)) { 181185029Spjd bcopy((void *)zdomain->z_domain, start, 182185029Spjd strlen(zdomain->z_domain) + 1); 183185029Spjd start = (caddr_t)start + 184185029Spjd strlen(zdomain->z_domain) + 1; 185185029Spjd } 186185029Spjd } 187185029Spjd return (start); 188185029Spjd} 189185029Spjd 190185029Spjd/* 191185029Spjd * zfs_log_create() is used to handle TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, 192185029Spjd * TX_MKDIR_ATTR and TX_MKXATTR 193168404Spjd * transactions. 194185029Spjd * 195185029Spjd * TX_CREATE and TX_MKDIR are standard creates, but they may have FUID 196185029Spjd * domain information appended prior to the name. In this case the 197185029Spjd * uid/gid in the log record will be a log centric FUID. 198185029Spjd * 199185029Spjd * TX_CREATE_ACL_ATTR and TX_MKDIR_ACL_ATTR handle special creates that 200185029Spjd * may contain attributes, ACL and optional fuid information. 201185029Spjd * 202185029Spjd * TX_CREATE_ACL and TX_MKDIR_ACL handle special creates that specify 203185029Spjd * and ACL and normal users/groups in the ACEs. 204185029Spjd * 205185029Spjd * There may be an optional xvattr attribute information similar 206185029Spjd * to zfs_log_setattr. 207185029Spjd * 208185029Spjd * Also, after the file name "domain" strings may be appended. 209168404Spjd */ 210168404Spjdvoid 211185029Spjdzfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 212185029Spjd znode_t *dzp, znode_t *zp, char *name, vsecattr_t *vsecp, 213185029Spjd zfs_fuid_info_t *fuidp, vattr_t *vap) 214168404Spjd{ 215168404Spjd itx_t *itx; 216168404Spjd uint64_t seq; 217168404Spjd lr_create_t *lr; 218185029Spjd lr_acl_create_t *lracl; 219185029Spjd size_t aclsize; 220185029Spjd size_t xvatsize = 0; 221185029Spjd size_t txsize; 222185029Spjd xvattr_t *xvap = (xvattr_t *)vap; 223185029Spjd void *end; 224185029Spjd size_t lrsize; 225168404Spjd size_t namesize = strlen(name) + 1; 226185029Spjd size_t fuidsz = 0; 227168404Spjd 228168404Spjd if (zilog == NULL) 229168404Spjd return; 230168404Spjd 231185029Spjd /* 232185029Spjd * If we have FUIDs present then add in space for 233185029Spjd * domains and ACE fuid's if any. 234185029Spjd */ 235185029Spjd if (fuidp) { 236185029Spjd fuidsz += fuidp->z_domain_str_sz; 237185029Spjd fuidsz += fuidp->z_fuid_cnt * sizeof (uint64_t); 238185029Spjd } 239185029Spjd 240185029Spjd if (vap->va_mask & AT_XVATTR) 241185029Spjd xvatsize = ZIL_XVAT_SIZE(xvap->xva_mapsize); 242185029Spjd 243185029Spjd if ((int)txtype == TX_CREATE_ATTR || (int)txtype == TX_MKDIR_ATTR || 244185029Spjd (int)txtype == TX_CREATE || (int)txtype == TX_MKDIR || 245185029Spjd (int)txtype == TX_MKXATTR) { 246185029Spjd txsize = sizeof (*lr) + namesize + fuidsz + xvatsize; 247185029Spjd lrsize = sizeof (*lr); 248185029Spjd } else { 249185029Spjd aclsize = (vsecp) ? vsecp->vsa_aclentsz : 0; 250185029Spjd txsize = 251185029Spjd sizeof (lr_acl_create_t) + namesize + fuidsz + 252185029Spjd ZIL_ACE_LENGTH(aclsize) + xvatsize; 253185029Spjd lrsize = sizeof (lr_acl_create_t); 254185029Spjd } 255185029Spjd 256185029Spjd itx = zil_itx_create(txtype, txsize); 257185029Spjd 258168404Spjd lr = (lr_create_t *)&itx->itx_lr; 259168404Spjd lr->lr_doid = dzp->z_id; 260168404Spjd lr->lr_foid = zp->z_id; 261168404Spjd lr->lr_mode = zp->z_phys->zp_mode; 262185029Spjd if (!IS_EPHEMERAL(zp->z_phys->zp_uid)) { 263185029Spjd lr->lr_uid = (uint64_t)zp->z_phys->zp_uid; 264185029Spjd } else { 265185029Spjd lr->lr_uid = fuidp->z_fuid_owner; 266185029Spjd } 267185029Spjd if (!IS_EPHEMERAL(zp->z_phys->zp_gid)) { 268185029Spjd lr->lr_gid = (uint64_t)zp->z_phys->zp_gid; 269185029Spjd } else { 270185029Spjd lr->lr_gid = fuidp->z_fuid_group; 271185029Spjd } 272168404Spjd lr->lr_gen = zp->z_phys->zp_gen; 273168404Spjd lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; 274168404Spjd lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; 275168404Spjd lr->lr_rdev = zp->z_phys->zp_rdev; 276168404Spjd 277185029Spjd /* 278185029Spjd * Fill in xvattr info if any 279185029Spjd */ 280185029Spjd if (vap->va_mask & AT_XVATTR) { 281185029Spjd zfs_log_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), xvap); 282185029Spjd end = (caddr_t)lr + lrsize + xvatsize; 283185029Spjd } else { 284185029Spjd end = (caddr_t)lr + lrsize; 285185029Spjd } 286185029Spjd 287185029Spjd /* Now fill in any ACL info */ 288185029Spjd 289185029Spjd if (vsecp) { 290185029Spjd lracl = (lr_acl_create_t *)&itx->itx_lr; 291185029Spjd lracl->lr_aclcnt = vsecp->vsa_aclcnt; 292185029Spjd lracl->lr_acl_bytes = aclsize; 293185029Spjd lracl->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; 294185029Spjd lracl->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; 295185029Spjd if (vsecp->vsa_aclflags & VSA_ACE_ACLFLAGS) 296185029Spjd lracl->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; 297185029Spjd else 298185029Spjd lracl->lr_acl_flags = 0; 299185029Spjd 300185029Spjd bcopy(vsecp->vsa_aclentp, end, aclsize); 301185029Spjd end = (caddr_t)end + ZIL_ACE_LENGTH(aclsize); 302185029Spjd } 303185029Spjd 304185029Spjd /* drop in FUID info */ 305185029Spjd if (fuidp) { 306185029Spjd end = zfs_log_fuid_ids(fuidp, end); 307185029Spjd end = zfs_log_fuid_domains(fuidp, end); 308185029Spjd } 309185029Spjd /* 310185029Spjd * Now place file name in log record 311185029Spjd */ 312185029Spjd bcopy(name, end, namesize); 313185029Spjd 314168404Spjd seq = zil_itx_assign(zilog, itx, tx); 315168404Spjd dzp->z_last_itx = seq; 316168404Spjd zp->z_last_itx = seq; 317168404Spjd} 318168404Spjd 319168404Spjd/* 320168404Spjd * zfs_log_remove() handles both TX_REMOVE and TX_RMDIR transactions. 321168404Spjd */ 322168404Spjdvoid 323185029Spjdzfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 324168404Spjd znode_t *dzp, char *name) 325168404Spjd{ 326168404Spjd itx_t *itx; 327168404Spjd uint64_t seq; 328168404Spjd lr_remove_t *lr; 329168404Spjd size_t namesize = strlen(name) + 1; 330168404Spjd 331168404Spjd if (zilog == NULL) 332168404Spjd return; 333168404Spjd 334168404Spjd itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 335168404Spjd lr = (lr_remove_t *)&itx->itx_lr; 336168404Spjd lr->lr_doid = dzp->z_id; 337168404Spjd bcopy(name, (char *)(lr + 1), namesize); 338168404Spjd 339168404Spjd seq = zil_itx_assign(zilog, itx, tx); 340168404Spjd dzp->z_last_itx = seq; 341168404Spjd} 342168404Spjd 343168404Spjd/* 344168404Spjd * zfs_log_link() handles TX_LINK transactions. 345168404Spjd */ 346168404Spjdvoid 347185029Spjdzfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 348168404Spjd znode_t *dzp, znode_t *zp, char *name) 349168404Spjd{ 350168404Spjd itx_t *itx; 351168404Spjd uint64_t seq; 352168404Spjd lr_link_t *lr; 353168404Spjd size_t namesize = strlen(name) + 1; 354168404Spjd 355168404Spjd if (zilog == NULL) 356168404Spjd return; 357168404Spjd 358168404Spjd itx = zil_itx_create(txtype, sizeof (*lr) + namesize); 359168404Spjd lr = (lr_link_t *)&itx->itx_lr; 360168404Spjd lr->lr_doid = dzp->z_id; 361168404Spjd lr->lr_link_obj = zp->z_id; 362168404Spjd bcopy(name, (char *)(lr + 1), namesize); 363168404Spjd 364168404Spjd seq = zil_itx_assign(zilog, itx, tx); 365168404Spjd dzp->z_last_itx = seq; 366168404Spjd zp->z_last_itx = seq; 367168404Spjd} 368168404Spjd 369168404Spjd/* 370168404Spjd * zfs_log_symlink() handles TX_SYMLINK transactions. 371168404Spjd */ 372168404Spjdvoid 373185029Spjdzfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 374185029Spjd znode_t *dzp, znode_t *zp, char *name, char *link) 375168404Spjd{ 376168404Spjd itx_t *itx; 377168404Spjd uint64_t seq; 378168404Spjd lr_create_t *lr; 379168404Spjd size_t namesize = strlen(name) + 1; 380168404Spjd size_t linksize = strlen(link) + 1; 381168404Spjd 382168404Spjd if (zilog == NULL) 383168404Spjd return; 384168404Spjd 385168404Spjd itx = zil_itx_create(txtype, sizeof (*lr) + namesize + linksize); 386168404Spjd lr = (lr_create_t *)&itx->itx_lr; 387168404Spjd lr->lr_doid = dzp->z_id; 388168404Spjd lr->lr_foid = zp->z_id; 389168404Spjd lr->lr_mode = zp->z_phys->zp_mode; 390168404Spjd lr->lr_uid = zp->z_phys->zp_uid; 391168404Spjd lr->lr_gid = zp->z_phys->zp_gid; 392168404Spjd lr->lr_gen = zp->z_phys->zp_gen; 393168404Spjd lr->lr_crtime[0] = zp->z_phys->zp_crtime[0]; 394168404Spjd lr->lr_crtime[1] = zp->z_phys->zp_crtime[1]; 395168404Spjd bcopy(name, (char *)(lr + 1), namesize); 396168404Spjd bcopy(link, (char *)(lr + 1) + namesize, linksize); 397168404Spjd 398168404Spjd seq = zil_itx_assign(zilog, itx, tx); 399168404Spjd dzp->z_last_itx = seq; 400168404Spjd zp->z_last_itx = seq; 401168404Spjd} 402168404Spjd 403168404Spjd/* 404168404Spjd * zfs_log_rename() handles TX_RENAME transactions. 405168404Spjd */ 406168404Spjdvoid 407185029Spjdzfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype, 408168404Spjd znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp) 409168404Spjd{ 410168404Spjd itx_t *itx; 411168404Spjd uint64_t seq; 412168404Spjd lr_rename_t *lr; 413168404Spjd size_t snamesize = strlen(sname) + 1; 414168404Spjd size_t dnamesize = strlen(dname) + 1; 415168404Spjd 416168404Spjd if (zilog == NULL) 417168404Spjd return; 418168404Spjd 419168404Spjd itx = zil_itx_create(txtype, sizeof (*lr) + snamesize + dnamesize); 420168404Spjd lr = (lr_rename_t *)&itx->itx_lr; 421168404Spjd lr->lr_sdoid = sdzp->z_id; 422168404Spjd lr->lr_tdoid = tdzp->z_id; 423168404Spjd bcopy(sname, (char *)(lr + 1), snamesize); 424168404Spjd bcopy(dname, (char *)(lr + 1) + snamesize, dnamesize); 425168404Spjd 426168404Spjd seq = zil_itx_assign(zilog, itx, tx); 427168404Spjd sdzp->z_last_itx = seq; 428168404Spjd tdzp->z_last_itx = seq; 429168404Spjd szp->z_last_itx = seq; 430168404Spjd} 431168404Spjd 432168404Spjd/* 433168404Spjd * zfs_log_write() handles TX_WRITE transactions. 434168404Spjd */ 435168404Spjdssize_t zfs_immediate_write_sz = 32768; 436168404Spjd 437185029Spjd#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_trailer_t) - \ 438185029Spjd sizeof (lr_write_t)) 439185029Spjd 440168404Spjdvoid 441168404Spjdzfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype, 442185029Spjd znode_t *zp, offset_t off, ssize_t resid, int ioflag) 443168404Spjd{ 444168404Spjd itx_wr_state_t write_state; 445185029Spjd boolean_t slogging; 446185029Spjd uintptr_t fsync_cnt; 447168404Spjd 448168404Spjd if (zilog == NULL || zp->z_unlinked) 449168404Spjd return; 450168404Spjd 451168404Spjd /* 452168404Spjd * Writes are handled in three different ways: 453168404Spjd * 454168404Spjd * WR_INDIRECT: 455185029Spjd * In this mode, if we need to commit the write later, then the block 456185029Spjd * is immediately written into the file system (using dmu_sync), 457185029Spjd * and a pointer to the block is put into the log record. 458185029Spjd * When the txg commits the block is linked in. 459185029Spjd * This saves additionally writing the data into the log record. 460185029Spjd * There are a few requirements for this to occur: 461185029Spjd * - write is greater than zfs_immediate_write_sz 462185029Spjd * - not using slogs (as slogs are assumed to always be faster 463185029Spjd * than writing into the main pool) 464185029Spjd * - the write occupies only one block 465168404Spjd * WR_COPIED: 466168404Spjd * If we know we'll immediately be committing the 467185029Spjd * transaction (FSYNC or FDSYNC), the we allocate a larger 468168404Spjd * log record here for the data and copy the data in. 469168404Spjd * WR_NEED_COPY: 470168404Spjd * Otherwise we don't allocate a buffer, and *if* we need to 471168404Spjd * flush the write later then a buffer is allocated and 472168404Spjd * we retrieve the data using the dmu. 473168404Spjd */ 474185029Spjd slogging = spa_has_slogs(zilog->zl_spa); 475185029Spjd if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz) 476168404Spjd write_state = WR_INDIRECT; 477185029Spjd else if (ioflag & (FSYNC | FDSYNC)) 478168404Spjd write_state = WR_COPIED; 479168404Spjd else 480168404Spjd write_state = WR_NEED_COPY; 481168404Spjd 482185029Spjd if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) { 483185029Spjd (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1)); 484185029Spjd } 485185029Spjd 486185029Spjd while (resid) { 487185029Spjd itx_t *itx; 488185029Spjd lr_write_t *lr; 489185029Spjd ssize_t len; 490185029Spjd 491185029Spjd /* 492185029Spjd * If the write would overflow the largest block then split it. 493185029Spjd */ 494185029Spjd if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA) 495185029Spjd len = SPA_MAXBLOCKSIZE >> 1; 496185029Spjd else 497185029Spjd len = resid; 498185029Spjd 499185029Spjd itx = zil_itx_create(txtype, sizeof (*lr) + 500185029Spjd (write_state == WR_COPIED ? len : 0)); 501185029Spjd lr = (lr_write_t *)&itx->itx_lr; 502185029Spjd if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os, 503185029Spjd zp->z_id, off, len, lr + 1) != 0) { 504168404Spjd kmem_free(itx, offsetof(itx_t, itx_lr) + 505168404Spjd itx->itx_lr.lrc_reclen); 506168404Spjd itx = zil_itx_create(txtype, sizeof (*lr)); 507168404Spjd lr = (lr_write_t *)&itx->itx_lr; 508168404Spjd write_state = WR_NEED_COPY; 509168404Spjd } 510168404Spjd 511185029Spjd itx->itx_wr_state = write_state; 512185029Spjd if (write_state == WR_NEED_COPY) 513185029Spjd itx->itx_sod += len; 514185029Spjd lr->lr_foid = zp->z_id; 515185029Spjd lr->lr_offset = off; 516185029Spjd lr->lr_length = len; 517185029Spjd lr->lr_blkoff = 0; 518185029Spjd BP_ZERO(&lr->lr_blkptr); 519168404Spjd 520185029Spjd itx->itx_private = zp->z_zfsvfs; 521168404Spjd 522185029Spjd if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) || 523185029Spjd (ioflag & (FSYNC | FDSYNC))) 524185029Spjd itx->itx_sync = B_TRUE; 525185029Spjd else 526185029Spjd itx->itx_sync = B_FALSE; 527185029Spjd 528185029Spjd zp->z_last_itx = zil_itx_assign(zilog, itx, tx); 529185029Spjd 530185029Spjd off += len; 531185029Spjd resid -= len; 532185029Spjd } 533168404Spjd} 534168404Spjd 535168404Spjd/* 536168404Spjd * zfs_log_truncate() handles TX_TRUNCATE transactions. 537168404Spjd */ 538168404Spjdvoid 539168404Spjdzfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype, 540168404Spjd znode_t *zp, uint64_t off, uint64_t len) 541168404Spjd{ 542168404Spjd itx_t *itx; 543168404Spjd uint64_t seq; 544168404Spjd lr_truncate_t *lr; 545168404Spjd 546168404Spjd if (zilog == NULL || zp->z_unlinked) 547168404Spjd return; 548168404Spjd 549168404Spjd itx = zil_itx_create(txtype, sizeof (*lr)); 550168404Spjd lr = (lr_truncate_t *)&itx->itx_lr; 551168404Spjd lr->lr_foid = zp->z_id; 552168404Spjd lr->lr_offset = off; 553168404Spjd lr->lr_length = len; 554168404Spjd 555168404Spjd itx->itx_sync = (zp->z_sync_cnt != 0); 556168404Spjd seq = zil_itx_assign(zilog, itx, tx); 557168404Spjd zp->z_last_itx = seq; 558168404Spjd} 559168404Spjd 560168404Spjd/* 561168404Spjd * zfs_log_setattr() handles TX_SETATTR transactions. 562168404Spjd */ 563168404Spjdvoid 564168404Spjdzfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype, 565185029Spjd znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp) 566168404Spjd{ 567185029Spjd itx_t *itx; 568185029Spjd uint64_t seq; 569185029Spjd lr_setattr_t *lr; 570185029Spjd xvattr_t *xvap = (xvattr_t *)vap; 571185029Spjd size_t recsize = sizeof (lr_setattr_t); 572185029Spjd void *start; 573168404Spjd 574185029Spjd 575168404Spjd if (zilog == NULL || zp->z_unlinked) 576168404Spjd return; 577168404Spjd 578185029Spjd /* 579185029Spjd * If XVATTR set, then log record size needs to allow 580185029Spjd * for lr_attr_t + xvattr mask, mapsize and create time 581185029Spjd * plus actual attribute values 582185029Spjd */ 583185029Spjd if (vap->va_mask & AT_XVATTR) 584185029Spjd recsize = sizeof (*lr) + ZIL_XVAT_SIZE(xvap->xva_mapsize); 585185029Spjd 586185029Spjd if (fuidp) 587185029Spjd recsize += fuidp->z_domain_str_sz; 588185029Spjd 589185029Spjd itx = zil_itx_create(txtype, recsize); 590168404Spjd lr = (lr_setattr_t *)&itx->itx_lr; 591168404Spjd lr->lr_foid = zp->z_id; 592168404Spjd lr->lr_mask = (uint64_t)mask_applied; 593168404Spjd lr->lr_mode = (uint64_t)vap->va_mode; 594185029Spjd if ((mask_applied & AT_UID) && IS_EPHEMERAL(vap->va_uid)) 595185029Spjd lr->lr_uid = fuidp->z_fuid_owner; 596185029Spjd else 597185029Spjd lr->lr_uid = (uint64_t)vap->va_uid; 598185029Spjd 599185029Spjd if ((mask_applied & AT_GID) && IS_EPHEMERAL(vap->va_gid)) 600185029Spjd lr->lr_gid = fuidp->z_fuid_group; 601185029Spjd else 602185029Spjd lr->lr_gid = (uint64_t)vap->va_gid; 603185029Spjd 604168404Spjd lr->lr_size = (uint64_t)vap->va_size; 605168404Spjd ZFS_TIME_ENCODE(&vap->va_atime, lr->lr_atime); 606168404Spjd ZFS_TIME_ENCODE(&vap->va_mtime, lr->lr_mtime); 607185029Spjd start = (lr_setattr_t *)(lr + 1); 608185029Spjd if (vap->va_mask & AT_XVATTR) { 609185029Spjd zfs_log_xvattr((lr_attr_t *)start, xvap); 610185029Spjd start = (caddr_t)start + ZIL_XVAT_SIZE(xvap->xva_mapsize); 611185029Spjd } 612168404Spjd 613185029Spjd /* 614185029Spjd * Now stick on domain information if any on end 615185029Spjd */ 616185029Spjd 617185029Spjd if (fuidp) 618185029Spjd (void) zfs_log_fuid_domains(fuidp, start); 619185029Spjd 620168404Spjd itx->itx_sync = (zp->z_sync_cnt != 0); 621168404Spjd seq = zil_itx_assign(zilog, itx, tx); 622168404Spjd zp->z_last_itx = seq; 623168404Spjd} 624168404Spjd 625168404Spjd/* 626168404Spjd * zfs_log_acl() handles TX_ACL transactions. 627168404Spjd */ 628168404Spjdvoid 629185029Spjdzfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp, 630185029Spjd vsecattr_t *vsecp, zfs_fuid_info_t *fuidp) 631168404Spjd{ 632168404Spjd itx_t *itx; 633168404Spjd uint64_t seq; 634185029Spjd lr_acl_v0_t *lrv0; 635168404Spjd lr_acl_t *lr; 636185029Spjd int txtype; 637185029Spjd int lrsize; 638185029Spjd size_t txsize; 639185029Spjd size_t aclbytes = vsecp->vsa_aclentsz; 640168404Spjd 641168404Spjd if (zilog == NULL || zp->z_unlinked) 642168404Spjd return; 643168404Spjd 644185029Spjd txtype = (zp->z_zfsvfs->z_version < ZPL_VERSION_FUID) ? 645185029Spjd TX_ACL_V0 : TX_ACL; 646185029Spjd 647185029Spjd if (txtype == TX_ACL) 648185029Spjd lrsize = sizeof (*lr); 649185029Spjd else 650185029Spjd lrsize = sizeof (*lrv0); 651185029Spjd 652185029Spjd txsize = lrsize + 653185029Spjd ((txtype == TX_ACL) ? ZIL_ACE_LENGTH(aclbytes) : aclbytes) + 654185029Spjd (fuidp ? fuidp->z_domain_str_sz : 0) + 655185029Spjd sizeof (uint64_t) * (fuidp ? fuidp->z_fuid_cnt : 0); 656185029Spjd 657185029Spjd itx = zil_itx_create(txtype, txsize); 658185029Spjd 659168404Spjd lr = (lr_acl_t *)&itx->itx_lr; 660168404Spjd lr->lr_foid = zp->z_id; 661185029Spjd if (txtype == TX_ACL) { 662185029Spjd lr->lr_acl_bytes = aclbytes; 663185029Spjd lr->lr_domcnt = fuidp ? fuidp->z_domain_cnt : 0; 664185029Spjd lr->lr_fuidcnt = fuidp ? fuidp->z_fuid_cnt : 0; 665185029Spjd if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) 666185029Spjd lr->lr_acl_flags = (uint64_t)vsecp->vsa_aclflags; 667185029Spjd else 668185029Spjd lr->lr_acl_flags = 0; 669185029Spjd } 670185029Spjd lr->lr_aclcnt = (uint64_t)vsecp->vsa_aclcnt; 671168404Spjd 672185029Spjd if (txtype == TX_ACL_V0) { 673185029Spjd lrv0 = (lr_acl_v0_t *)lr; 674185029Spjd bcopy(vsecp->vsa_aclentp, (ace_t *)(lrv0 + 1), aclbytes); 675185029Spjd } else { 676185029Spjd void *start = (ace_t *)(lr + 1); 677185029Spjd 678185029Spjd bcopy(vsecp->vsa_aclentp, start, aclbytes); 679185029Spjd 680185029Spjd start = (caddr_t)start + ZIL_ACE_LENGTH(aclbytes); 681185029Spjd 682185029Spjd if (fuidp) { 683185029Spjd start = zfs_log_fuid_ids(fuidp, start); 684185029Spjd (void) zfs_log_fuid_domains(fuidp, start); 685185029Spjd } 686185029Spjd } 687185029Spjd 688168404Spjd itx->itx_sync = (zp->z_sync_cnt != 0); 689168404Spjd seq = zil_itx_assign(zilog, itx, tx); 690168404Spjd zp->z_last_itx = seq; 691168404Spjd} 692