1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/param.h>
29#include <sys/types.h>
30#include <sys/systm.h>
31#include <sys/cred.h>
32#include <sys/proc.h>
33#include <sys/user.h>
34#include <sys/time.h>
35#include <sys/vnode.h>
36#include <sys/vfs.h>
37#include <sys/file.h>
38#include <sys/filio.h>
39#include <sys/uio.h>
40#include <sys/buf.h>
41#include <sys/mman.h>
42#include <sys/tiuser.h>
43#include <sys/pathname.h>
44#include <sys/dirent.h>
45#include <sys/conf.h>
46#include <sys/debug.h>
47#include <sys/vmsystm.h>
48#include <sys/fcntl.h>
49#include <sys/flock.h>
50#include <sys/swap.h>
51#include <sys/errno.h>
52#include <sys/sysmacros.h>
53#include <sys/disp.h>
54#include <sys/kmem.h>
55#include <sys/cmn_err.h>
56#include <sys/vtrace.h>
57#include <sys/mount.h>
58#include <sys/bootconf.h>
59#include <sys/dnlc.h>
60#include <sys/stat.h>
61
62#include <vm/hat.h>
63#include <vm/as.h>
64#include <vm/page.h>
65#include <vm/pvn.h>
66#include <vm/seg.h>
67#include <vm/seg_map.h>
68#include <vm/seg_vn.h>
69#include <vm/rm.h>
70#include <sys/fs/cachefs_fs.h>
71#include <sys/fs/cachefs_dlog.h>
72#include <fs/fs_subr.h>
73
74static int cachefs_dlog_mapreserve(fscache_t *fscp, int size);
75
76#ifdef _LP64
77
78static void cachefs_dlog_attrchk(vattr_t *vap, char *funcname);
79
/*
 * LP64 wrappers around the cachefs conversion macros.  Each wrapper runs
 * the conversion with a private overflow flag and reports an overflow with
 * cmn_err(CE_WARN, ...) (or, for a whole vattr, through
 * cachefs_dlog_attrchk()), using str/str1 to identify the caller and field.
 *
 * The bodies are wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement and can be used safely as the
 * body of an unbraced if/else.
 */
#define	CACHEFS_DLOG_TS_COPY(in_tsp, out_tsp, str, str1)		\
	do {								\
		int	ovferr = 0;					\
		CACHEFS_TS_TO_CFS_TS_COPY(in_tsp, out_tsp, ovferr);	\
		if (ovferr)						\
			cmn_err(CE_WARN, "%s%s overflow",		\
				(str), (str1));				\
	/*CONSTCOND*/							\
	} while (0)

#define	CACHEFS_DLOG_DEV_COPY(in_dev, out_dev, str, str1)		\
	do {								\
		int	ovferr = 0;					\
		CACHEFS_DEV_TO_CFS_DEV_COPY(in_dev, out_dev, ovferr);	\
		if (ovferr)						\
			cmn_err(CE_WARN, "%s%s 0x%lx -> 0x%x overflow",	\
				(str), (str1), (in_dev),		\
				(dev32_t)(out_dev));			\
	/*CONSTCOND*/							\
	} while (0)

#define	CACHEFS_DLOG_VATTR_COPY(in_vap, out_vap, str)			\
	do {								\
		int	ovferr = 0;					\
		CACHEFS_VATTR_TO_CFS_VATTR_COPY(in_vap, out_vap, ovferr); \
		if (ovferr)						\
			cachefs_dlog_attrchk((in_vap), (str));		\
	/*CONSTCOND*/							\
	} while (0)
104
105/*
106 * check attr error - if we get an overflow error copying vattr, make sure
107 * the field affected is actually wanted, or it might be junk
108 */
109static void
110cachefs_dlog_attrchk(vattr_t *vap, char *str)
111{
112	dev_t		tmpdev;
113	cfs_timestruc_t	ts;
114
115	if (vap->va_mask & AT_FSID) {
116		CACHEFS_DLOG_DEV_COPY(vap->va_fsid, tmpdev, str, ".va_fsid");
117	}
118	if (vap->va_mask & AT_RDEV) {
119		CACHEFS_DLOG_DEV_COPY(vap->va_rdev, tmpdev, str, ".va_rdev");
120	}
121	if (vap->va_mask & AT_MTIME) {
122		CACHEFS_DLOG_TS_COPY(&vap->va_mtime, &ts, str, ".va_mtime");
123	}
124	if (vap->va_mask & AT_ATIME) {
125		CACHEFS_DLOG_TS_COPY(&vap->va_atime, &ts, str, ".va_atime");
126	}
127	if (vap->va_mask & AT_CTIME) {
128		CACHEFS_DLOG_TS_COPY(&vap->va_ctime, &ts, str, ".va_ctime");
129	}
130}
131
132#else /* not _LP64 */
133
/*
 * Non-LP64 variants: the wrappers forward straight to the underlying
 * conversion macros and ignore the str/str1 diagnostic arguments, so no
 * overflow warning is ever printed in this configuration.
 *
 * The third argument handed to the conversion macros is the bare
 * identifier "error".
 * NOTE(review): several callers in this file have no "error" variable in
 * scope, so presumably the non-LP64 conversion macros never expand their
 * third argument -- confirm against their definitions.
 */
#define	CACHEFS_DLOG_TS_COPY(in_tsp, out_tsp, str, str1)		\
	CACHEFS_TS_TO_CFS_TS_COPY(in_tsp, out_tsp, error)

#define	CACHEFS_DLOG_DEV_COPY(in_dev, out_dev, str, str1)		\
	CACHEFS_DEV_TO_CFS_DEV_COPY(in_dev, out_dev, error)

#define	CACHEFS_DLOG_VATTR_COPY(in_vap, out_vap, str)			\
	CACHEFS_VATTR_TO_CFS_VATTR_COPY(in_vap, out_vap, error)
142
143#endif /* _LP64 */
144
145/*
146 *
147 * Cachefs used to know too much about how creds looked; since it's
148 * committed to persistent storage, we can't change the layout so
149 * it now has a "dl_cred_t" which (unsurprisingly) looks exactly like
150 * an old credential.
151 *
152 * The dst argument needs to point to:
153 *		struct dl_cred_t;
154 *		<buffer space>			buffer for groups
155 *
156 * The source is a proper kernel cred_t.
157 *
158 */
159static size_t
160copy_cred(cred_t *src, dl_cred_t *dst)
161{
162	int n;
163	const gid_t *sgrp = crgetgroups(src);
164
165	n = MIN(NGROUPS_MAX_DEFAULT, crgetngroups(src));
166
167	/* copy the fixed fields */
168	dst->cr_uid = crgetuid(src);
169	dst->cr_ruid = crgetruid(src);
170	dst->cr_suid = crgetsuid(src);
171	dst->cr_gid = crgetgid(src);
172	dst->cr_rgid = crgetrgid(src);
173	dst->cr_sgid = crgetsgid(src);
174	dst->cr_groups[0] = sgrp[0];
175
176	dst->cr_ngroups = n;
177	bcopy(sgrp, (void *)(dst + 1), (n - 1) * sizeof (gid_t));
178	return (sizeof (dl_cred_t) + (n - 1) * sizeof (gid_t));
179}
180
181/*
182 * Sets up for writing to the log files.
183 */
184int
185cachefs_dlog_setup(fscache_t *fscp, int createfile)
186{
187	struct vattr vattr;
188	int error = 0;
189	int createdone = 0;
190	int lookupdone = 0;
191	int version = CFS_DLOG_VERSION;
192	off_t offset;
193	struct cfs_dlog_trailer trailer;
194
195	mutex_enter(&fscp->fs_dlock);
196
197	/* all done if the log files already exist */
198	if (fscp->fs_dlogfile) {
199		ASSERT(fscp->fs_dmapfile);
200		goto out;
201	}
202
203	/* see if the log file exists */
204	error = VOP_LOOKUP(fscp->fs_fscdirvp, CACHEFS_DLOG_FILE,
205	    &fscp->fs_dlogfile, NULL, 0, NULL, kcred, NULL, NULL, NULL);
206	if (error && (createfile == 0))
207		goto out;
208
209	/* if the lookup failed then create file log files */
210	if (error) {
211		createdone++;
212
213		vattr.va_mode = S_IFREG | 0666;
214		vattr.va_uid = 0;
215		vattr.va_gid = 0;
216		vattr.va_type = VREG;
217		vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
218		error = VOP_CREATE(fscp->fs_fscdirvp, CACHEFS_DLOG_FILE,
219		    &vattr, 0, 0666, &fscp->fs_dlogfile, kcred, 0, NULL, NULL);
220		if (error) {
221#ifdef CFSDEBUG
222			CFS_DEBUG(CFSDEBUG_DLOG)
223				printf("cachefs: log file create fail %d\n",
224				    error);
225#endif
226			goto out;
227		}
228
229		/* write the version number into the log file */
230		error = vn_rdwr(UIO_WRITE, fscp->fs_dlogfile, (caddr_t)&version,
231		    sizeof (version), (offset_t)0, UIO_SYSSPACE, FSYNC,
232		    RLIM_INFINITY, kcred, NULL);
233		if (error) {
234#ifdef CFSDEBUG
235			CFS_DEBUG(CFSDEBUG_DLOG)
236				printf("cachefs: log file init fail %d\n",
237				    error);
238#endif
239			goto out;
240		}
241
242		vattr.va_mode = S_IFREG | 0666;
243		vattr.va_uid = 0;
244		vattr.va_gid = 0;
245		vattr.va_type = VREG;
246		vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
247		error = VOP_CREATE(fscp->fs_fscdirvp, CACHEFS_DMAP_FILE,
248		    &vattr, 0, 0666, &fscp->fs_dmapfile, kcred, 0, NULL, NULL);
249		if (error) {
250#ifdef CFSDEBUG
251			CFS_DEBUG(CFSDEBUG_DLOG)
252				printf("cachefs: map file create fail %d\n",
253				    error);
254#endif
255			goto out;
256		}
257
258		fscp->fs_dlogoff = sizeof (version);
259		fscp->fs_dlogseq = 0;
260		fscp->fs_dmapoff = 0;
261		fscp->fs_dmapsize = 0;
262	}
263
264	/*
265	 * Else the lookup succeeded.
266	 * Before mounting, fsck should have fixed any problems
267	 * in the log file.
268	 */
269	else {
270		lookupdone++;
271
272		/* find the end of the log file */
273		vattr.va_mask = AT_ALL;
274		error = VOP_GETATTR(fscp->fs_dlogfile, &vattr, 0, kcred, NULL);
275		if (error) {
276#ifdef CFSDEBUG
277			CFS_DEBUG(CFSDEBUG_DLOG)
278				printf("cachefs: log file getattr fail %d\n",
279				    error);
280#endif
281			goto out;
282		}
283		/*LINTED alignment okay*/
284		ASSERT(vattr.va_size <= MAXOFF_T);
285		fscp->fs_dlogoff = (off_t)vattr.va_size;
286
287		offset = vattr.va_size - sizeof (struct cfs_dlog_trailer);
288		/*
289		 * The last record in the dlog file is a trailer record
290		 * that contains the last sequence number used. This is
291		 * used to reset the sequence number when a logfile already
292		 * exists.
293		 */
294		error = vn_rdwr(UIO_READ, fscp->fs_dlogfile, (caddr_t)&trailer,
295		    sizeof (struct cfs_dlog_trailer), (offset_t)offset,
296		    UIO_SYSSPACE, FSYNC, RLIM_INFINITY, kcred, NULL);
297		if (error == 0) {
298			if (trailer.dl_op == CFS_DLOG_TRAILER) {
299				fscp->fs_dlogseq = trailer.dl_seq;
300				/*
301				 * Set the offset of the next record to be
302				 * written, to over write the current
303				 * trailer.
304				 */
305				fscp->fs_dlogoff = offset;
306			} else {
307#ifdef CFSDEBUG
308				CFS_DEBUG(CFSDEBUG_DLOG) {
309					cmn_err(CE_WARN,
310					    "cachefs: can't find dlog trailer");
311					cmn_err(CE_WARN,
312					    "cachefs: fsck required");
313				}
314#endif /* CFSDEBUG */
315				/*LINTED alignment okay*/
316				fscp->fs_dlogseq = (uint_t)vattr.va_size;
317			}
318		} else {
319#ifdef CFSDEBUG
320			CFS_DEBUG(CFSDEBUG_DLOG)
321				cmn_err(CE_WARN,
322				    "cachefs: error reading dlog trailer");
323#endif /* CFSDEBUG */
324			/*LINTED alignment okay*/
325			fscp->fs_dlogseq = (uint_t)vattr.va_size;
326		}
327
328
329		error = VOP_LOOKUP(fscp->fs_fscdirvp, CACHEFS_DMAP_FILE,
330		    &fscp->fs_dmapfile, NULL, 0, NULL, kcred, NULL, NULL, NULL);
331		if (error) {
332#ifdef CFSDEBUG
333			CFS_DEBUG(CFSDEBUG_DLOG)
334				printf("cachefs: map file lookup fail %d\n",
335				    error);
336#endif
337			goto out;
338		}
339
340		vattr.va_mask = AT_ALL;
341		error = VOP_GETATTR(fscp->fs_dmapfile, &vattr, 0, kcred, NULL);
342		if (error) {
343#ifdef CFSDEBUG
344			CFS_DEBUG(CFSDEBUG_DLOG)
345				printf("cachefs: map file getattr fail %d\n",
346				    error);
347#endif
348			goto out;
349		}
350		fscp->fs_dmapoff = (off_t)vattr.va_size;
351		fscp->fs_dmapsize = (off_t)vattr.va_size;
352	}
353
354out:
355	if (error) {
356		if (createdone) {
357			if (fscp->fs_dlogfile) {
358				VN_RELE(fscp->fs_dlogfile);
359				fscp->fs_dlogfile = NULL;
360				(void) VOP_REMOVE(fscp->fs_fscdirvp,
361				    CACHEFS_DLOG_FILE, kcred, NULL, 0);
362			}
363			if (fscp->fs_dmapfile) {
364				VN_RELE(fscp->fs_dmapfile);
365				fscp->fs_dmapfile = NULL;
366				(void) VOP_REMOVE(fscp->fs_fscdirvp,
367				    CACHEFS_DMAP_FILE, kcred, NULL, 0);
368			}
369		}
370		if (lookupdone) {
371			if (fscp->fs_dlogfile) {
372				VN_RELE(fscp->fs_dlogfile);
373				fscp->fs_dlogfile = NULL;
374			}
375			if (fscp->fs_dmapfile) {
376				VN_RELE(fscp->fs_dmapfile);
377				fscp->fs_dmapfile = NULL;
378			}
379		}
380	}
381
382	mutex_exit(&fscp->fs_dlock);
383	return (error);
384}
385
386/*
387 * Drops reference to the log file.
388 */
389void
390cachefs_dlog_teardown(fscache_t *fscp)
391{
392	vattr_t va;
393	/*LINTED: set but not used */
394	int error;
395
396	mutex_enter(&fscp->fs_dlock);
397
398	/* clean up the log file */
399	if (fscp->fs_dlogfile) {
400		VN_RELE(fscp->fs_dlogfile);
401		fscp->fs_dlogfile = NULL;
402	}
403
404	/* clean up the map file */
405	if (fscp->fs_dmapfile) {
406		/* set the map file to the actual size needed */
407		va.va_mask = AT_SIZE;
408		va.va_size = fscp->fs_dmapoff;
409		error = VOP_SETATTR(fscp->fs_dmapfile, &va, 0, kcred, NULL);
410#ifdef CFSDEBUG
411		if (error) {
412			cmn_err(CE_WARN, "cachefs: map setattr failed %d",
413			    error);
414		}
415#endif
416		VN_RELE(fscp->fs_dmapfile);
417		fscp->fs_dmapfile = NULL;
418	}
419	mutex_exit(&fscp->fs_dlock);
420}
421
422/*
423 * Outputs a dlog message to the log file.
424 */
425static off_t
426cachefs_dlog_output(fscache_t *fscp, cfs_dlog_entry_t *entp, uint_t *seqp)
427{
428	int error;
429	off_t offset;
430	int xx;
431	uint_t seq;
432	int len;
433	struct cfs_dlog_trailer *trail;
434
435	ASSERT(entp->dl_len <= CFS_DLOG_ENTRY_MAXSIZE);
436
437	if (fscp->fs_dlogfile == NULL) {
438		error = cachefs_dlog_setup(fscp, 1);
439		if (error) {
440			offset = 0;
441			goto out;
442		}
443	}
444
445	/* round up length to a 4 byte boundary */
446	len = entp->dl_len;
447	xx = len & 0x03;
448	if (xx) {
449		xx = 4 - xx;
450		bzero((void *)((uintptr_t)entp + len), (size_t)xx);
451		len += xx;
452		entp->dl_len = len;
453	}
454
455	/* XXX turn this on/off in sync with code in cachefs_dlog_setsecattr */
456#if 0
457	/* XXX debugging hack, round up to 16 byte boundary */
458	len = entp->dl_len;
459	xx = 16 - (len & 0x0f);
460	bcopy("UUUUUUUUUUUUUUUU", (void *)((uintptr_t)entp + len), (size_t)xx);
461	len += xx;
462	entp->dl_len = len;
463#endif
464
465	/*
466	 * All functions which allocate a dlog entry buffer must be sure
467	 * to allocate space for the trailer record. The trailer record,
468	 * is always located at the end of the log file. It contains the
469	 * highest sequence number used. This allows cachefs_dlog_setup()
470	 * to reset the sequence numbers properly when the log file
471	 * already exists.
472	 */
473	trail = (struct cfs_dlog_trailer *)((uintptr_t)entp + entp->dl_len);
474	trail->dl_len = sizeof (struct cfs_dlog_trailer);
475	trail->dl_op = CFS_DLOG_TRAILER;
476	trail->dl_valid = CFS_DLOG_VAL_COMMITTED;
477	mutex_enter(&fscp->fs_dlock);
478	ASSERT(fscp->fs_dlogfile);
479
480	/* get a sequence number for this log entry */
481	seq = fscp->fs_dlogseq + 1;
482	if (seq == 0) {
483		mutex_exit(&fscp->fs_dlock);
484		offset = 0;
485#ifdef CFSDEBUG
486		cmn_err(CE_WARN, "cachefs: logging failed, seq overflow");
487#endif
488		goto out;
489	}
490	fscp->fs_dlogseq++;
491	trail->dl_seq = fscp->fs_dlogseq;
492
493	/* add the sequence number to the record */
494	entp->dl_seq = seq;
495
496	/* get offset into file to write record */
497	offset = fscp->fs_dlogoff;
498
499	/* try to write the record to the log file */
500	/*
501	 * NOTE This write will over write the previous trailer record and
502	 * will add a new trailer record. This is done with a single
503	 * write for performance reasons.
504	 */
505	error = vn_rdwr(UIO_WRITE, fscp->fs_dlogfile, (caddr_t)entp,
506	    entp->dl_len+trail->dl_len, (offset_t)offset, UIO_SYSSPACE, FSYNC,
507	    RLIM_INFINITY, kcred, NULL);
508
509	if (error) {
510		offset = 0;
511		cmn_err(CE_WARN, "cachefs: logging failed (%d)", error);
512	} else {
513		fscp->fs_dlogoff += entp->dl_len;
514
515		/* get offset of valid field */
516		offset += offsetof(struct cfs_dlog_entry, dl_valid);
517	}
518
519	mutex_exit(&fscp->fs_dlock);
520
521	/* return sequence number used if requested */
522	if (seqp)
523		*seqp = seq;
524
525out:
526	return (offset);
527}
528
529/*
530 * Commits a previously written dlog message.
531 */
532int
533cachefs_dlog_commit(fscache_t *fscp, off_t offset, int error)
534{
535	cfs_dlog_val_t valid;
536
537	if (error)
538		valid = CFS_DLOG_VAL_ERROR;
539	else
540		valid = CFS_DLOG_VAL_COMMITTED;
541
542	error = vn_rdwr(UIO_WRITE, fscp->fs_dlogfile,
543	    (caddr_t)&valid, sizeof (valid), (offset_t)offset,
544	    UIO_SYSSPACE, FSYNC, RLIM_INFINITY, kcred, NULL);
545
546	if (error)
547		cmn_err(CE_WARN, "cachefs: logging commit failed (%d)", error);
548	return (error);
549}
550
551/*
552 * Reserves space in the map file.
553 */
554static int
555cachefs_dlog_mapreserve(fscache_t *fscp, int size)
556{
557	int error = 0;
558	int len;
559	char *bufp;
560
561	if (fscp->fs_dmapfile == NULL) {
562		error = cachefs_dlog_setup(fscp, 1);
563		if (error) {
564			return (error);
565		}
566	}
567
568	mutex_enter(&fscp->fs_dlock);
569	ASSERT(fscp->fs_dmapoff <= fscp->fs_dmapsize);
570	ASSERT(fscp->fs_dmapfile);
571
572	if ((fscp->fs_dmapoff + size) > fscp->fs_dmapsize) {
573		/* reserve 20% for optimal hashing */
574		size += MAXBSIZE / 5;
575
576		/* grow file by a MAXBSIZE chunk */
577		len = MAXBSIZE;
578		ASSERT((fscp->fs_dmapoff + size) < (fscp->fs_dmapsize + len));
579
580		bufp = cachefs_kmem_zalloc(len, KM_SLEEP);
581		error = vn_rdwr(UIO_WRITE, fscp->fs_dmapfile, (caddr_t)bufp,
582			len, (offset_t)fscp->fs_dmapsize, UIO_SYSSPACE, FSYNC,
583			RLIM_INFINITY, kcred, NULL);
584		if (error == 0) {
585			fscp->fs_dmapoff += size;
586			fscp->fs_dmapsize += len;
587		} else {
588			cmn_err(CE_WARN, "cachefs: logging secondary "
589			    "failed (%d)", error);
590		}
591		cachefs_kmem_free(bufp, len);
592	} else {
593		fscp->fs_dmapoff += size;
594	}
595	mutex_exit(&fscp->fs_dlock);
596	return (error);
597}
598
599/*
600 * Reserves space for one cid mapping in the mapping file.
601 */
602int
603cachefs_dlog_cidmap(fscache_t *fscp)
604{
605	int error;
606	error = cachefs_dlog_mapreserve(fscp,
607	    sizeof (struct cfs_dlog_mapping_space));
608	return (error);
609}
610
611off_t
612cachefs_dlog_setattr(fscache_t *fscp, struct vattr *vap, int flags,
613    cnode_t *cp, cred_t *cr)
614{
615	struct cfs_dlog_entry *entp;
616	struct cfs_dlog_setattr *up;
617	size_t	len;
618	off_t offset;
619
620	ASSERT(MUTEX_HELD(&cp->c_statelock));
621
622	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
623
624	entp->dl_valid = CFS_DLOG_VAL_CRASH;
625	entp->dl_op = CFS_DLOG_SETATTR;
626	up = &entp->dl_u.dl_setattr;
627	CACHEFS_DLOG_VATTR_COPY(vap, &up->dl_attrs,
628		"cachefs_dlog_setattr: dl_attr");
629	up->dl_flags = flags;
630	up->dl_cid = cp->c_id;
631	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
632		&up->dl_times.tm_mtime, "cachefs_dlog_setattr: ", "mtime");
633	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
634		&up->dl_times.tm_ctime, "cachefs_dlog_setattr: ", "ctime");
635
636	/* store the cred info */
637	len = copy_cred(cr, &up->dl_cred);
638
639	/* Calculate the length of this record */
640	entp->dl_len = (int)(((uintptr_t)&up->dl_cred + len) - (uintptr_t)entp);
641
642	/* write the record in the log */
643	offset = cachefs_dlog_output(fscp, entp, NULL);
644
645	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
646	return (offset);
647}
648
649off_t
650/*ARGSUSED*/
651cachefs_dlog_setsecattr(fscache_t *fscp, vsecattr_t *vsec, int flags,
652    cnode_t *cp, cred_t *cr)
653{
654	struct cfs_dlog_entry *entp;
655	struct cfs_dlog_setsecattr *up;
656	size_t alen, clen, len;
657	off_t offset = 0;
658	aclent_t *aclp;
659
660	ASSERT(MUTEX_HELD(&cp->c_statelock));
661
662	/* paranoia */
663	ASSERT((vsec->vsa_mask & VSA_ACL) || (vsec->vsa_aclcnt == 0));
664	ASSERT((vsec->vsa_mask & VSA_DFACL) || (vsec->vsa_dfaclcnt == 0));
665	if ((vsec->vsa_mask & VSA_ACL) == 0)
666		vsec->vsa_aclcnt = 0;
667	if ((vsec->vsa_mask & VSA_DFACL) == 0)
668		vsec->vsa_dfaclcnt = 0;
669
670	/* calculate length of ACL and cred data */
671	alen = sizeof (aclent_t) * (vsec->vsa_aclcnt + vsec->vsa_dfaclcnt);
672	clen = sizeof (dl_cred_t) + (((long)crgetngroups(cr)) * sizeof (gid_t));
673
674	/*
675	 * allocate entry.  ACLs may be up to 24k currently, but they
676	 * usually won't, so we don't want to make cfs_dlog_entry_t
677	 * too big.  so, we must compute the length here.
678	 */
679
680	len = sizeof (cfs_dlog_entry_t) - sizeof (up->dl_buffer) -
681		sizeof (up->dl_cred) + alen + clen;
682
683
684#if 0
685	/* make up for weird behavior in cachefs_dlog_output */
686	/* XXX turn this on/off in sync with code in cachefs_dlog_output */
687	entp = cachefs_kmem_alloc(len + 32 + sizeof (struct cfs_dlog_trailer),
688	    KM_SLEEP);
689#else
690	entp = cachefs_kmem_alloc(len, KM_SLEEP);
691#endif
692
693	entp->dl_valid = CFS_DLOG_VAL_CRASH;
694	entp->dl_op = CFS_DLOG_SETSECATTR;
695
696	up = &entp->dl_u.dl_setsecattr;
697	up->dl_cid = cp->c_id;
698
699	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
700		&up->dl_times.tm_mtime, "cachefs_dlog_setsecattr: ", "mtime");
701	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
702		&up->dl_times.tm_ctime, "cachefs_dlog_setsecattr: ", "ctime");
703
704	/* get the creds */
705	(void) copy_cred(cr, &up->dl_cred);
706
707	/* mask and counts */
708	up->dl_mask = vsec->vsa_mask;
709	up->dl_aclcnt = vsec->vsa_aclcnt;
710	up->dl_dfaclcnt = vsec->vsa_dfaclcnt;
711
712	/* get the acls themselves */
713	aclp = (aclent_t *)((uintptr_t)(&up->dl_cred) + clen);
714	if (vsec->vsa_aclcnt > 0) {
715		bcopy(vsec->vsa_aclentp, aclp,
716		    vsec->vsa_aclcnt * sizeof (aclent_t));
717		aclp += vsec->vsa_aclcnt;
718	}
719	if (vsec->vsa_dfaclcnt > 0) {
720		bcopy(vsec->vsa_dfaclentp, aclp,
721		    vsec->vsa_dfaclcnt * sizeof (aclent_t));
722	}
723
724	entp->dl_len = (int)len;
725
726	offset = cachefs_dlog_output(fscp, entp, NULL);
727
728#if 0
729	/* XXX turn on/off in sync with code in cachefs_dlog_output */
730	cachefs_kmem_free(entp, len + 32 + sizeof (struct cfs_dlog_trailer));
731#else
732	cachefs_kmem_free(entp, len);
733#endif
734
735	return (offset);
736}
737
738off_t
739cachefs_dlog_create(fscache_t *fscp, cnode_t *pcp, char *nm,
740    vattr_t *vap, int excl, int mode, cnode_t *cp, int exists, cred_t *cr)
741{
742	struct cfs_dlog_entry *entp;
743	struct cfs_dlog_create *up;
744	size_t len;
745	caddr_t curp;
746	off_t offset;
747
748	ASSERT(MUTEX_HELD(&cp->c_statelock));
749
750	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
751
752	entp->dl_valid = CFS_DLOG_VAL_CRASH;
753	entp->dl_op = CFS_DLOG_CREATE;
754	up = &entp->dl_u.dl_create;
755	up->dl_parent_cid = pcp->c_id;
756	up->dl_new_cid = cp->c_id;
757	CACHEFS_DLOG_VATTR_COPY(vap, &up->dl_attrs,
758		"cachefs_dlog_create: dl_attr");
759	up->dl_excl = excl;
760	up->dl_mode = mode;
761	up->dl_exists = exists;
762	bzero(&up->dl_fid, sizeof (up->dl_fid));
763	if (exists) {
764		CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
765			&up->dl_times.tm_mtime,
766			"cachefs_dlog_create: ", "mtime");
767		CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
768			&up->dl_times.tm_ctime,
769			"cachefs_dlog_create: ", "ctime");
770	} else {
771		up->dl_times.tm_ctime.tv_sec = 0;
772		up->dl_times.tm_ctime.tv_nsec = 0;
773		up->dl_times.tm_mtime.tv_sec = 0;
774		up->dl_times.tm_mtime.tv_nsec = 0;
775	}
776
777	/* store the cred info */
778	len = copy_cred(cr, &up->dl_cred);
779
780	/* find the address in buffer past where the creds are stored */
781	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
782
783	/* store the created name */
784	len = strlen(nm) + 1;
785	bcopy(nm, curp, len);
786
787	/* calculate the length of this record */
788	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
789
790	/* write the record in the log */
791	offset = cachefs_dlog_output(fscp, entp, NULL);
792
793	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
794	return (offset);
795}
796
797off_t
798cachefs_dlog_remove(fscache_t *fscp, cnode_t *pcp, char *nm, cnode_t *cp,
799    cred_t *cr)
800{
801	struct cfs_dlog_entry *entp;
802	struct cfs_dlog_remove *up;
803	size_t len;
804	caddr_t curp;
805	off_t offset;
806
807	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
808
809	entp->dl_valid = CFS_DLOG_VAL_CRASH;
810	entp->dl_op = CFS_DLOG_REMOVE;
811	up = &entp->dl_u.dl_remove;
812	up->dl_parent_cid = pcp->c_id;
813	up->dl_child_cid = cp->c_id;
814	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
815		&up->dl_times.tm_mtime, "cachefs_dlog_remove: ", "mtime");
816	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
817		&up->dl_times.tm_ctime, "cachefs_dlog_remove: ", "ctime");
818	/* store the cred info */
819	len = copy_cred(cr, &up->dl_cred);
820
821	/* find the address in buffer past where the creds are stored */
822	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
823
824	/* store the removed name */
825	len = strlen(nm) + 1;
826	bcopy(nm, curp, len);
827
828	/* calculate the length of this record */
829	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
830
831	/* write the record in the log */
832	offset = cachefs_dlog_output(fscp, entp, NULL);
833
834	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
835	return (offset);
836}
837
838off_t
839cachefs_dlog_link(fscache_t *fscp, cnode_t *pcp, char *nm, cnode_t *cp,
840    cred_t *cr)
841{
842	struct cfs_dlog_entry *entp;
843	struct cfs_dlog_link *up;
844	size_t len;
845	caddr_t curp;
846	off_t offset;
847
848	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
849
850	entp->dl_valid = CFS_DLOG_VAL_CRASH;
851	entp->dl_op = CFS_DLOG_LINK;
852	up = &entp->dl_u.dl_link;
853	up->dl_parent_cid = pcp->c_id;
854	up->dl_child_cid = cp->c_id;
855	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
856		&up->dl_times.tm_mtime, "cachefs_dlog_link: ", "mtime");
857	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
858		&up->dl_times.tm_ctime, "cachefs_dlog_link: ", "ctime");
859
860	/* store the cred info */
861	len = copy_cred(cr, &up->dl_cred);
862
863	/* find the address in buffer past where the creds are stored */
864	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
865
866	/* store the link name */
867	len = strlen(nm) + 1;
868	bcopy(nm, curp, len);
869
870	/* calculate the length of this record */
871	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
872
873	/* write the record in the log */
874	offset = cachefs_dlog_output(fscp, entp, NULL);
875
876	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
877	return (offset);
878}
879
880off_t
881cachefs_dlog_rename(fscache_t *fscp, cnode_t *odcp, char *onm, cnode_t *ndcp,
882    char *nnm, cred_t *cr, cnode_t *cp, cnode_t *delcp)
883{
884	struct cfs_dlog_entry *entp;
885	struct cfs_dlog_rename *up;
886	size_t len;
887	caddr_t curp;
888	off_t offset;
889
890	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
891
892	entp->dl_valid = CFS_DLOG_VAL_CRASH;
893	entp->dl_op = CFS_DLOG_RENAME;
894	up = &entp->dl_u.dl_rename;
895	up->dl_oparent_cid = odcp->c_id;
896	up->dl_nparent_cid = ndcp->c_id;
897	up->dl_child_cid = cp->c_id;
898	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
899		&up->dl_times.tm_mtime, "cachefs_dlog_rename: ", "mtime");
900	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
901		&up->dl_times.tm_ctime, "cachefs_dlog_rename: ", "ctime");
902	if (delcp) {
903		up->dl_del_cid = delcp->c_id;
904		CACHEFS_DLOG_TS_COPY(&delcp->c_metadata.md_vattr.va_mtime,
905			&up->dl_del_times.tm_mtime,
906			"cachefs_dlog_rename: ", "del mtime");
907		CACHEFS_DLOG_TS_COPY(&delcp->c_metadata.md_vattr.va_ctime,
908			&up->dl_del_times.tm_ctime,
909			"cachefs_dlog_rename: ", "del ctime");
910	} else {
911		up->dl_del_cid.cid_fileno = 0;
912		up->dl_del_cid.cid_flags = 0;
913		up->dl_del_times.tm_mtime.tv_sec = 0;
914		up->dl_del_times.tm_mtime.tv_nsec = 0;
915		up->dl_del_times.tm_ctime.tv_sec = 0;
916		up->dl_del_times.tm_ctime.tv_nsec = 0;
917	}
918
919	/* store the cred info */
920	len = copy_cred(cr, &up->dl_cred);
921
922	/* find the address in buffer past where the creds are stored */
923	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
924
925	/* store the old name */
926	len = strlen(onm) + 1;
927	bcopy(onm, curp, len);
928
929	/* store the new name */
930	curp += len;
931	len = strlen(nnm) + 1;
932	bcopy(nnm, curp, len);
933
934	/* calculate the length of this record */
935	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
936
937	/* write the record in the log */
938	offset = cachefs_dlog_output(fscp, entp, NULL);
939
940	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
941	return (offset);
942}
943
944off_t
945cachefs_dlog_mkdir(fscache_t *fscp, cnode_t *pcp, cnode_t *cp, char *nm,
946    vattr_t *vap, cred_t *cr)
947{
948	struct cfs_dlog_entry *entp;
949	struct cfs_dlog_mkdir *up;
950	size_t len;
951	caddr_t curp;
952	off_t offset;
953
954	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
955
956	entp->dl_valid = CFS_DLOG_VAL_CRASH;
957	entp->dl_op = CFS_DLOG_MKDIR;
958	up = &entp->dl_u.dl_mkdir;
959	up->dl_parent_cid = pcp->c_id;
960	up->dl_child_cid = cp->c_id;
961	CACHEFS_DLOG_VATTR_COPY(vap, &up->dl_attrs,
962		"cachefs_dlog_mkdir: dl_attr");
963	bzero(&up->dl_fid, sizeof (up->dl_fid));
964
965	/* store the cred info */
966	len = copy_cred(cr, &up->dl_cred);
967
968	/* find the address in buffer past where the creds are stored */
969	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
970
971	/* store the new directory name */
972	len = strlen(nm) + 1;
973	bcopy(nm, curp, len);
974
975	/* calculate the length of this record */
976	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
977
978	/* write the record in the dlog */
979	offset = cachefs_dlog_output(fscp, entp, NULL);
980
981	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
982	return (offset);
983}
984
985off_t
986cachefs_dlog_rmdir(fscache_t *fscp, cnode_t *pcp, char *nm, cnode_t *cp,
987    cred_t *cr)
988{
989	struct cfs_dlog_entry *entp;
990	struct cfs_dlog_rmdir *up;
991	size_t len;
992	caddr_t curp;
993	off_t offset;
994
995	/* if not a local dir, log the cid to fid mapping */
996	if ((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) {
997		if (cachefs_dlog_mapfid(fscp, cp))
998			return (0);
999		if (cachefs_dlog_cidmap(fscp))
1000			return (0);
1001	}
1002
1003	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
1004
1005	entp->dl_valid = CFS_DLOG_VAL_CRASH;
1006	entp->dl_op = CFS_DLOG_RMDIR;
1007	up = &entp->dl_u.dl_rmdir;
1008	up->dl_parent_cid = pcp->c_id;
1009
1010	/* store the cred info */
1011	len = copy_cred(cr, &up->dl_cred);
1012
1013	/* find the address in buffer past where the creds are stored */
1014	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
1015
1016	/* store the created name */
1017	len = strlen(nm) + 1;
1018	bcopy(nm, curp, len);
1019
1020	/* calculate the length of this record */
1021	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
1022
1023	/* write the record in the log */
1024	offset = cachefs_dlog_output(fscp, entp, NULL);
1025
1026	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
1027	return (offset);
1028}
1029
1030off_t
1031cachefs_dlog_symlink(fscache_t *fscp, cnode_t *pcp, cnode_t *cp, char *lnm,
1032    vattr_t *vap, char *tnm, cred_t *cr)
1033{
1034	struct cfs_dlog_entry *entp;
1035	struct cfs_dlog_symlink *up;
1036	size_t len;
1037	caddr_t curp;
1038	off_t offset;
1039
1040	ASSERT(MUTEX_HELD(&cp->c_statelock));
1041
1042	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
1043
1044	entp->dl_valid = CFS_DLOG_VAL_CRASH;
1045	entp->dl_op = CFS_DLOG_SYMLINK;
1046	up = &entp->dl_u.dl_symlink;
1047	up->dl_parent_cid = pcp->c_id;
1048	up->dl_child_cid = cp->c_id;
1049	CACHEFS_DLOG_VATTR_COPY(vap, &up->dl_attrs,
1050		"cachefs_dlog_symlink: dl_attr");
1051	up->dl_times.tm_ctime.tv_sec = 0;
1052	up->dl_times.tm_ctime.tv_nsec = 0;
1053	up->dl_times.tm_mtime.tv_sec = 0;
1054	up->dl_times.tm_mtime.tv_nsec = 0;
1055	bzero(&up->dl_fid, sizeof (up->dl_fid));
1056
1057	/* store the cred info */
1058	len = copy_cred(cr, &up->dl_cred);
1059
1060	/* find the address in buffer past where the creds are stored */
1061	curp = (caddr_t)(((uintptr_t)&up->dl_cred) + len);
1062
1063	/* store the link name */
1064	len = strlen(lnm) + 1;
1065	bcopy(lnm, curp, len);
1066
1067	/* store new name */
1068	curp += len;
1069	len = strlen(tnm) + 1;
1070	bcopy(tnm, curp, len);
1071
1072	/* calculate the length of this record */
1073	entp->dl_len = (int)(((uintptr_t)curp + len) - (uintptr_t)entp);
1074
1075	/* write the record in the log */
1076	offset = cachefs_dlog_output(fscp, entp, NULL);
1077
1078	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
1079	return (offset);
1080}
1081
1082off_t
1083cachefs_dlog_modify(fscache_t *fscp, cnode_t *cp, cred_t *cr, uint_t *seqp)
1084{
1085	struct cfs_dlog_entry *entp;
1086	struct cfs_dlog_modify *up;
1087	off_t offset;
1088	uint_t seq;
1089	size_t len;
1090
1091	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
1092
1093	entp->dl_valid = CFS_DLOG_VAL_CRASH;
1094	entp->dl_op = CFS_DLOG_MODIFIED;
1095	up = &entp->dl_u.dl_modify;
1096	up->dl_cid = cp->c_id;
1097	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_mtime,
1098		&up->dl_times.tm_mtime,
1099		"cachefs_dlog_modify: ", "mtime");
1100	CACHEFS_DLOG_TS_COPY(&cp->c_metadata.md_vattr.va_ctime,
1101		&up->dl_times.tm_ctime,
1102		"cachefs_dlog_modify: ", "ctime");
1103
1104	up->dl_next = 0;
1105
1106	/* store the cred info */
1107	len = copy_cred(cr, &up->dl_cred);
1108
1109	/* calculate the length of this record */
1110	entp->dl_len = (int)(((uintptr_t)&up->dl_cred + len) - (uintptr_t)entp);
1111
1112	/* write the record in the log */
1113	offset = cachefs_dlog_output(fscp, entp, &seq);
1114
1115	/* return sequence number */
1116	*seqp = seq;
1117
1118	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
1119	return (offset);
1120}
1121
1122int
1123cachefs_dlog_mapfid(fscache_t *fscp, cnode_t *cp)
1124{
1125	struct cfs_dlog_entry *entp;
1126	struct cfs_dlog_mapfid *up;
1127	off_t offset;
1128
1129	entp = cachefs_kmem_alloc(sizeof (cfs_dlog_entry_t), KM_SLEEP);
1130
1131	entp->dl_valid = CFS_DLOG_VAL_COMMITTED;
1132	entp->dl_op = CFS_DLOG_MAPFID;
1133	up = &entp->dl_u.dl_mapfid;
1134	up->dl_cid = cp->c_id;
1135	CACHEFS_FID_COPY(&cp->c_cookie, &up->dl_fid);
1136
1137	/* calculate the length of this record */
1138	/* entp->dl_len = ((caddr_t)up - (caddr_t)entp + sizeof (*up)); */
1139	entp->dl_len = (int)(offsetof(struct cfs_dlog_entry, dl_u.dl_mapfid) +
1140				sizeof (struct cfs_dlog_mapfid));
1141
1142	/* write the record in the log */
1143	offset = cachefs_dlog_output(fscp, entp, NULL);
1144
1145	cachefs_kmem_free(entp, sizeof (cfs_dlog_entry_t));
1146	return (offset == 0);
1147}
1148
1149/* Returns the next sequence number, 0 if an error */
1150uint_t
1151cachefs_dlog_seqnext(fscache_t *fscp)
1152{
1153	int error;
1154	uint_t seq;
1155
1156	if (fscp->fs_dlogfile == NULL) {
1157		error = cachefs_dlog_setup(fscp, 1);
1158		if (error)
1159			return (0);
1160	}
1161
1162	mutex_enter(&fscp->fs_dlock);
1163	ASSERT(fscp->fs_dlogfile);
1164
1165	/* get a sequence number for this log entry */
1166	seq = fscp->fs_dlogseq + 1;
1167	if (seq != 0) {
1168		fscp->fs_dlogseq++;
1169	}
1170#ifdef CFSDEBUG
1171	else {
1172		cmn_err(CE_WARN, "cachefs: logging failed, seq overflow 2.");
1173	}
1174#endif
1175	mutex_exit(&fscp->fs_dlock);
1176	return (seq);
1177}
1178