nfs_log.c revision 7240:c4957ab6a78e
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#include <sys/cred.h>
29#include <sys/cmn_err.h>
30#include <sys/debug.h>
31#include <sys/systm.h>
32#include <sys/kmem.h>
33#include <sys/disp.h>
34#include <sys/atomic.h>
35#include <rpc/types.h>
36#include <nfs/nfs.h>
37#include <nfs/nfssys.h>
38#include <nfs/export.h>
39#include <nfs/rnode.h>
40#include <rpc/auth.h>
41#include <rpc/svc.h>
42#include <rpc/xdr.h>
43#include <rpc/clnt.h>
44#include <nfs/nfs_log.h>
45
46#define	NUM_RECORDS_TO_WRITE 256
47#define	NUM_BYTES_TO_WRITE 65536
48
49extern krwlock_t exported_lock;
50
51static int nfslog_num_records_to_write = NUM_RECORDS_TO_WRITE;
52static int nfslog_num_bytes_to_write = NUM_BYTES_TO_WRITE;
53
54/*
55 * This struct is used to 'hide' the details of managing the log
56 * records internally to the logging code.  Allocation routines
57 * are used to obtain pieces of memory for XDR encoding.  This struct
58 * is a 'header' to those areas and a opaque cookie is used to pass
59 * this data structure between the allocating function and the put
60 * function.
61 */
62struct lr_alloc {
63	struct lr_alloc		*next;		/* links for write queuing */
64	struct lr_alloc		*prev;
65#define	LR_ALLOC_NOFREE	0x1			/* not present, call free */
66	int			lr_flags;
67	caddr_t			log_record;	/* address to XDR encoding */
68	size_t			size;		/* final size of encoding */
69	struct kmem_cache	*alloc_cache;	/* keep track of cache ptr */
70	struct exportinfo	*exi;		/* who are we related to? */
71	struct log_buffer	*lb;
72};
73
74struct flush_thread_params {
75	struct nfsl_flush_args tp_args;
76	int tp_error;
77};
78
79static int log_file_create(caddr_t, struct log_file **);
80static void log_file_rele(struct log_file *);
81static struct log_buffer *log_buffer_create(caddr_t);
82static void log_buffer_rele(struct log_buffer *);
83static int nfslog_record_append2all(struct lr_alloc *);
84static int nfslog_logbuffer_rename(struct log_buffer *);
85static void nfslog_logfile_wait(struct log_file *);
86static int nfslog_logfile_rename(char *, char *);
87static void nfslog_do_flush(struct flush_thread_params *);
88static void create_buffer_header(caddr_t *, size_t *, size_t *);
89
90static int nfslog_write_logrecords(struct log_file *, struct lr_alloc *, int);
91static void nfslog_free_logrecords(struct lr_alloc *);
92static int nfslog_records_flush_to_disk(struct log_buffer *);
93static int nfslog_records_flush_to_disk_nolock(struct log_buffer *);
94
95/*
96 * Read/Write lock that protects 'nfslog_buffer_list'.
97 * This lock must be held when searching or modifying 'nfslog_buffer_list'.
98 */
99static krwlock_t nfslog_buffer_list_lock;
100
101/*
102 * The list of "log_buffer" structures.
103 */
104struct log_buffer *nfslog_buffer_list = NULL;
105
106
/*
 * Reference counting helpers.
 *
 * Each macro expands to exactly one statement (do { ... } while (0)), so
 * it must be followed by a semicolon and may be used as the un-braced
 * body of an if/else.  The argument is evaluated more than once; do not
 * pass an expression with side effects.
 */

/* Take a reference on a log_buffer; lb_refcnt is bumped under lb_lock. */
#define	LOG_BUFFER_HOLD(lbp)	do { \
	mutex_enter(&(lbp)->lb_lock); \
	(lbp)->lb_refcnt++; \
	mutex_exit(&(lbp)->lb_lock); \
} while (0)

/* Take a reference on a log_file; lf_refcnt is bumped under lf_lock. */
#define	LOG_FILE_HOLD(lfp)	do { \
	mutex_enter(&(lfp)->lf_lock); \
	(lfp)->lf_refcnt++; \
	mutex_exit(&(lfp)->lf_lock); \
} while (0)

/* Drop a reference on a log_file; see log_file_rele(). */
#define	LOG_FILE_RELE(lfp)	do { \
	log_file_rele(lfp); \
} while (0)
122
/*
 * These two macros bracket a write to a logfile and must be used as a
 * pair.
 *
 * LOG_FILE_LOCK_TO_WRITE() acquires lf_lock, takes a reference and
 * counts the caller as a writer.  Note that lf_lock is STILL HELD when
 * it completes; this is used for serialization correctness between the
 * logbuffer struct and the logfile struct.
 *
 * LOG_FILE_UNLOCK_FROM_WRITE() expects lf_lock to be held.  It removes
 * the caller from the writer count, wakes up anyone waiting for the
 * writers to drain (L_WAITING) once the count reaches zero, drops
 * lf_lock and finally releases the reference.
 *
 * Both expand to a single statement (do { ... } while (0)) and evaluate
 * their argument more than once.
 */
#define	LOG_FILE_LOCK_TO_WRITE(lfp)	do { \
	mutex_enter(&(lfp)->lf_lock); \
	(lfp)->lf_refcnt++; \
	(lfp)->lf_writers++; \
} while (0)

#define	LOG_FILE_UNLOCK_FROM_WRITE(lfp)	do { \
	(lfp)->lf_writers--; \
	if ((lfp)->lf_writers == 0 && ((lfp)->lf_flags & L_WAITING)) { \
		(lfp)->lf_flags &= ~L_WAITING; \
		cv_broadcast(&(lfp)->lf_cv_waiters); \
	} \
	mutex_exit(&(lfp)->lf_lock); \
	log_file_rele(lfp); \
} while (0)
145
146int rfsl_log_buffer = 0;
147static int rfsl_log_file = 0;
148
149/* This array is used for memory allocation of record encoding spaces */
150static struct {
151	int	size;
152	struct kmem_cache *mem_cache;
153	char	*cache_name;
154} nfslog_mem_alloc[] = {
155#define	SMALL_INDX 0
156	{ NFSLOG_SMALL_RECORD_SIZE - sizeof (struct lr_alloc),
157	NULL, NFSLOG_SMALL_REC_NAME },
158#define	MEDIUM_INDX 1
159	{ NFSLOG_MEDIUM_RECORD_SIZE - sizeof (struct lr_alloc),
160	NULL, NFSLOG_MEDIUM_REC_NAME },
161#define	LARGE_INDX 2
162	{ NFSLOG_LARGE_RECORD_SIZE - sizeof (struct lr_alloc),
163	NULL, NFSLOG_LARGE_REC_NAME },
164	{ (-1), NULL }
165};
166
167/* Used to calculate the 'real' allocation size */
168#define	ALLOC_SIZE(index) \
169	(nfslog_mem_alloc[index].size + sizeof (struct lr_alloc))
170
171/*
172 * Initialize logging data buffer cache
173 */
174void
175nfslog_init()
176{
177	int indx;
178
179	rw_init(&nfslog_buffer_list_lock, NULL, RW_DEFAULT, NULL);
180
181	/*
182	 * Initialize the kmem caches for encoding
183	 */
184	for (indx = 0; nfslog_mem_alloc[indx].size != (-1); indx++) {
185		nfslog_mem_alloc[indx].mem_cache =
186		    kmem_cache_create(nfslog_mem_alloc[indx].cache_name,
187		    ALLOC_SIZE(indx), 0, NULL, NULL, NULL, NULL, NULL, 0);
188	}
189}
190
191/*
192 * Sets up the necessary log file and related buffers to enable logging
193 * on the given export point.
194 * Returns 0 on success, non-zero on failure.
195 */
196int
197nfslog_setup(struct exportinfo *exi)
198{
199	struct exportdata *kex;
200	struct log_buffer *lbp;
201	struct log_buffer *nlbp;
202
203	kex = &exi->exi_export;
204	ASSERT(kex->ex_flags & EX_LOG);
205
206	/*
207	 * Logging is enabled for the new export point, check
208	 * the existing log_buffer structures to see if the
209	 * desired buffer has already been opened. If so, point
210	 * the new exportinfo's exi_logbuffer to the existing
211	 * one.
212	 */
213	rw_enter(&nfslog_buffer_list_lock, RW_READER);
214	for (lbp = nfslog_buffer_list; lbp != NULL; lbp = lbp->lb_next) {
215		LOGGING_DPRINT((10,
216		    "searching for buffer... found log_buffer '%s'\n",
217		    lbp->lb_path));
218		if (strcmp(lbp->lb_path, kex->ex_log_buffer) == 0) {
219			/* Found our match. Ref it and return */
220			LOG_BUFFER_HOLD(lbp);
221			exi->exi_logbuffer = lbp;
222			LOGGING_DPRINT((10,  "\tfound log_buffer for '%s'\n",
223			    kex->ex_log_buffer));
224			rw_exit(&nfslog_buffer_list_lock);
225			return (0);
226		}
227	}
228	rw_exit(&nfslog_buffer_list_lock);
229
230	/*
231	 * New buffer needed, allocate it.
232	 * The buffer list lock has been dropped so we will need to search
233	 * the list again to ensure that another thread has not added
234	 * a matching buffer.
235	 */
236	if ((nlbp = log_buffer_create(kex->ex_log_buffer)) == NULL) {
237		/*
238		 * Failed the buffer creation for some reason so we
239		 * will need to return.
240		 */
241		return (EIO);
242	}
243
244	rw_enter(&nfslog_buffer_list_lock, RW_WRITER);
245	for (lbp = nfslog_buffer_list; lbp != NULL;
246	    lbp = lbp->lb_next) {
247		if (strcmp(lbp->lb_path, kex->ex_log_buffer) == 0) {
248				/*
249				 * A log_buffer already exists for the
250				 * indicated buffer, use it instead.
251				 */
252			LOG_BUFFER_HOLD(lbp);
253
254			exi->exi_logbuffer = lbp;
255
256			LOGGING_DPRINT((10, "found log_buffer for '%s' "
257			    "after allocation\n", kex->ex_log_buffer));
258
259			rw_exit(&nfslog_buffer_list_lock);
260
261			log_buffer_rele(nlbp);
262
263			return (0);
264		}
265	}
266	/*
267	 * Didn't find an existing log_buffer for this buffer,
268	 * use the the newly created one, and add to list.  We
269	 * increment the reference count because the node is
270	 * entered into the global list.
271	 */
272	LOGGING_DPRINT((10, "exportfs: adding nlbp=%p to list\n",
273	    (void *)nlbp));
274
275	nlbp->lb_next = nfslog_buffer_list;
276	nfslog_buffer_list = nlbp;
277
278	LOG_BUFFER_HOLD(nlbp);	/* hold is for export entry */
279	exi->exi_logbuffer = nlbp;
280
281	rw_exit(&nfslog_buffer_list_lock);
282
283	return (0);
284}
285
286/*
287 * Disables logging for the given export point.
288 */
289void
290nfslog_disable(struct exportinfo *exi)
291{
292	log_buffer_rele(exi->exi_logbuffer);
293}
294
295/*
296 * Creates the corresponding log_buffer and log_file structures
297 * for the the buffer named 'name'.
298 * Returns a pointer to the log_buffer structure with reference one.
299 */
300static struct log_buffer *
301log_buffer_create(caddr_t name)
302{
303	struct log_buffer *buffer;
304	struct log_file *logfile;
305	int namelen = strlen(name);
306
307	LOGGING_DPRINT((10,  "log_buffer_create: %s\n", name));
308	if (log_file_create(name, &logfile))
309		return (NULL);
310
311	buffer = (struct log_buffer *)kmem_alloc(sizeof (*buffer), KM_SLEEP);
312	buffer->lb_refcnt = 1;
313	buffer->lb_rec_id = 0;
314	buffer->lb_path = (caddr_t)kmem_alloc(namelen + 1, KM_SLEEP);
315	bcopy(name, buffer->lb_path, namelen + 1);
316	buffer->lb_logfile = logfile;
317	buffer->lb_records = NULL;
318	buffer->lb_num_recs = 0;
319	buffer->lb_size_queued = 0;
320	mutex_init(&buffer->lb_lock, NULL, MUTEX_DEFAULT, NULL);
321	rfsl_log_buffer++;
322
323	return (buffer);
324}
325
326/*
327 * Release a log_buffer structure
328 */
329static void
330log_buffer_rele(struct log_buffer *lbp)
331{
332	int len;
333
334	mutex_enter(&lbp->lb_lock);
335	if (--lbp->lb_refcnt > 1) {
336		mutex_exit(&lbp->lb_lock);
337		return;
338	}
339
340	if (lbp->lb_refcnt < 0) {
341		panic("log_rele: log_buffer refcnt < 0");
342		/*NOTREACHED*/
343	}
344
345	/*
346	 * Need to drop the lb_lock before acquiring the
347	 * nfslog_buffer_list_lock. To avoid double free we need
348	 * to hold an additional reference to the log buffer.
349	 * This will ensure that no two threads will simultaneously
350	 * be trying to free the same log buffer.
351	 */
352
353	if (lbp->lb_refcnt == 1) {
354
355		/*
356		 * If the ref count is 1, then the last
357		 * unshare/reference has been given up and we need to
358		 * clean up the buffer and remove it from the buffer
359		 * list.
360		 */
361		LOGGING_DPRINT((10,
362		    "log_buffer_rele lbp=%p disconnecting\n", (void *)lbp));
363		/*
364		 * Hold additional reference before dropping the lb_lock
365		 */
366
367		lbp->lb_refcnt++;
368		mutex_exit(&lbp->lb_lock);
369
370		/*
371		 * Make sure that all of the buffered records are written.
372		 * Don't bother checking the write return value since there
373		 * isn't much we can do at this point.
374		 */
375		(void) nfslog_records_flush_to_disk(lbp);
376
377		rw_enter(&nfslog_buffer_list_lock, RW_WRITER);
378		mutex_enter(&lbp->lb_lock);
379		/*
380		 * Drop the reference count held above.
381		 * If the ref count is still > 1 then someone has
382		 * stepped in to use this log buffer.  unlock and return.
383		 */
384		if (--lbp->lb_refcnt > 1) {
385			mutex_exit(&lbp->lb_lock);
386			rw_exit(&nfslog_buffer_list_lock);
387			return;
388		}
389
390		if (lbp == nfslog_buffer_list) {
391			nfslog_buffer_list = lbp->lb_next;
392		} else {
393			struct log_buffer *tlbp;
394
395			/* Drop the log_buffer from the master list */
396			for (tlbp = nfslog_buffer_list; tlbp->lb_next != NULL;
397			    tlbp = tlbp->lb_next) {
398				if (tlbp->lb_next == lbp) {
399					tlbp->lb_next = lbp->lb_next;
400					break;
401				}
402			}
403		}
404
405		mutex_exit(&lbp->lb_lock);
406		rw_exit(&nfslog_buffer_list_lock);
407	}
408	/*
409	 * ref count zero; finish clean up.
410	 */
411	LOGGING_DPRINT((10, "log_buffer_rele lbp=%p freeing\n", (void *)lbp));
412
413	log_file_rele(lbp->lb_logfile);
414	len = strlen(lbp->lb_path) + 1;
415	kmem_free(lbp->lb_path, len);
416	kmem_free(lbp, sizeof (*lbp));
417	rfsl_log_buffer--;
418}
419
420/*
421 * Creates the corresponding log_file structure for the buffer
422 * named 'log_file_name'.
423 * 'log_file_name' is created by concatenating 'origname' and LOG_INPROG_STRING.
424 * 'logfile' is set to be the log_file structure with reference one.
425 */
426static int
427log_file_create(caddr_t origname, struct log_file **lfpp)
428{
429	vnode_t *vp = NULL;
430	char *name;
431	int namelen;
432	int error;
433	struct log_file *logfile = NULL;
434	vattr_t va;
435	caddr_t loghdr = NULL;
436	size_t loghdr_len = 0;
437	size_t loghdr_free = 0;
438
439	namelen = strlen(origname) + strlen(LOG_INPROG_STRING);
440	name = (caddr_t)kmem_alloc(namelen + 1, KM_SLEEP);
441	(void) sprintf(name, "%s%s", origname, LOG_INPROG_STRING);
442
443	LOGGING_DPRINT((3, "log_file_create: %s\n", name));
444	if (error = vn_open(name, UIO_SYSSPACE, FCREAT|FWRITE|FOFFMAX,
445	    LOG_MODE, &vp, CRCREAT, 0)) {
446		nfs_cmn_err(error, CE_WARN,
447		    "log_file_create: Can not open %s - error %m", name);
448		goto out;
449	}
450	LOGGING_DPRINT((3, "log_file_create: %s vp=%p v_count=%d\n",
451	    name, (void *)vp, vp->v_count));
452
453	logfile = (struct log_file *)kmem_zalloc(sizeof (*logfile), KM_SLEEP);
454	logfile->lf_path = name;
455	/*
456	 * No need to bump the vnode reference count since it is set
457	 * to one by vn_open().
458	 */
459	logfile->lf_vp = vp;
460	logfile->lf_refcnt = 1;
461	mutex_init(&logfile->lf_lock, NULL, MUTEX_DEFAULT, NULL);
462	rfsl_log_file++;
463
464	va.va_mask = AT_SIZE;
465	error = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
466	if (error) {
467		nfs_cmn_err(error, CE_WARN,
468		    "log_file_create: Can not stat %s - error = %m",  name);
469		goto out;
470	}
471
472	if (va.va_size == 0) {
473		struct lr_alloc lr;
474
475		/*
476		 * Write Header.
477		 */
478		create_buffer_header(&loghdr, &loghdr_len, &loghdr_free);
479		/*
480		 * Dummy up a lr_alloc struct for the write
481		 */
482		lr.next = lr.prev = &lr;
483		lr.lr_flags = 0;
484		lr.log_record = loghdr;
485		lr.size = loghdr_len;
486		lr.alloc_cache = NULL;
487		lr.exi = NULL;
488		lr.lb = NULL;
489
490		mutex_enter(&logfile->lf_lock);
491
492		error = nfslog_write_logrecords(logfile, &lr, 1);
493
494		mutex_exit(&logfile->lf_lock);
495
496		if (error != 0) {
497			nfs_cmn_err(error, CE_WARN,
498			    "log_file_create: Can not write header "
499			    "on %s - error = %m", name);
500			goto out;
501		}
502	}
503	*lfpp = logfile;
504
505	if (loghdr != NULL)
506		kmem_free(loghdr, loghdr_free);
507
508	return (0);
509
510out:
511	if (vp != NULL) {
512		int error1;
513		error1 = VOP_CLOSE(vp, FCREAT|FWRITE|FOFFMAX, 1, (offset_t)0,
514		    CRED(), NULL);
515		if (error1) {
516			nfs_cmn_err(error1, CE_WARN,
517			    "log_file_create: Can not close %s - "
518			    "error = %m", name);
519		}
520		VN_RELE(vp);
521	}
522
523	kmem_free(name, namelen + 1);
524	if (logfile != NULL) {
525		mutex_destroy(&logfile->lf_lock);
526		kmem_free(logfile, sizeof (*logfile));
527		rfsl_log_file--;
528	}
529	if (loghdr != NULL)
530		kmem_free(loghdr, loghdr_free);
531
532	return (error);
533}
534
535/*
536 * Release a log_file structure
537 */
538static void
539log_file_rele(struct log_file *lfp)
540{
541	int len;
542	int error;
543
544	mutex_enter(&lfp->lf_lock);
545	if (--lfp->lf_refcnt > 0) {
546		LOGGING_DPRINT((10,
547		    "log_file_rele lfp=%p decremented refcnt to %d\n",
548		    (void *)lfp, lfp->lf_refcnt));
549		mutex_exit(&lfp->lf_lock);
550		return;
551	}
552	if (lfp->lf_refcnt < 0) {
553		panic("log_file_rele: log_file refcnt < 0");
554		/*NOTREACHED*/
555	}
556
557	LOGGING_DPRINT((10, "log_file_rele lfp=%p freeing node\n",
558	    (void *)lfp));
559
560	lfp->lf_flags &= ~(L_PRINTED | L_ERROR);
561
562	ASSERT(lfp->lf_flags == 0);
563	ASSERT(lfp->lf_writers == 0);
564
565	if (error = VOP_CLOSE(lfp->lf_vp, FCREAT|FWRITE|FOFFMAX, 1, (offset_t)0,
566	    CRED(), NULL)) {
567		nfs_cmn_err(error, CE_WARN,
568		    "NFS: Could not close log buffer %s - error = %m",
569		    lfp->lf_path);
570#ifdef DEBUG
571	} else {
572		LOGGING_DPRINT((3,
573		    "log_file_rele: %s has been closed vp=%p v_count=%d\n",
574		    lfp->lf_path, (void *)lfp->lf_vp, lfp->lf_vp->v_count));
575#endif
576	}
577	VN_RELE(lfp->lf_vp);
578
579	len = strlen(lfp->lf_path) + 1;
580	kmem_free(lfp->lf_path, len);
581	kmem_free(lfp, sizeof (*lfp));
582	rfsl_log_file--;
583}
584
585/*
586 * Allocates a record of the size specified.
587 * 'exi' identifies the exportinfo structure being logged.
588 * 'size' indicates how much memory should be allocated
589 * 'cookie' is used to store an opaque value for the caller for later use
590 * 'flags' currently ignored.
591 *
592 * Returns a pointer to the beginning of the allocated memory.
593 * 'cookie' is a pointer to the 'lr_alloc' struct; this will be used
594 * to keep track of the encoded record and contains all the info
595 * for enqueuing the record on the log buffer for later writing.
596 *
597 * nfslog_record_put() must be used to 'free' this record or allocation.
598 */
599/* ARGSUSED */
600void *
601nfslog_record_alloc(
602	struct exportinfo *exi,
603	int alloc_indx,
604	void **cookie,
605	int flags)
606{
607	struct lr_alloc *lrp;
608
609	lrp = (struct lr_alloc *)
610	    kmem_cache_alloc(nfslog_mem_alloc[alloc_indx].mem_cache,
611	    KM_NOSLEEP);
612
613	if (lrp == NULL) {
614		*cookie = NULL;
615		return (NULL);
616	}
617
618	lrp->next = lrp;
619	lrp->prev = lrp;
620	lrp->lr_flags = 0;
621
622	lrp->log_record = (caddr_t)((uintptr_t)lrp +
623	    (uintptr_t)sizeof (struct lr_alloc));
624	lrp->size = nfslog_mem_alloc[alloc_indx].size;
625	lrp->alloc_cache = nfslog_mem_alloc[alloc_indx].mem_cache;
626	lrp->exi = exi;
627
628	if (exi->exi_export.ex_flags & EX_LOG) {
629		LOG_BUFFER_HOLD(exi->exi_logbuffer);
630		lrp->lb = exi->exi_logbuffer;
631	} else {
632		lrp->lb = NULL;
633	}
634
635	*cookie = (void *)lrp;
636
637	LOGGING_DPRINT((3,
638	    "nfslog_record_alloc(log_buffer=%p mem=%p size=%lu)\n",
639	    (void *)exi->exi_logbuffer, (void *)lrp->log_record, lrp->size));
640	return (lrp->log_record);
641}
642
643/*
644 * After the above nfslog_record_alloc() has been called and a record
645 * encoded into the buffer that was returned, this function is called
646 * to handle appropriate disposition of the newly created record.
647 * The cookie value is the one that was returned from nfslog_record_alloc().
648 * Size is the actual size of the record that was encoded.  This is
649 * passed in because the size used for the alloc was just an approximation.
650 * The sync parameter is used to tell us if we need to force this record
651 * to disk and if not it will be queued for later writing.
652 *
653 * Note that if the size parameter has a value of 0, then the record is
654 * not written to the log and the associated data structures are released.
655 */
656void
657nfslog_record_put(void *cookie, size_t size, bool_t sync,
658	unsigned int which_buffers)
659{
660	struct lr_alloc *lrp = (struct lr_alloc *)cookie;
661	struct log_buffer *lbp = lrp->lb;
662
663	/*
664	 * If the caller has nothing to write or if there is
665	 * an apparent error, rele the buffer and free.
666	 */
667	if (size == 0 || size > lrp->size) {
668		nfslog_free_logrecords(lrp);
669		return;
670	}
671
672	/*
673	 * Reset the size to what actually needs to be written
674	 * This is used later on when the iovec is built for
675	 * writing the records to the log file.
676	 */
677	lrp->size = size;
678
679	/* append to all if public exi */
680	if (which_buffers == NFSLOG_ALL_BUFFERS) {
681		(void) nfslog_record_append2all(lrp);
682		nfslog_free_logrecords(lrp);
683		return;
684	}
685
686	/* Insert the record on the list to be written */
687	mutex_enter(&lbp->lb_lock);
688	if (lbp->lb_records == NULL) {
689		lbp->lb_records = (caddr_t)lrp;
690		lbp->lb_num_recs = 1;
691		lbp->lb_size_queued = lrp->size;
692	} else {
693		insque(lrp, ((struct lr_alloc *)lbp->lb_records)->prev);
694		lbp->lb_num_recs++;
695		lbp->lb_size_queued += lrp->size;
696	}
697
698	/*
699	 * Determine if the queue for this log buffer should be flushed.
700	 * This is done by either the number of records queued, the total
701	 * size of all records queued or by the request of the caller
702	 * via the sync parameter.
703	 */
704	if (lbp->lb_size_queued >= nfslog_num_bytes_to_write ||
705	    lbp->lb_num_recs > nfslog_num_records_to_write || sync == TRUE) {
706		mutex_exit(&lbp->lb_lock);
707		(void) nfslog_records_flush_to_disk(lbp);
708	} else {
709		mutex_exit(&lbp->lb_lock);
710	}
711
712}
713
714/*
715 * Examine the log_buffer struct to see if there are queue log records
716 * that need to be written to disk.  If some exist, pull them off of
717 * the log buffer and write them to the log file.
718 */
719static int
720nfslog_records_flush_to_disk(struct log_buffer *lbp)
721{
722
723	mutex_enter(&lbp->lb_lock);
724
725	if (lbp->lb_records == NULL) {
726		mutex_exit(&lbp->lb_lock);
727		return (0);
728	}
729	return	(nfslog_records_flush_to_disk_nolock(lbp));
730}
731
732/*
733 * Function requires that the caller holds lb_lock.
734 * Function flushes any records in the log buffer to the disk.
735 * Function drops the lb_lock on return.
736 */
737
738static int
739nfslog_records_flush_to_disk_nolock(struct log_buffer *lbp)
740{
741	struct log_file *lfp = NULL;
742	struct lr_alloc *lrp_writers;
743	int num_recs;
744	int error = 0;
745
746	ASSERT(MUTEX_HELD(&lbp->lb_lock));
747
748	lfp = lbp->lb_logfile;
749
750	LOG_FILE_LOCK_TO_WRITE(lfp);
751	ASSERT(lbp->lb_records != NULL);
752
753	lrp_writers = (struct lr_alloc *)lbp->lb_records;
754	lbp->lb_records = NULL;
755	num_recs = lbp->lb_num_recs;
756	lbp->lb_num_recs = 0;
757	lbp->lb_size_queued = 0;
758	mutex_exit(&lbp->lb_lock);
759	error = nfslog_write_logrecords(lfp, lrp_writers, num_recs);
760
761	LOG_FILE_UNLOCK_FROM_WRITE(lfp);
762
763	nfslog_free_logrecords(lrp_writers);
764	return (error);
765}
766
767
768/*
769 * Take care of writing the provided log record(s) to the log file.
770 * We group the log records with an iovec and use VOP_WRITE to append
771 * them to the end of the log file.
772 */
773static int
774nfslog_write_logrecords(struct log_file *lfp,
775	struct lr_alloc *lrp_writers, int num_recs)
776{
777	struct uio uio;
778	struct iovec *iovp;
779	int size_iovecs;
780	vnode_t *vp;
781	struct vattr va;
782	struct lr_alloc *lrp;
783	int i;
784	ssize_t len;
785	int ioflag = FAPPEND;
786	int error = 0;
787
788	ASSERT(MUTEX_HELD(&lfp->lf_lock));
789
790	vp = lfp->lf_vp;
791
792	size_iovecs = sizeof (struct iovec) * num_recs;
793	iovp = (struct iovec *)kmem_alloc(size_iovecs, KM_NOSLEEP);
794
795	if (iovp == NULL) {
796		error = ENOMEM;
797		goto out;
798	}
799
800	/* Build the iovec based on the list of log records */
801	i = 0;
802	len = 0;
803	lrp = lrp_writers;
804	do {
805		iovp[i].iov_base = lrp->log_record;
806		iovp[i].iov_len = lrp->size;
807		len += lrp->size;
808		lrp = lrp->next;
809		i++;
810	} while (lrp != lrp_writers);
811
812	ASSERT(i == num_recs);
813
814	uio.uio_iov = iovp;
815	uio.uio_iovcnt = num_recs;
816	uio.uio_loffset = 0;
817	uio.uio_segflg = (short)UIO_SYSSPACE;
818	uio.uio_resid = len;
819	uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
820	uio.uio_fmode = FWRITE;
821	uio.uio_extflg = UIO_COPY_DEFAULT;
822
823	/*
824	 * Save the size. If the write fails, reset the size to avoid
825	 * corrupted log buffer files.
826	 */
827	va.va_mask = AT_SIZE;
828
829	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);  /* UIO_WRITE */
830	if ((error = VOP_GETATTR(vp, &va, 0, CRED(), NULL)) == 0) {
831		if ((len + va.va_size) < (MAXOFF32_T)) {
832			error = VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
833			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
834			if (uio.uio_resid)
835				error = ENOSPC;
836			if (error)
837				(void) VOP_SETATTR(vp, &va, 0, CRED(), NULL);
838		} else {
839			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
840			if (!(lfp->lf_flags & L_PRINTED)) {
841				cmn_err(CE_WARN,
842				    "NFS Logging: buffer file %s exceeds 2GB; "
843				    "stopped writing buffer \n", lfp->lf_path);
844			}
845			error = ENOSPC;
846		}
847	} else {
848		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
849	}
850
851	kmem_free(iovp, size_iovecs);
852
853out:
854	if (error) {
855		if (!(lfp->lf_flags & L_PRINTED)) {
856			nfs_cmn_err(error, CE_WARN,
857			    "NFS Logging disabled for buffer %s - "
858			    "write error = %m\n", lfp->lf_path);
859			lfp->lf_flags |= L_PRINTED;
860		}
861	} else if (lfp->lf_flags & (L_ERROR | L_PRINTED)) {
862		lfp->lf_flags &= ~(L_ERROR | L_PRINTED);
863		cmn_err(CE_WARN,
864		    "NFS Logging re-enabled for buffer %s\n", lfp->lf_path);
865	}
866
867	return (error);
868}
869
870static void
871nfslog_free_logrecords(struct lr_alloc *lrp_writers)
872{
873	struct lr_alloc *lrp = lrp_writers;
874	struct lr_alloc *lrp_free;
875
876	do {
877		lrp_free = lrp;
878
879		lrp = lrp->next;
880
881		/*
882		 * Check to see if we are supposed to free this structure
883		 * and relese the log_buffer ref count.
884		 * It may be the case that the caller does not want this
885		 * structure and its record contents freed just yet.
886		 */
887		if ((lrp_free->lr_flags & LR_ALLOC_NOFREE) == 0) {
888			if (lrp_free->lb != NULL)
889				log_buffer_rele(lrp_free->lb);
890			if (lrp_free->alloc_cache) /* double check */
891				kmem_cache_free(lrp_free->alloc_cache,
892				    (void *)lrp_free);
893		} else {
894			/*
895			 * after being pulled from the list the
896			 * pointers need to be reinitialized.
897			 */
898			lrp_free->next = lrp_free;
899			lrp_free->prev = lrp_free;
900		}
901
902	} while (lrp != lrp_writers);
903}
904
905/*
906 * Rename lbp->lb_logfile to reflect the true name requested by 'share'
907 */
908static int
909nfslog_logbuffer_rename(struct log_buffer *lbp)
910{
911	struct log_file *lf;
912	int error;
913	struct log_file *logfile;
914
915	/*
916	 * Try our best to get the cache records into the log file
917	 * before the rename occurs.
918	 */
919	(void) nfslog_records_flush_to_disk(lbp);
920
921	/*
922	 * Hold lb_lock before retrieving
923	 * lb_logfile.
924	 * Hold a reference to the
925	 * "lf" structure. this is
926	 * same as LOG_FILE_HOLD()
927	 */
928	mutex_enter(&(lbp)->lb_lock);
929	lf = lbp->lb_logfile;
930	mutex_enter(&(lf)->lf_lock);
931	mutex_exit(&(lbp)->lb_lock);
932	lf->lf_refcnt++;
933	mutex_exit(&(lf)->lf_lock);
934
935	LOGGING_DPRINT((10, "nfslog_logbuffer_rename: renaming %s to %s\n",
936	    lf->lf_path, lbp->lb_path));
937
938	/*
939	 * rename the current buffer to what the daemon expects
940	 */
941	if (error = nfslog_logfile_rename(lf->lf_path, lbp->lb_path))
942		goto out;
943
944	/*
945	 * Create a new working buffer file and have all new data sent there.
946	 */
947	if (error = log_file_create(lbp->lb_path, &logfile)) {
948		/* Attempt to rename to original */
949		(void) nfslog_logfile_rename(lbp->lb_path, lf->lf_path);
950		goto out;
951	}
952
953	/*
954	 * Hold the lb_lock here, this will make
955	 * all the threads trying to access lb->logfile block
956	 * and get a new logfile structure instead of old one.
957	 */
958	mutex_enter(&(lbp)->lb_lock);
959	lbp->lb_logfile = logfile;
960	mutex_exit(&(lbp)->lb_lock);
961
962	LOG_FILE_RELE(lf);	/* release log_buffer's reference */
963
964	/*
965	 * Wait for log_file to be in a quiescent state before we
966	 * return to our caller to let it proceed with the reading of
967	 * this file.
968	 */
969	nfslog_logfile_wait(lf);
970
971out:
972	/*
973	 * Release our reference on "lf" in two different cases.
974	 * 1. Error condition, release only the reference
975	 *    that we held at the begining of this
976	 *    routine on "lf" structure.
977	 * 2. Fall through condition, no errors but the old
978	 *    logfile structure "lf" has been replaced with
979	 *    the new "logfile" structure, so release the
980	 *    reference that was part of the creation of
981	 *    "lf" structure to free up the resources.
982	 */
983
984	LOG_FILE_RELE(lf);
985
986	return (error);
987}
988
989/*
990 * Renames the 'from' file to 'new'.
991 */
992static int
993nfslog_logfile_rename(char *from, char *new)
994{
995	int error;
996
997	if (error = vn_rename(from, new, UIO_SYSSPACE)) {
998		cmn_err(CE_WARN,
999		    "nfslog_logfile_rename: couldn't rename %s to %s\n",
1000		    from, new);
1001	}
1002	return (error);
1003}
1004
1005/*
1006 * Wait for the log_file writers to finish before returning
1007 */
1008static void
1009nfslog_logfile_wait(struct log_file *lf)
1010{
1011	mutex_enter(&lf->lf_lock);
1012	while (lf->lf_writers > 0) {
1013		lf->lf_flags |= L_WAITING;
1014		(void) cv_wait_sig(&lf->lf_cv_waiters, &lf->lf_lock);
1015	}
1016	mutex_exit(&lf->lf_lock);
1017}
1018
1019static int
1020nfslog_record_append2all(struct lr_alloc *lrp)
1021{
1022	struct log_buffer *lbp, *nlbp;
1023	int error, ret_error = 0;
1024	int lr_flags = lrp->lr_flags;
1025
1026	rw_enter(&nfslog_buffer_list_lock, RW_READER);
1027	if ((lbp = nfslog_buffer_list) != NULL)
1028		LOG_BUFFER_HOLD(lbp);
1029	for (nlbp = NULL; lbp != NULL; lbp = nlbp) {
1030		if ((nlbp = lbp->lb_next) != NULL) {
1031			/*
1032			 * Remember next element in the list
1033			 */
1034			LOG_BUFFER_HOLD(nlbp);
1035		}
1036		rw_exit(&nfslog_buffer_list_lock);
1037
1038		/*
1039		 * Insert the record on the buffer's list to be written
1040		 * and then flush the records to the log file.
1041		 * Make sure to set the no free flag so that the
1042		 * record can be used for the next write
1043		 */
1044		lrp->lr_flags = LR_ALLOC_NOFREE;
1045
1046		ASSERT(lbp != NULL);
1047		mutex_enter(&lbp->lb_lock);
1048		if (lbp->lb_records == NULL) {
1049			lbp->lb_records = (caddr_t)lrp;
1050			lbp->lb_num_recs = 1;
1051			lbp->lb_size_queued = lrp->size;
1052		} else {
1053			insque(lrp, ((struct lr_alloc *)lbp->lb_records)->prev);
1054			lbp->lb_num_recs++;
1055			lbp->lb_size_queued += lrp->size;
1056		}
1057
1058		/*
1059		 * Flush log records to disk.
1060		 * Function is called with lb_lock held.
1061		 * Function drops the lb_lock on return.
1062		 */
1063		error = nfslog_records_flush_to_disk_nolock(lbp);
1064
1065		if (error) {
1066			ret_error = -1;
1067			nfs_cmn_err(error, CE_WARN,
1068			    "rfsl_log_pubfh: could not append record to "
1069			    "\"%s\" error = %m\n", lbp->lb_path);
1070		}
1071		log_buffer_rele(lbp);
1072		rw_enter(&nfslog_buffer_list_lock, RW_READER);
1073	}
1074	rw_exit(&nfslog_buffer_list_lock);
1075
1076	lrp->lr_flags = lr_flags;
1077
1078	return (ret_error);
1079}
1080
#ifdef DEBUG
/*
 * Debug verbosity.  Levels used in this file:
 *  0) no debugging
 *  3) current test software
 * 10) random stuff
 */
static int logging_debug = 0;

/*
 * printf-style debug output.  A message of the given 'level' is printed
 * when logging_debug is exactly that level, or when logging_debug is
 * above 10 and (logging_debug - 10) is at least 'level'.
 */
void
nfslog_dprint(const int level, const char *fmt, ...)
{
	va_list ap;
	int exact = (logging_debug == level);
	int ranged = (logging_debug > 10 && (logging_debug - 10) >= level);

	if (!exact && !ranged)
		return;

	va_start(ap, fmt);
	(void) vprintf(fmt, ap);
	va_end(ap);
}

#endif /* DEBUG */
1103
1104/*
1105 * NFS Log Flush system call
1106 * Caller must check privileges.
1107 */
1108/* ARGSUSED */
1109int
1110nfsl_flush(struct nfsl_flush_args *args, model_t model)
1111{
1112	struct flush_thread_params *tparams;
1113	struct nfsl_flush_args *nfsl_args;
1114	int error = 0;
1115	ulong_t buffer_len;
1116	STRUCT_HANDLE(nfsl_flush_args, uap);
1117
1118	STRUCT_SET_HANDLE(uap, model, args);
1119
1120	tparams = (struct flush_thread_params *)
1121	    kmem_zalloc(sizeof (*tparams), KM_SLEEP);
1122
1123	nfsl_args = &tparams->tp_args;
1124	nfsl_args->version =  STRUCT_FGET(uap, version);
1125	if (nfsl_args->version != NFSL_FLUSH_ARGS_VERS) {
1126		cmn_err(CE_WARN, "nfsl_flush: exected version %d, got %d",
1127		    NFSL_FLUSH_ARGS_VERS, nfsl_args->version);
1128		return (EIO);
1129	}
1130
1131	nfsl_args->directive = STRUCT_FGET(uap, directive);
1132	if ((nfsl_args->directive & NFSL_ALL) == 0) {
1133		/*
1134		 * Process a specific buffer
1135		 */
1136		nfsl_args->buff_len = STRUCT_FGET(uap, buff_len);
1137
1138		nfsl_args->buff = (char *)
1139		    kmem_alloc(nfsl_args->buff_len, KM_NOSLEEP);
1140		if (nfsl_args->buff == NULL)
1141			return (ENOMEM);
1142
1143		error = copyinstr((const char *)STRUCT_FGETP(uap, buff),
1144		    nfsl_args->buff, nfsl_args->buff_len, &buffer_len);
1145		if (error)
1146			return (EFAULT);
1147
1148		if (nfsl_args->buff_len != buffer_len)
1149			return (EFAULT);
1150	}
1151
1152	LOGGING_DPRINT((10, "nfsl_flush: Flushing %s buffer(s)\n",
1153	    nfsl_args->directive & NFSL_ALL ? "all" : nfsl_args->buff));
1154
1155	if (nfsl_args->directive & NFSL_SYNC) {
1156		/*
1157		 * Do the work synchronously
1158		 */
1159		nfslog_do_flush(tparams);
1160		error = tparams->tp_error;
1161		kmem_free(nfsl_args->buff, nfsl_args->buff_len);
1162		kmem_free(tparams, sizeof (*tparams));
1163	} else {
1164		/*
1165		 * Do the work asynchronously
1166		 */
1167		(void) thread_create(NULL, 0, nfslog_do_flush,
1168		    tparams, 0, &p0, TS_RUN, minclsyspri);
1169	}
1170
1171	return (error);
1172}
1173
1174/*
1175 * This is where buffer flushing would occur, but there is no buffering
1176 * at this time.
1177 * Possibly rename the log buffer for processing.
1178 * Sets tparams->ta_error equal to the value of the error that occurred,
1179 * 0 otherwise.
1180 * Returns ENOENT if the buffer is not found.
1181 */
1182static void
1183nfslog_do_flush(struct flush_thread_params *tparams)
1184{
1185	struct nfsl_flush_args *args;
1186	struct log_buffer *lbp, *nlbp;
1187	int error = ENOENT;
1188	int found = 0;
1189	char *buf_inprog;	/* name of buff in progress */
1190	int buf_inprog_len;
1191
1192	/*
1193	 * Sanity check on the arguments.
1194	 */
1195	if (!tparams)
1196		return;
1197	args = &tparams->tp_args;
1198	if (!args)
1199		return;
1200
1201	rw_enter(&nfslog_buffer_list_lock, RW_READER);
1202	if ((lbp = nfslog_buffer_list) != NULL) {
1203		LOG_BUFFER_HOLD(lbp);
1204	}
1205	for (nlbp = NULL; lbp != NULL; lbp = nlbp) {
1206		if ((nlbp = lbp->lb_next) != NULL) {
1207			LOG_BUFFER_HOLD(nlbp);
1208		}
1209		rw_exit(&nfslog_buffer_list_lock);
1210		if (args->directive & NFSL_ALL) {
1211			(void) nfslog_records_flush_to_disk(lbp);
1212		} else {
1213			if ((strcmp(lbp->lb_path, args->buff) == 0) &&
1214			    (args->directive & NFSL_RENAME)) {
1215				error = nfslog_logbuffer_rename(lbp);
1216				found++;
1217				if (nlbp != NULL)
1218					log_buffer_rele(nlbp);
1219				log_buffer_rele(lbp);
1220				break;
1221			}
1222		}
1223		log_buffer_rele(lbp);
1224		rw_enter(&nfslog_buffer_list_lock, RW_READER);
1225	}
1226	if (!found)
1227		rw_exit(&nfslog_buffer_list_lock);
1228
1229	if (!found && ((args->directive & NFSL_ALL) == 0) &&
1230	    (args->directive & NFSL_RENAME)) {
1231		/*
1232		 * The specified buffer is not currently in use,
1233		 * simply rename the file indicated.
1234		 */
1235		buf_inprog_len = strlen(args->buff) +
1236		    strlen(LOG_INPROG_STRING) + 1;
1237		buf_inprog = (caddr_t)kmem_alloc(buf_inprog_len, KM_SLEEP);
1238		(void) sprintf(buf_inprog, "%s%s",
1239		    args->buff, LOG_INPROG_STRING);
1240
1241		error = nfslog_logfile_rename(buf_inprog, args->buff);
1242
1243		kmem_free(buf_inprog, buf_inprog_len);
1244	}
1245
1246out:
1247	if ((args->directive & NFSL_SYNC) == 0) {
1248		/*
1249		 * Work was performed asynchronously, the caller is
1250		 * no longer waiting for us.
1251		 * Free the thread arguments and exit.
1252		 */
1253		kmem_free(args->buff, args->buff_len);
1254		kmem_free(tparams, sizeof (*tparams));
1255		thread_exit();
1256		/* NOTREACHED */
1257	}
1258
1259	tparams->tp_error = error;
1260}
1261
1262/*
1263 * Generate buffer_header.
1264 * 'loghdr' points the the buffer_header, and *reclen
1265 * contains the length of the buffer.
1266 */
1267static void
1268create_buffer_header(caddr_t *loghdr, size_t *reclen, size_t *freesize)
1269{
1270	timestruc_t		now;
1271	nfslog_buffer_header	lh;
1272	XDR			xdrs;
1273	unsigned int		final_size;
1274
1275
1276	/* pick some size that will hold the buffer_header */
1277	*freesize = NFSLOG_SMALL_RECORD_SIZE;
1278
1279	/*
1280	 * Fill header
1281	 */
1282	lh.bh_length = 0;	/* don't know yet how large it will be */
1283	lh.bh_version = NFSLOG_BUF_VERSION;
1284	lh.bh_flags = 0;
1285	lh.bh_offset = 0;
1286	gethrestime(&now);
1287	TIMESPEC_TO_TIMESPEC32(&lh.bh_timestamp, &now);
1288
1289	/*
1290	 * Encode the header
1291	 */
1292	*loghdr = (caddr_t)kmem_alloc(*freesize, KM_SLEEP);
1293	xdrmem_create(&xdrs, *loghdr, *freesize, XDR_ENCODE);
1294
1295	(void) xdr_nfslog_buffer_header(&xdrs, &lh);
1296
1297	/*
1298	 * Reset with final size of the encoded data
1299	 */
1300	final_size = xdr_getpos(&xdrs);
1301	xdr_setpos(&xdrs, 0);
1302	(void) xdr_u_int(&xdrs, &final_size);
1303
1304	*reclen = (size_t)final_size;
1305}
1306
1307/*
1308 * ****************************************************************
1309 * RPC dispatch table for logging
1310 * Indexed by program, version, proc
1311 * Based on NFS dispatch table.
1312 */
1313struct nfslog_proc_disp {
1314	bool_t	(*xdrargs)();
1315	bool_t	(*xdrres)();
1316	bool_t	affects_transactions;	/* Operation affects transaction */
1317					/* processing */
1318};
1319
/*
 * Per-version entry: the procedure table for one version of a program
 * together with its length.
 */
struct nfslog_vers_disp {
	int			nfslog_dis_nprocs;	/* number of procs */
	struct nfslog_proc_disp	*nfslog_dis_proc_table;	/* proc array */
};
1324
/*
 * Per-program entry: the versions table for one RPC program.  The table
 * is indexed by (version - nfslog_dis_versmin).
 */
struct nfslog_prog_disp {
	int			nfslog_dis_prog;	/* program number */
	int			nfslog_dis_versmin;	/* min version value */
	int			nfslog_dis_nvers;	/* # of version values */
	struct nfslog_vers_disp	*nfslog_dis_vers_table;	/* versions array */
};
1331
/* incremented on bad log attempts */
static int rfs_log_bad = 0;
/* incremented on successful log attempts */
static int rfs_log_good = 0;
1334
1335/*
1336 * Define the actions taken per prog/vers/proc:
1337 *
1338 * In some cases, the nl types are the same as the nfs types and a simple
1339 * bcopy should suffice. Rather that define tens of identical procedures,
1340 * simply define these to bcopy. Similarly this takes care of different
1341 * procs that use same parameter struct.
1342 */
1343
1344static struct nfslog_proc_disp nfslog_proc_v2[] = {
1345	/*
1346	 * NFS VERSION 2
1347	 */
1348
1349	/* RFS_NULL = 0 */
1350	{xdr_void, xdr_void, FALSE},
1351
1352	/* RFS_GETATTR = 1 */
1353	{xdr_fhandle, xdr_nfslog_getattrres, FALSE},
1354
1355	/* RFS_SETATTR = 2 */
1356	{xdr_nfslog_setattrargs, xdr_nfsstat, TRUE},
1357
1358	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1359	{xdr_void, xdr_void, FALSE},
1360
1361	/* RFS_LOOKUP = 4 */
1362	{xdr_nfslog_diropargs, xdr_nfslog_diropres, TRUE},
1363
1364	/* RFS_READLINK = 5 */
1365	{xdr_fhandle, xdr_nfslog_rdlnres, FALSE},
1366
1367	/* RFS_READ = 6 */
1368	{xdr_nfslog_nfsreadargs, xdr_nfslog_rdresult, TRUE},
1369
1370	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1371	{xdr_void, xdr_void, FALSE},
1372
1373	/* RFS_WRITE = 8 */
1374	{xdr_nfslog_writeargs, xdr_nfslog_writeresult, TRUE},
1375
1376	/* RFS_CREATE = 9 */
1377	{xdr_nfslog_createargs, xdr_nfslog_diropres, TRUE},
1378
1379	/* RFS_REMOVE = 10 */
1380	{xdr_nfslog_diropargs, xdr_nfsstat, TRUE},
1381
1382	/* RFS_RENAME = 11 */
1383	{xdr_nfslog_rnmargs, xdr_nfsstat, TRUE},
1384
1385	/* RFS_LINK = 12 */
1386	{xdr_nfslog_linkargs, xdr_nfsstat, TRUE},
1387
1388	/* RFS_SYMLINK = 13 */
1389	{xdr_nfslog_symlinkargs, xdr_nfsstat, TRUE},
1390
1391	/* RFS_MKDIR = 14 */
1392	{xdr_nfslog_createargs, xdr_nfslog_diropres, TRUE},
1393
1394	/* RFS_RMDIR = 15 */
1395	{xdr_nfslog_diropargs, xdr_nfsstat, TRUE},
1396
1397	/* RFS_READDIR = 16 */
1398	{xdr_nfslog_rddirargs, xdr_nfslog_rddirres, TRUE},
1399
1400	/* RFS_STATFS = 17 */
1401	{xdr_fhandle, xdr_nfslog_statfs, FALSE},
1402};
1403
1404
1405/*
1406 * NFS VERSION 3
1407 */
1408
1409static struct nfslog_proc_disp nfslog_proc_v3[] = {
1410
1411	/* NFSPROC3_NULL = 0 */
1412	{xdr_void, xdr_void, FALSE},
1413
1414	/* NFSPROC3_GETATTR = 1 */
1415	{xdr_nfslog_nfs_fh3, xdr_nfslog_GETATTR3res, FALSE},
1416
1417	/* NFSPROC3_SETATTR = 2 */
1418	{xdr_nfslog_SETATTR3args, xdr_nfslog_SETATTR3res, TRUE},
1419
1420	/* NFSPROC3_LOOKUP = 3 */
1421	{xdr_nfslog_diropargs3, xdr_nfslog_LOOKUP3res, TRUE},
1422
1423	/* NFSPROC3_ACCESS = 4 */
1424	{xdr_nfslog_ACCESS3args, xdr_nfslog_ACCESS3res, FALSE},
1425
1426	/* NFSPROC3_READLINK = 5 */
1427	{xdr_nfslog_nfs_fh3, xdr_nfslog_READLINK3res, FALSE},
1428
1429	/* NFSPROC3_READ = 6 */
1430	{xdr_nfslog_READ3args, xdr_nfslog_READ3res, TRUE},
1431
1432	/* NFSPROC3_WRITE = 7 */
1433	{xdr_nfslog_WRITE3args, xdr_nfslog_WRITE3res, TRUE},
1434
1435	/* NFSPROC3_CREATE = 8 */
1436	{xdr_nfslog_CREATE3args, xdr_nfslog_CREATE3res, TRUE},
1437
1438	/* NFSPROC3_MKDIR = 9 */
1439	{xdr_nfslog_MKDIR3args, xdr_nfslog_MKDIR3res, TRUE},
1440
1441	/* NFSPROC3_SYMLINK = 10 */
1442	{xdr_nfslog_SYMLINK3args, xdr_nfslog_SYMLINK3res, TRUE},
1443
1444	/* NFSPROC3_MKNOD = 11 */
1445	{xdr_nfslog_MKNOD3args, xdr_nfslog_MKNOD3res, TRUE},
1446
1447	/* NFSPROC3_REMOVE = 12 */
1448	{xdr_nfslog_REMOVE3args, xdr_nfslog_REMOVE3res, TRUE},
1449
1450	/* NFSPROC3_RMDIR = 13 */
1451	{xdr_nfslog_RMDIR3args, xdr_nfslog_RMDIR3res, TRUE},
1452
1453	/* NFSPROC3_RENAME = 14 */
1454	{xdr_nfslog_RENAME3args, xdr_nfslog_RENAME3res, TRUE},
1455
1456	/* NFSPROC3_LINK = 15 */
1457	{xdr_nfslog_LINK3args, xdr_nfslog_LINK3res, TRUE},
1458
1459	/* NFSPROC3_READDIR = 16 */
1460	{xdr_nfslog_READDIR3args, xdr_nfslog_READDIR3res, TRUE},
1461
1462	/* NFSPROC3_READDIRPLUS = 17 */
1463	{xdr_nfslog_READDIRPLUS3args, xdr_nfslog_READDIRPLUS3res, TRUE},
1464
1465	/* NFSPROC3_FSSTAT = 18 */
1466	{xdr_nfslog_FSSTAT3args, xdr_nfslog_FSSTAT3res, FALSE},
1467
1468	/* NFSPROC3_FSINFO = 19 */
1469	{xdr_nfslog_FSINFO3args, xdr_nfslog_FSINFO3res, FALSE},
1470
1471	/* NFSPROC3_PATHCONF = 20 */
1472	{xdr_nfslog_PATHCONF3args, xdr_nfslog_PATHCONF3res, FALSE},
1473
1474	/* NFSPROC3_COMMIT = 21 */
1475	{xdr_nfslog_COMMIT3args, xdr_nfslog_COMMIT3res, FALSE},
1476};
1477
1478static struct nfslog_proc_disp nfslog_proc_v1[] = {
1479	/*
1480	 * NFSLOG VERSION 1
1481	 */
1482
1483	/* NFSLOG_NULL = 0 */
1484	{xdr_void, xdr_void, TRUE},
1485
1486	/* NFSLOG_SHARE = 1 */
1487	{xdr_nfslog_sharefsargs, xdr_nfslog_sharefsres, TRUE},
1488
1489	/* NFSLOG_UNSHARE = 2 */
1490	{xdr_nfslog_sharefsargs, xdr_nfslog_sharefsres, TRUE},
1491
1492	/* NFSLOG_LOOKUP = 3 */
1493	{xdr_nfslog_diropargs3, xdr_nfslog_LOOKUP3res, TRUE},
1494
1495	/* NFSLOG_GETFH = 4 */
1496	{xdr_nfslog_getfhargs, xdr_nfsstat, TRUE},
1497};
1498
1499static struct nfslog_vers_disp nfslog_vers_disptable[] = {
1500	{sizeof (nfslog_proc_v2) / sizeof (nfslog_proc_v2[0]),
1501	    nfslog_proc_v2},
1502	{sizeof (nfslog_proc_v3) / sizeof (nfslog_proc_v3[0]),
1503	    nfslog_proc_v3},
1504};
1505
1506static struct nfslog_vers_disp nfslog_nfslog_vers_disptable[] = {
1507	{sizeof (nfslog_proc_v1) / sizeof (nfslog_proc_v1[0]),
1508	    nfslog_proc_v1},
1509};
1510
1511static struct nfslog_prog_disp nfslog_dispatch_table[] = {
1512	{NFS_PROGRAM, NFS_VERSMIN,
1513		(sizeof (nfslog_vers_disptable) /
1514		sizeof (nfslog_vers_disptable[0])),
1515		nfslog_vers_disptable},
1516
1517	{NFSLOG_PROGRAM, NFSLOG_VERSMIN,
1518		(sizeof (nfslog_nfslog_vers_disptable) /
1519		sizeof (nfslog_nfslog_vers_disptable[0])),
1520		nfslog_nfslog_vers_disptable},
1521};
1522
1523static int	nfslog_dispatch_table_arglen = sizeof (nfslog_dispatch_table) /
1524					sizeof (nfslog_dispatch_table[0]);
1525
1526/*
1527 * This function will determine the appropriate export info struct to use
1528 * and allocate a record id to be used in the written log buffer.
1529 * Usually this is a straightforward operation but the existence of the
1530 * multicomponent lookup and its semantics of crossing file system
1531 * boundaries add to the complexity.  See the comments below...
1532 */
1533struct exportinfo *
1534nfslog_get_exi(
1535	struct exportinfo *exi,
1536	struct svc_req *req,
1537	caddr_t res,
1538	unsigned int *nfslog_rec_id)
1539{
1540	struct log_buffer *lb;
1541	struct exportinfo *exi_ret = NULL;
1542	fhandle_t		*fh;
1543	nfs_fh3			*fh3;
1544
1545	if (exi == NULL)
1546		return (NULL);
1547
1548	/*
1549	 * If the exi is marked for logging, allocate a record id and return
1550	 */
1551	if (exi->exi_export.ex_flags & EX_LOG) {
1552		lb = exi->exi_logbuffer;
1553
1554		/* obtain the unique record id for the caller */
1555		*nfslog_rec_id = atomic_add_32_nv(&lb->lb_rec_id, (int32_t)1);
1556
1557		/*
1558		 * The caller will expect to be able to exi_rele() it,
1559		 * so exi->exi_count must be incremented before it can
1560		 * be returned, to make it uniform with exi_ret->exi_count
1561		 */
1562		mutex_enter(&exi->exi_lock);
1563		exi->exi_count++;
1564		mutex_exit(&exi->exi_lock);
1565
1566		return (exi);
1567	}
1568
1569	if (exi != exi_public)
1570		return (NULL);
1571
1572	/*
1573	 * Here we have an exi that is not marked for logging.
1574	 * It is possible that this request is a multicomponent lookup
1575	 * that was done from the public file handle (not logged) and
1576	 * the resulting file handle being returned to the client exists
1577	 * in a file system that is being logged.  If this is the case
1578	 * we need to log this multicomponent lookup to the appropriate
1579	 * log buffer.  This will allow for the appropriate path name
1580	 * mapping to occur at user level.
1581	 */
1582	if (req->rq_prog == NFS_PROGRAM) {
1583		switch (req->rq_vers) {
1584		case NFS_V3:
1585			if ((req->rq_proc == NFSPROC3_LOOKUP) &&
1586			    (((LOOKUP3res *)res)->status == NFS3_OK)) {
1587				fh3 = &((LOOKUP3res *)res)->res_u.ok.object;
1588				exi_ret = checkexport(&fh3->fh3_fsid,
1589				    FH3TOXFIDP(fh3));
1590			}
1591			break;
1592
1593		case NFS_VERSION:
1594			if ((req->rq_proc == RFS_LOOKUP) &&
1595			    (((struct nfsdiropres *)
1596			    res)->dr_status == NFS_OK)) {
1597				fh =  &((struct nfsdiropres *)res)->
1598				    dr_u.dr_drok_u.drok_fhandle;
1599				exi_ret = checkexport(&fh->fh_fsid,
1600				    (fid_t *)&fh->fh_xlen);
1601			}
1602			break;
1603		default:
1604			break;
1605		}
1606	}
1607
1608	if (exi_ret != NULL && exi_ret->exi_export.ex_flags & EX_LOG) {
1609		lb = exi_ret->exi_logbuffer;
1610		/* obtain the unique record id for the caller */
1611		*nfslog_rec_id = atomic_add_32_nv(&lb->lb_rec_id, (int32_t)1);
1612
1613		return (exi_ret);
1614	}
1615	return (NULL);
1616}
1617
1618#ifdef DEBUG
1619static long long rfslog_records_ignored = 0;
1620#endif
1621
1622/*
1623 * nfslog_write_record - Fill in the record buffer for writing out.
1624 * If logrecp is null, log it, otherwise, malloc the record and return it.
1625 *
1626 * It is the responsibility of the caller to check whether this exportinfo
1627 * has logging enabled.
1628 * Note that nfslog_share_public_record() only needs to check for the
1629 * existence of at least one logbuffer to which the public filehandle record
1630 * needs to be logged.
1631 */
1632void
1633nfslog_write_record(struct exportinfo *exi, struct svc_req *req,
1634	caddr_t args, caddr_t res, cred_t *cr, struct netbuf *pnb,
1635	unsigned int record_id, unsigned int which_buffers)
1636{
1637	struct nfslog_prog_disp	*progtable;	/* prog struct */
1638	struct nfslog_vers_disp	*verstable;	/* version struct */
1639	struct nfslog_proc_disp	*disp = NULL;	/* proc struct */
1640	int			i, vers;
1641	void			*log_cookie;	/* for logrecord if */
1642	caddr_t			buffer;
1643	XDR			xdrs;
1644	unsigned int		final_size;
1645	int			encode_ok;
1646	int			alloc_indx;
1647
1648	ASSERT(exi != NULL); ASSERT(req != NULL); ASSERT(args != NULL);
1649	ASSERT(res != NULL); ASSERT(cr != NULL);
1650
1651	/*
1652	 * Find program element
1653	 * Search the list since program can not be used as index
1654	 */
1655	for (i = 0; (i < nfslog_dispatch_table_arglen); i++) {
1656		if (req->rq_prog == nfslog_dispatch_table[i].nfslog_dis_prog)
1657			break;
1658	}
1659	if (i >= nfslog_dispatch_table_arglen) {	/* program not logged */
1660		/* not an error */
1661		return;
1662	}
1663
1664	/*
1665	 * Extract the dispatch functions based on program/version
1666	 */
1667	progtable = &nfslog_dispatch_table[i];
1668	vers = req->rq_vers - progtable->nfslog_dis_versmin;
1669	verstable = &progtable->nfslog_dis_vers_table[vers];
1670	disp = &verstable->nfslog_dis_proc_table[req->rq_proc];
1671
1672	if (!(exi->exi_export.ex_flags & EX_LOG_ALLOPS) &&
1673	    !disp->affects_transactions) {
1674		/*
1675		 * Only interested in logging operations affecting
1676		 * transaction generation. This is not one of them.
1677		 */
1678#ifdef DEBUG
1679		rfslog_records_ignored++;
1680#endif
1681		return;
1682	}
1683
1684	switch (req->rq_prog) {
1685	case NFS_PROGRAM:
1686		switch (req->rq_vers) {
1687		case NFS_V3:
1688			switch (req->rq_proc) {
1689			case NFSPROC3_READDIRPLUS:
1690				alloc_indx = MEDIUM_INDX;
1691				break;
1692			default:
1693				alloc_indx = SMALL_INDX;
1694				break;
1695			}
1696			break;
1697		default:
1698			alloc_indx = SMALL_INDX;
1699			break;
1700		}
1701		break;
1702	case NFSLOG_PROGRAM:
1703		alloc_indx = MEDIUM_INDX;
1704		break;
1705	default:
1706		alloc_indx = SMALL_INDX;
1707		break;
1708	}
1709
1710	do {
1711		encode_ok = FALSE;
1712
1713		/* Pick the size to alloc; end of the road - return */
1714		if (nfslog_mem_alloc[alloc_indx].size == (-1)) {
1715			cmn_err(CE_WARN,
1716			    "NFSLOG: unable to encode record - prog=%d "
1717			    "proc = %d", req->rq_prog, req->rq_proc);
1718			return;
1719		}
1720
1721		buffer = nfslog_record_alloc(exi, alloc_indx, &log_cookie, 0);
1722		if (buffer == NULL) {
1723			/* Error processing - no space alloced */
1724			rfs_log_bad++;
1725			cmn_err(CE_WARN, "NFSLOG: can't get record");
1726			return;
1727		}
1728
1729		xdrmem_create(&xdrs, buffer,
1730		    nfslog_mem_alloc[alloc_indx].size, XDR_ENCODE);
1731
1732		/*
1733		 * Encode the header, args and results of the record
1734		 */
1735		if (xdr_nfslog_request_record(&xdrs, exi, req, cr, pnb,
1736		    nfslog_mem_alloc[alloc_indx].size, record_id) &&
1737		    (*disp->xdrargs)(&xdrs, args) &&
1738		    (*disp->xdrres)(&xdrs, res)) {
1739				encode_ok = TRUE;
1740
1741				rfs_log_good++;
1742				/*
1743				 * Get the final size of the encoded
1744				 * data and insert that length at the
1745				 * beginning.
1746				 */
1747				final_size = xdr_getpos(&xdrs);
1748				xdr_setpos(&xdrs, 0);
1749				(void) xdr_u_int(&xdrs, &final_size);
1750		} else {
1751			/* Oops, the encode failed so we need to free memory */
1752			nfslog_record_put(log_cookie, 0, FALSE, which_buffers);
1753			alloc_indx++;
1754		}
1755
1756	} while (encode_ok == FALSE);
1757
1758
1759	/*
1760	 * Take the final log record and put it in the log file.
1761	 * This may be queued to the file internally and written
1762	 * later unless the last parameter is TRUE.
1763	 * If the record_id is 0 then this is most likely a share/unshare
1764	 * request and it should be written synchronously to the log file.
1765	 */
1766	nfslog_record_put(log_cookie,
1767	    final_size, (record_id == 0), which_buffers);
1768}
1769
1770static char *
1771get_publicfh_path(int *alloc_length)
1772{
1773	extern struct exportinfo *exi_public;
1774	char *pubpath;
1775
1776	rw_enter(&exported_lock, RW_READER);
1777
1778	*alloc_length = exi_public->exi_export.ex_pathlen + 1;
1779	pubpath = kmem_alloc(*alloc_length, KM_SLEEP);
1780
1781	(void) strcpy(pubpath, exi_public->exi_export.ex_path);
1782
1783	rw_exit(&exported_lock);
1784
1785	return (pubpath);
1786}
1787
1788static void
1789log_public_record(struct exportinfo *exi, cred_t *cr)
1790{
1791	struct svc_req	req;
1792	struct netbuf	nb = {0, 0, NULL};
1793	int free_length = 0;
1794	diropargs3 args;
1795	LOOKUP3res res;
1796
1797	bzero(&req, sizeof (req));
1798	req.rq_prog = NFSLOG_PROGRAM;
1799	req.rq_vers = NFSLOG_VERSION;
1800	req.rq_proc = NFSLOG_LOOKUP;
1801	req.rq_cred.oa_flavor = AUTH_NONE;
1802
1803	bzero(&args, sizeof (diropargs3));
1804	bzero(&res, sizeof (LOOKUP3res));
1805
1806	args.dir.fh3_length = 0;
1807	if ((args.name = get_publicfh_path(&free_length)) == NULL)
1808		return;
1809	args.dirp = &args.dir;
1810
1811	res.status = NFS3_OK;
1812	res.res_u.ok.object.fh3_length = 0;
1813
1814	/*
1815	 * Calling this function with the exi_public
1816	 * will have the effect of appending the record
1817	 * to each of the open log buffers
1818	 */
1819	nfslog_write_record(exi, &req,
1820	    (caddr_t)&args, (caddr_t)&res, cr, &nb, 0, NFSLOG_ALL_BUFFERS);
1821
1822	kmem_free(args.name, free_length);
1823}
1824
1825/*
1826 * nfslog_share_record - logs a share request.
1827 * This is not an NFS request, but we pretend here...
1828 */
1829void
1830nfslog_share_record(struct exportinfo *exi, cred_t *cr)
1831{
1832	struct svc_req	req;
1833	int		res = 0;
1834	struct netbuf	nb = {0, 0, NULL};
1835
1836	ASSERT(exi != NULL);
1837
1838	if (nfslog_buffer_list == NULL)
1839		return;
1840
1841	if (exi->exi_export.ex_flags & EX_LOG) {
1842		bzero(&req, sizeof (req));
1843		req.rq_prog = NFSLOG_PROGRAM;
1844		req.rq_vers = NFSLOG_VERSION;
1845		req.rq_proc = NFSLOG_SHARE;
1846		req.rq_cred.oa_flavor = AUTH_NONE;
1847		nfslog_write_record(exi, &req, (caddr_t)exi, (caddr_t)&res, cr,
1848		    &nb, 0, NFSLOG_ONE_BUFFER);
1849	}
1850
1851	log_public_record(exi, cr);
1852}
1853
1854/*
1855 * nfslog_unshare_record - logs an unshare request.
1856 * This is not an NFS request, but we pretend here...
1857 */
1858void
1859nfslog_unshare_record(struct exportinfo *exi, cred_t *cr)
1860{
1861	struct svc_req	req;
1862	int		res = 0;
1863	struct netbuf	nb = {0, 0, NULL};
1864
1865	ASSERT(exi != NULL);
1866	ASSERT(exi->exi_export.ex_flags & EX_LOG);
1867
1868	bzero(&req, sizeof (req));
1869	req.rq_prog = NFSLOG_PROGRAM;
1870	req.rq_vers = NFSLOG_VERSION;
1871	req.rq_proc = NFSLOG_UNSHARE;
1872	req.rq_cred.oa_flavor = AUTH_NONE;
1873	nfslog_write_record(exi, &req,
1874	    (caddr_t)exi, (caddr_t)&res, cr, &nb, 0, NFSLOG_ONE_BUFFER);
1875}
1876
1877
1878void
1879nfslog_getfh(struct exportinfo *exi,
1880	fhandle *fh,
1881	char *fname,
1882	enum uio_seg seg,
1883	cred_t *cr)
1884{
1885	struct svc_req	req;
1886	int		res = 0;
1887	struct netbuf	nb = {0, 0, NULL};
1888	int		error = 0;
1889	char		*namebuf;
1890	size_t		len;
1891	nfslog_getfhargs gfh;
1892
1893	ASSERT(exi != NULL);
1894	ASSERT(exi->exi_export.ex_flags & EX_LOG);
1895
1896	bzero(&req, sizeof (req));
1897	req.rq_prog = NFSLOG_PROGRAM;
1898	req.rq_vers = NFSLOG_VERSION;
1899	req.rq_proc = NFSLOG_GETFH;
1900	req.rq_cred.oa_flavor = AUTH_NONE;
1901
1902	namebuf = kmem_alloc(MAXPATHLEN + 4, KM_SLEEP);
1903	if (seg == UIO_USERSPACE) {
1904		error = copyinstr(fname, namebuf, MAXPATHLEN, &len);
1905	} else {
1906		error = copystr(fname, namebuf, MAXPATHLEN, &len);
1907	}
1908
1909	if (!error) {
1910		gfh.gfh_fh_buf = *fh;
1911		gfh.gfh_path = namebuf;
1912
1913		nfslog_write_record(exi, &req, (caddr_t)&gfh, (caddr_t)&res,
1914		    cr, &nb, 0, NFSLOG_ONE_BUFFER);
1915	}
1916	kmem_free(namebuf, MAXPATHLEN + 4);
1917}
1918