nfs4_client_debug.c revision 8863:94039d51dda4
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 *	Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 *	Use is subject to license terms.
24 */
25
26#include <sys/cred.h>
27#include <sys/kstat.h>
28#include <sys/list.h>
29#include <sys/systm.h>
30#include <sys/vfs.h>
31#include <sys/vnode.h>
32#include <sys/cmn_err.h>
33
34#include <nfs/nfs4_clnt.h>
35#include <nfs/rnode4.h>
36
/*
 * Recovery kstats
 *
 * A set of kstat_named_t members.  rkstat_template initializes them
 * positionally, so the member order here must stay in step with the
 * order of the initializers there.
 */
typedef struct rkstat {
	kstat_named_t	badhandle;
	kstat_named_t	badowner;
	kstat_named_t	clientid;
	kstat_named_t	dead_file;
	kstat_named_t	delay;
	kstat_named_t	fail_relock;
	kstat_named_t	file_diff;
	kstat_named_t	no_grace;
	kstat_named_t	not_responding;
	kstat_named_t	opens_changed;
	kstat_named_t	siglost;
	kstat_named_t	unexp_action;
	kstat_named_t	unexp_errno;
	kstat_named_t	unexp_status;
	kstat_named_t	wrongsec;
	kstat_named_t	lost_state_bad_op;
} rkstat_t;
58
/*
 * Template for an rkstat_t: supplies the kstat name and the
 * KSTAT_DATA_ULONG data type for each member.  The initializers are
 * positional, one per rkstat_t member in declaration order.  Note that
 * the final member, lost_state_bad_op, is given the name "bad_op".
 */
static rkstat_t rkstat_template = {
	{ "badhandle",		KSTAT_DATA_ULONG },
	{ "badowner",		KSTAT_DATA_ULONG },
	{ "clientid",		KSTAT_DATA_ULONG },
	{ "dead_file",		KSTAT_DATA_ULONG },
	{ "delay",		KSTAT_DATA_ULONG },
	{ "fail_relock",	KSTAT_DATA_ULONG },
	{ "file_diff",		KSTAT_DATA_ULONG },
	{ "no_grace",		KSTAT_DATA_ULONG },
	{ "not_responding",	KSTAT_DATA_ULONG },
	{ "opens_changed",	KSTAT_DATA_ULONG },
	{ "siglost",		KSTAT_DATA_ULONG },
	{ "unexp_action",	KSTAT_DATA_ULONG },
	{ "unexp_errno",	KSTAT_DATA_ULONG },
	{ "unexp_status",	KSTAT_DATA_ULONG },
	{ "wrongsec",		KSTAT_DATA_ULONG },
	{ "bad_op",		KSTAT_DATA_ULONG },
};
77
/* maximum number of messages allowed on the mi's mi_msg_list */
int nfs4_msg_max = NFS4_MSG_MAX;

/*
 * Look-back window used by find_beginning() and facts_same() when the
 * mount's mi_lease_period is not positive.  It is compared against
 * differences of msg_time.tv_sec values.
 */
#define	DEFAULT_LEASE	180
81
82/*
83 * Sets the appropiate fields of "ep", given "id" and various parameters.
84 * Assumes that ep's fields have been initialized to zero/null, except for
85 * re_type and mount point info, which are already set.
86 */
87static void
88set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi,
89    rnode4_t *rp1, rnode4_t *rp2, uint_t count, pid_t pid, nfsstat4 nfs4_error,
90    char *server1, char *why, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2,
91    seqid4 seqid1, seqid4 seqid2)
92{
93	int len;
94
95	switch (id) {
96	case RE_BAD_SEQID:
97		ep->re_mi = mi;
98
99		/* bad seqid'd file <path/component name> */
100		if (rp1 && rp1->r_svnode.sv_name)
101			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
102		else
103			ep->re_char1 = NULL;
104		ep->re_rp1 = rp1;
105
106		/* for LOCK/LOCKU */
107		ep->re_pid = pid;
108
109		ep->re_stat4 = nfs4_error;
110		ep->re_tag1 = tag1;
111		ep->re_tag2 = tag2;
112		ep->re_seqid1 = seqid1;
113		ep->re_seqid2 = seqid2;
114		break;
115	case RE_BADHANDLE:
116		ASSERT(rp1 != NULL);
117
118		/* dead file <path/component name> */
119		if (rp1->r_svnode.sv_name)
120			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
121		else
122			ep->re_char1 = NULL;
123		ep->re_rp1 = rp1;
124		break;
125	case RE_CLIENTID:
126		ep->re_mi = mi;
127
128		/* the error we failed with */
129		ep->re_uint = count;
130		ep->re_stat4 = nfs4_error;
131		break;
132	case RE_DEAD_FILE:
133		ASSERT(rp1 != NULL);
134
135		/* dead file <path/component name> */
136		if (rp1->r_svnode.sv_name)
137			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
138		else
139			ep->re_char1 = NULL;
140		ep->re_rp1 = rp1;
141
142		/* why the file got killed */
143		if (why) {
144			len = strlen(why);
145			ep->re_char2 = kmem_alloc(len + 1, KM_SLEEP);
146			bcopy(why, ep->re_char2, len);
147			ep->re_char2[len] = '\0';
148		} else
149			ep->re_char2 = NULL;
150
151		ep->re_stat4 = nfs4_error;
152		break;
153	case RE_END:
154		/* first rnode */
155		if (rp1 && rp1->r_svnode.sv_name)
156			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
157		else
158			ep->re_char1 = NULL;
159		ep->re_rp1 = rp1;
160
161		/* second rnode */
162		if (rp2 && rp2->r_svnode.sv_name)
163			ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
164		else
165			ep->re_char2 = NULL;
166		ep->re_rp2 = rp2;
167
168		ep->re_mi = mi;
169		break;
170	case RE_FAIL_RELOCK:
171		ASSERT(rp1 != NULL);
172
173		/* error on fail relock */
174		ep->re_uint = count;
175
176		/* process that failed */
177		ep->re_pid = pid;
178
179		/* nfs4 error */
180		ep->re_stat4 = nfs4_error;
181
182		/* file <path/component name> */
183		if (rp1->r_svnode.sv_name)
184			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
185		else
186			ep->re_char1 = NULL;
187		ep->re_rp1 = rp1;
188		break;
189	case RE_FAIL_REMAP_LEN:
190		/* length of returned filehandle */
191		ep->re_uint = count;
192		break;
193	case RE_FAIL_REMAP_OP:
194		break;
195	case RE_FAILOVER:
196		/* server we're failing over to (if not picking original) */
197		if (server1 != NULL) {
198			len = strlen(server1);
199			ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP);
200			bcopy(server1, ep->re_char1, len);
201			ep->re_char1[len] = '\0';
202		} else {
203			ep->re_char1 = NULL;
204		}
205		break;
206	case RE_FILE_DIFF:
207		ASSERT(rp1 != NULL);
208
209		/* dead file <path/component name> */
210		if (rp1->r_svnode.sv_name)
211			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
212		else
213			ep->re_char1 = NULL;
214		ep->re_rp1 = rp1;
215		break;
216	case RE_LOST_STATE:
217		ep->re_uint = count;		/* op number */
218		if (rp1 && rp1->r_svnode.sv_name)
219			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
220		else
221			ep->re_char1 = NULL;
222		ep->re_rp1 = rp1;
223		if (rp2 && rp2->r_svnode.sv_name)
224			ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
225		else
226			ep->re_char2 = NULL;
227		ep->re_rp2 = rp2;
228		break;
229	case RE_OPENS_CHANGED:
230		ep->re_mi = mi;
231
232		/* original number of open files */
233		ep->re_uint = count;
234		/* new number of open files */
235		ep->re_pid = pid;
236		break;
237	case RE_SIGLOST:
238	case RE_SIGLOST_NO_DUMP:
239		ASSERT(rp1 != NULL);
240
241		/* file <path/component name> */
242		if (rp1->r_svnode.sv_name)
243			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
244		else
245			ep->re_char1 = NULL;
246		ep->re_rp1 = rp1;
247		ep->re_pid = pid;
248		ep->re_uint = count;
249		ep->re_stat4 = nfs4_error;
250		break;
251	case RE_START:
252		/* file <path/component name> */
253		if (rp1 && rp1->r_svnode.sv_name)
254			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
255		else
256			ep->re_char1 = NULL;
257		ep->re_rp1 = rp1;
258
259		/* file <path/component name> */
260		if (rp2 && rp2->r_svnode.sv_name)
261			ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
262		else
263			ep->re_char2 = NULL;
264		ep->re_rp2 = rp2;
265
266		ep->re_mi = mi;
267		ep->re_uint = count;
268		break;
269	case RE_UNEXPECTED_ACTION:
270	case RE_UNEXPECTED_ERRNO:
271		/* the error that is unexpected */
272		ep->re_uint = count;
273		break;
274	case RE_UNEXPECTED_STATUS:
275		/* nfsstat4 error */
276		ep->re_stat4 = nfs4_error;
277		break;
278	case RE_WRONGSEC:
279		/* the error we failed with */
280		ep->re_uint = count;
281
282		/* file <path/component name> */
283		if (rp1 && rp1->r_svnode.sv_name)
284			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
285		else
286			ep->re_char1 = NULL;
287		ep->re_rp1 = rp1;
288
289		/* file <path/component name> */
290		if (rp2 && rp2->r_svnode.sv_name)
291			ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
292		else
293			ep->re_char2 = NULL;
294		ep->re_rp2 = rp2;
295		break;
296	case RE_LOST_STATE_BAD_OP:
297		ep->re_uint = count;	/* the unexpected op */
298		ep->re_pid = pid;
299		ep->re_rp1 = rp1;
300		if (rp1 != NULL && rp1->r_svnode.sv_name != NULL)
301			ep->re_char1 = fn_path(rp1->r_svnode.sv_name);
302		ep->re_rp2 = rp2;
303		if (rp2 != NULL && rp2->r_svnode.sv_name != NULL)
304			ep->re_char2 = fn_path(rp2->r_svnode.sv_name);
305		break;
306	default:
307		break;
308	}
309}
310
311/*
312 * Sets the appropiate fields of the 'fact' for this 'id'.
313 */
314static void
315set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4,
316    nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, int error,
317    vnode_t *vp)
318{
319	rnode4_t *rp1;
320
321	switch (id) {
322	case RF_BADOWNER:
323		fp->rf_op = op;
324		fp->rf_reboot = reboot;
325		fp->rf_stat4 = stat4;
326		break;
327	case RF_RENEW_EXPIRED:
328		break;
329	case RF_ERR:
330		fp->rf_op = op;
331		fp->rf_reboot = reboot;
332		fp->rf_stat4 = stat4;
333		fp->rf_action = raction;
334		fp->rf_error = error;
335		break;
336	case RF_SRV_OK:
337		break;
338	case RF_SRV_NOT_RESPOND:
339		break;
340	case RF_SRVS_OK:
341		break;
342	case RF_SRVS_NOT_RESPOND:
343		gethrestime(&fp->rf_time);
344		break;
345	case RF_DELMAP_CB_ERR:
346		fp->rf_op = op;
347		fp->rf_stat4 = stat4;
348
349		rp1 = VTOR4(vp);
350		fp->rf_rp1 = rp1;
351		if (rp1 && rp1->r_svnode.sv_name)
352			fp->rf_char1 = fn_path(rp1->r_svnode.sv_name);
353		else
354			fp->rf_char1 = NULL;
355		break;
356	default:
357		zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id);
358		break;
359	}
360}
361
362/*
363 * Returns 1 if the event/fact is of a successful communication
364 * from the server; 0 otherwise.
365 */
366static int
367successful_comm(nfs4_debug_msg_t *msgp)
368{
369	if (msgp->msg_type == RM_EVENT) {
370		switch (msgp->rmsg_u.msg_event.re_type) {
371		case RE_BAD_SEQID:
372		case RE_BADHANDLE:
373		case RE_FAIL_REMAP_LEN:
374		case RE_FAIL_REMAP_OP:
375		case RE_FILE_DIFF:
376		case RE_START:
377		case RE_UNEXPECTED_ACTION:
378		case RE_UNEXPECTED_ERRNO:
379		case RE_UNEXPECTED_STATUS:
380		case RE_WRONGSEC:
381			return (1);
382		case RE_CLIENTID:
383		case RE_DEAD_FILE:
384		case RE_END:
385		case RE_FAIL_RELOCK:
386		case RE_FAILOVER:
387		case RE_LOST_STATE:
388		case RE_OPENS_CHANGED:
389		case RE_SIGLOST:
390		case RE_SIGLOST_NO_DUMP:
391		case RE_LOST_STATE_BAD_OP:
392			return (0);
393		default:
394			return (0);
395		}
396	} else {
397		switch (msgp->rmsg_u.msg_fact.rf_type) {
398		case RF_BADOWNER:
399		case RF_ERR:
400		case RF_RENEW_EXPIRED:
401		case RF_SRV_OK:
402		case RF_SRVS_OK:
403		case RF_DELMAP_CB_ERR:
404			return (1);
405		case RF_SRV_NOT_RESPOND:
406		case RF_SRVS_NOT_RESPOND:
407			return (0);
408		default:
409			return (0);
410		}
411	}
412}
413
414/*
415 * Iterate backwards through the mi's mi_msg_list to find the earliest
416 * message that we should find relevant facts to investigate.
417 */
418static nfs4_debug_msg_t *
419find_beginning(nfs4_debug_msg_t *first_msg, mntinfo4_t *mi)
420{
421	nfs4_debug_msg_t	*oldest_msg, *cur_msg;
422	time_t			lease;
423
424	ASSERT(mutex_owned(&mi->mi_msg_list_lock));
425	if (mi->mi_lease_period > 0)
426		lease = 2 * mi->mi_lease_period;
427	else
428		lease = DEFAULT_LEASE;
429
430	oldest_msg = first_msg;
431	cur_msg = list_prev(&mi->mi_msg_list, first_msg);
432	while (cur_msg &&
433	    first_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) {
434		oldest_msg = cur_msg;
435		if ((cur_msg->msg_type == RM_FACT) &&
436		    (cur_msg->rmsg_u.msg_fact.rf_type == RF_SRV_OK)) {
437			/* find where we lost contact with the server */
438			while (cur_msg) {
439				if ((cur_msg->msg_type == RM_FACT) &&
440				    (cur_msg->rmsg_u.msg_fact.rf_type ==
441				    RF_SRV_NOT_RESPOND))
442					break;
443				oldest_msg = cur_msg;
444				cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
445			}
446			/*
447			 * Find the first successful message before
448			 * we lost contact with the server.
449			 */
450			if (cur_msg) {
451				cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
452				while (cur_msg && !successful_comm(cur_msg)) {
453					oldest_msg = cur_msg;
454					cur_msg = list_prev(&mi->mi_msg_list,
455					    cur_msg);
456				}
457			}
458			/*
459			 * If we're not at the dummy head pointer,
460			 * set the oldest and current message.
461			 */
462			if (cur_msg) {
463				first_msg = cur_msg;
464				oldest_msg = cur_msg;
465				cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
466			}
467		} else
468			cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
469	}
470
471	return (oldest_msg);
472}
473
474/*
475 * Returns 1 if facts have been found; 0 otherwise.
476 */
477static int
478get_facts(nfs4_debug_msg_t *msgp, nfs4_rfact_t *ret_fp, char **mnt_pt,
479mntinfo4_t *mi)
480{
481	nfs4_debug_msg_t	*cur_msg, *oldest_msg;
482	nfs4_rfact_t		*cur_fp;
483	int			found_a_fact = 0;
484	int			len;
485
486	cur_msg = msgp;
487
488	/* find the oldest msg to search backwards to */
489	oldest_msg = find_beginning(cur_msg, mi);
490	ASSERT(oldest_msg != NULL);
491
492	/*
493	 * Create a fact sheet by searching from our current message
494	 * backwards to the 'oldest_msg', recording facts along the way
495	 * until we found facts that have been inspected by another time.
496	 */
497	while (cur_msg && cur_msg != list_prev(&mi->mi_msg_list, oldest_msg)) {
498		if (cur_msg->msg_type != RM_FACT) {
499			cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
500			continue;
501		}
502
503		cur_fp = &cur_msg->rmsg_u.msg_fact;
504		/*
505		 * If this fact has already been looked at, then so
506		 * have all preceding facts.  Return Now.
507		 */
508		if (cur_fp->rf_status == RFS_INSPECT)
509			return (found_a_fact);
510
511		cur_fp->rf_status = RFS_INSPECT;
512		found_a_fact = 1;
513		switch (cur_fp->rf_type) {
514		case RF_BADOWNER:
515			break;
516		case RF_ERR:
517			/*
518			 * Don't want to overwrite a fact that was
519			 * previously found during our current search.
520			 */
521			if (!ret_fp->rf_reboot)
522				ret_fp->rf_reboot = cur_fp->rf_reboot;
523			if (!ret_fp->rf_stat4)
524				ret_fp->rf_stat4 = cur_fp->rf_stat4;
525			if (!ret_fp->rf_action)
526				ret_fp->rf_action = cur_fp->rf_action;
527			break;
528		case RF_RENEW_EXPIRED:
529			if (cur_msg->msg_mntpt && !(*mnt_pt)) {
530				len = strlen(cur_msg->msg_mntpt) + 1;
531				*mnt_pt = kmem_alloc(len, KM_SLEEP);
532				bcopy(cur_msg->msg_mntpt, *mnt_pt, len);
533			}
534			break;
535		case RF_SRV_OK:
536			break;
537		case RF_SRV_NOT_RESPOND:
538			/*
539			 * Okay to overwrite this fact as
540			 * we want the earliest time.
541			 */
542			ret_fp->rf_time = cur_fp->rf_time;
543			break;
544		case RF_SRVS_OK:
545			break;
546		case RF_SRVS_NOT_RESPOND:
547			break;
548		case RF_DELMAP_CB_ERR:
549			break;
550		default:
551			zcmn_err(getzoneid(), CE_NOTE,
552			    "get facts: illegal fact %d", cur_fp->rf_type);
553			break;
554		}
555		cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
556	}
557
558	return (found_a_fact);
559}
560
561/*
562 * Returns 1 if this fact is identical to the last fact recorded
563 * (only checks for a match within the last 2 lease periods).
564 */
565static int
566facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg,
567    mntinfo4_t *mi)
568{
569	nfs4_rfact_t	*fp1, *fp2;
570	int		lease, len;
571
572	ASSERT(mutex_owned(&mi->mi_msg_list_lock));
573	if (mi->mi_lease_period > 0)
574		lease = 2 * mi->mi_lease_period;
575	else
576		lease = DEFAULT_LEASE;
577
578	fp2 = &new_msg->rmsg_u.msg_fact;
579
580	while (cur_msg &&
581	    new_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) {
582		if (cur_msg->msg_type != RM_FACT) {
583			cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
584			continue;
585		}
586		fp1 = &cur_msg->rmsg_u.msg_fact;
587		if (fp1->rf_type != fp2->rf_type)
588			return (0);
589
590		/* now actually compare the facts */
591		if (fp1->rf_action != fp2->rf_action)
592			return (0);
593		if (fp1->rf_stat4 != fp2->rf_stat4)
594			return (0);
595		if (fp1->rf_reboot != fp2->rf_reboot)
596			return (0);
597		if (fp1->rf_op != fp2->rf_op)
598			return (0);
599		if (fp1->rf_time.tv_sec != fp2->rf_time.tv_sec)
600			return (0);
601		if (fp1->rf_error != fp2->rf_error)
602			return (0);
603		if (fp1->rf_rp1 != fp2->rf_rp1)
604			return (0);
605		if (cur_msg->msg_srv != NULL) {
606			if (new_msg->msg_srv == NULL)
607				return (0);
608			len = strlen(cur_msg->msg_srv);
609			if (strncmp(cur_msg->msg_srv, new_msg->msg_srv,
610			    len) != 0)
611				return (0);
612		} else if (new_msg->msg_srv != NULL) {
613			return (0);
614		}
615		if (cur_msg->msg_mntpt != NULL) {
616			if (new_msg->msg_mntpt == NULL)
617				return (0);
618			len = strlen(cur_msg->msg_mntpt);
619			if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt,
620			    len) != 0)
621				return (0);
622		} else if (new_msg->msg_mntpt != NULL) {
623			return (0);
624		}
625		if (fp1->rf_char1 != NULL) {
626			if (fp2->rf_char1 == NULL)
627				return (0);
628			len = strlen(fp1->rf_char1);
629			if (strncmp(fp1->rf_char1, fp2->rf_char1, len) != 0)
630				return (0);
631		} else if (fp2->rf_char1 != NULL) {
632			return (0);
633		}
634		return (1);
635	}
636
637	return (0);
638}
639
640/*
641 * Returns 1 if these two messages are identical; 0 otherwise.
642 */
643static int
644events_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg,
645    mntinfo4_t *mi)
646{
647	nfs4_revent_t	*ep1, *ep2;
648	int		len;
649
650	/* find the last event, bypassing all facts */
651	while (cur_msg && cur_msg->msg_type != RM_EVENT)
652		cur_msg = list_prev(&mi->mi_msg_list, cur_msg);
653
654	if (!cur_msg)
655		return (0);
656
657	if (cur_msg->msg_type != RM_EVENT)
658		return (0);
659
660	ep1 = &cur_msg->rmsg_u.msg_event;
661	ep2 = &new_msg->rmsg_u.msg_event;
662	if (ep1->re_type != ep2->re_type)
663		return (0);
664
665	/*
666	 * Since we zalloc the buffer, then the two nfs4_debug_msg's
667	 * must match up even if all the fields weren't filled in
668	 * the first place.
669	 */
670	if (ep1->re_mi != ep2->re_mi)
671		return (0);
672	if (ep1->re_uint != ep2->re_uint)
673		return (0);
674	if (ep1->re_stat4 != ep2->re_stat4)
675		return (0);
676	if (ep1->re_pid != ep2->re_pid)
677		return (0);
678	if (ep1->re_rp1 != ep2->re_rp1)
679		return (0);
680	if (ep1->re_rp2 != ep2->re_rp2)
681		return (0);
682	if (ep1->re_tag1 != ep2->re_tag1)
683		return (0);
684	if (ep1->re_tag2 != ep2->re_tag2)
685		return (0);
686	if (ep1->re_seqid1 != ep2->re_seqid1)
687		return (0);
688	if (ep1->re_seqid2 != ep2->re_seqid2)
689		return (0);
690
691	if (cur_msg->msg_srv != NULL) {
692		if (new_msg->msg_srv == NULL)
693			return (0);
694		len = strlen(cur_msg->msg_srv);
695		if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, len) != 0)
696			return (0);
697	} else if (new_msg->msg_srv != NULL) {
698		return (0);
699	}
700
701	if (ep1->re_char1 != NULL) {
702		if (ep2->re_char1 == NULL)
703			return (0);
704		len = strlen(ep1->re_char1);
705		if (strncmp(ep1->re_char1, ep2->re_char1, len) != 0)
706			return (0);
707	} else if (ep2->re_char1 != NULL) {
708		return (0);
709	}
710
711	if (ep1->re_char2 != NULL) {
712		if (ep2->re_char2 == NULL)
713			return (0);
714		len = strlen(ep1->re_char2);
715		if (strncmp(ep1->re_char2, ep2->re_char2, len) != 0)
716			return (0);
717	} else if (ep2->re_char2 != NULL) {
718		return (0);
719	}
720
721	if (cur_msg->msg_mntpt != NULL) {
722		if (new_msg->msg_mntpt == NULL)
723			return (0);
724		len = strlen(cur_msg->msg_mntpt);
725		if (strncmp(cur_msg->msg_mntpt, cur_msg->msg_mntpt, len) != 0)
726			return (0);
727	} else if (new_msg->msg_mntpt != NULL) {
728		return (0);
729	}
730
731	return (1);
732}
733
734/*
735 * Free up a recovery event.
736 */
737static void
738free_event(nfs4_revent_t *ep)
739{
740	int	len;
741
742	if (ep->re_char1) {
743		len = strlen(ep->re_char1) + 1;
744		kmem_free(ep->re_char1, len);
745	}
746	if (ep->re_char2) {
747		len = strlen(ep->re_char2) + 1;
748		kmem_free(ep->re_char2, len);
749	}
750}
751
752/*
753 * Free up a recovery fact.
754 */
755static void
756free_fact(nfs4_rfact_t *fp)
757{
758	int	len;
759
760	if (fp->rf_char1) {
761		len = strlen(fp->rf_char1) + 1;
762		kmem_free(fp->rf_char1, len);
763	}
764}
765
766/*
767 * Free up the message.
768 */
769void
770nfs4_free_msg(nfs4_debug_msg_t *msg)
771{
772	int len;
773
774	if (msg->msg_type == RM_EVENT)
775		free_event(&msg->rmsg_u.msg_event);
776	else
777		free_fact(&msg->rmsg_u.msg_fact);
778
779	if (msg->msg_srv) {
780		len = strlen(msg->msg_srv) + 1;
781		kmem_free(msg->msg_srv, len);
782	}
783
784	if (msg->msg_mntpt) {
785		len = strlen(msg->msg_mntpt) + 1;
786		kmem_free(msg->msg_mntpt, len);
787	}
788
789	/* free up the data structure itself */
790	kmem_free(msg, sizeof (*msg));
791}
792
793/*
794 * Prints out the interesting facts for recovery events:
795 * -DEAD_FILE
796 * -SIGLOST(_NO_DUMP)
797 */
798static void
799print_facts(nfs4_debug_msg_t *msg, mntinfo4_t *mi)
800{
801	nfs4_rfact_t *fp;
802	char *mount_pt;
803	int len;
804
805	if (msg->rmsg_u.msg_event.re_type != RE_DEAD_FILE &&
806	    msg->rmsg_u.msg_event.re_type != RE_SIGLOST &&
807	    msg->rmsg_u.msg_event.re_type != RE_SIGLOST_NO_DUMP)
808		return;
809
810	fp = kmem_zalloc(sizeof (*fp), KM_SLEEP);
811	mount_pt = NULL;
812
813	if (get_facts(msg, fp, &mount_pt, mi)) {
814		char	time[256];
815
816
817		if (fp->rf_time.tv_sec)
818			(void) snprintf(time, 256, "%ld",
819			    (gethrestime_sec() - fp->rf_time.tv_sec)/60);
820		zcmn_err(mi->mi_zone->zone_id, CE_NOTE,
821		    "!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s",
822		    fp->rf_action ? "\n Action: " : "",
823		    fp->rf_action ? nfs4_recov_action_to_str(fp->rf_action) :
824		    "",
825		    fp->rf_stat4 ? "\n NFS4 error: " : "",
826		    fp->rf_stat4 ? nfs4_stat_to_str(fp->rf_stat4) : "",
827		    fp->rf_reboot ? "\n Suspected server reboot. " : "",
828		    fp->rf_time.tv_sec ? "\n Server was down for " : "",
829		    fp->rf_time.tv_sec ? time : "",
830		    fp->rf_time.tv_sec ? " minutes." : "",
831		    mount_pt ? " \n Client's lease expired on mount " : "",
832		    mount_pt ? mount_pt : "");
833	}
834
835	if (mount_pt) {
836		len = strlen(mount_pt) + 1;
837		kmem_free(mount_pt, len);
838	}
839
840	/* free the fact struct itself */
841	if (fp)
842		kmem_free(fp, sizeof (*fp));
843}
844
845/*
846 * Print an event message to /var/adm/messages
847 * The last argument to this fuction dictates the repeat status
848 * of the event. If set to 1, it means that we are dumping this
849 * event and it will _never_ be printed after this time. Else if
850 * set to 0 it will be printed again.
851 */
852static void
853queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump)
854{
855	nfs4_revent_t		*ep;
856	zoneid_t		zoneid;
857
858	ep = &msg->rmsg_u.msg_event;
859	zoneid = mi->mi_zone->zone_id;
860
861	switch (ep->re_type) {
862	case RE_BAD_SEQID:
863		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
864		    "Operation %s for file %s (rnode_pt 0x%p), pid %d using "
865		    "seqid %d got %s.  Last good seqid was %d for "
866		    "operation %s.",
867		    msg->msg_srv, msg->msg_mntpt,
868		    nfs4_ctags[ep->re_tag1].ct_str, ep->re_char1,
869		    (void *)ep->re_rp1, ep->re_pid, ep->re_seqid1,
870		    nfs4_stat_to_str(ep->re_stat4), ep->re_seqid2,
871		    nfs4_ctags[ep->re_tag2].ct_str);
872		break;
873	case RE_BADHANDLE:
874		ASSERT(ep->re_rp1 != NULL);
875		if (ep->re_char1 != NULL) {
876			zcmn_err(zoneid, CE_NOTE,
877			    "![NFS4][Server: %s][Mntpt: %s]"
878			    "server %s said filehandle was "
879			    "invalid for file: %s (rnode_pt 0x%p) on mount %s",
880			    msg->msg_srv, msg->msg_mntpt, msg->msg_srv,
881			    ep->re_char1, (void *)ep->re_rp1, msg->msg_mntpt);
882		} else {
883			zcmn_err(zoneid, CE_NOTE,
884			    "![NFS4][Server: %s][Mntpt: %s]"
885			    "server %s said filehandle was "
886			    "invalid for file: (rnode_pt 0x%p) on mount %s"
887			    " for fh:", msg->msg_srv, msg->msg_mntpt,
888			    msg->msg_srv, (void *)ep->re_rp1, msg->msg_mntpt);
889			sfh4_printfhandle(ep->re_rp1->r_fh);
890		}
891		break;
892	case RE_CLIENTID:
893		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
894		    "Can't recover clientid on mount point %s "
895		    "(mi 0x%p) due to error %d (%s), for server %s.  Marking "
896		    "file system as unusable.",
897		    msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt,
898		    (void *)ep->re_mi, ep->re_uint,
899		    nfs4_stat_to_str(ep->re_stat4),
900		    msg->msg_srv);
901		break;
902	case RE_DEAD_FILE:
903		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
904		    "File %s (rnode_pt: %p) was closed due to NFS "
905		    "recovery error on server %s(%s %s)", msg->msg_srv,
906		    msg->msg_mntpt, ep->re_char1, (void *)ep->re_rp1,
907		    msg->msg_srv, ep->re_char2 ? ep->re_char2 : "",
908		    ep->re_stat4 ? nfs4_stat_to_str(ep->re_stat4) : "");
909		break;
910	case RE_END:
911		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
912		    "NFS Recovery done for mount %s (mi 0x%p) "
913		    "on server %s, rnode_pt1 %s (0x%p), "
914		    "rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt,
915		    msg->msg_mntpt, (void *)ep->re_mi, msg->msg_srv,
916		    ep->re_char1, (void *)ep->re_rp1, ep->re_char2,
917		    (void *)ep->re_rp2);
918		break;
919	case RE_FAIL_RELOCK:
920		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
921		    "Couldn't reclaim lock for pid %d for "
922		    "file %s (rnode_pt 0x%p) on (server %s): error %d",
923		    msg->msg_srv, msg->msg_mntpt, ep->re_pid, ep->re_char1,
924		    (void *)ep->re_rp1, msg->msg_srv,
925		    ep->re_uint ? ep->re_uint : ep->re_stat4);
926		break;
927	case RE_FAIL_REMAP_LEN:
928		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
929		    "remap_lookup: server %s returned bad "
930		    "fhandle length (%d)", msg->msg_srv, msg->msg_mntpt,
931		    msg->msg_srv, ep->re_uint);
932		break;
933	case RE_FAIL_REMAP_OP:
934		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
935		    "remap_lookup: didn't get expected OP_GETFH"
936		    " for server %s", msg->msg_srv, msg->msg_mntpt,
937		    msg->msg_srv);
938		break;
939	case RE_FAILOVER:
940		if (ep->re_char1)
941			zcmn_err(zoneid, CE_NOTE,
942			    "![NFS4][Server: %s][Mntpt: %s]"
943			    "failing over from %s to %s", msg->msg_srv,
944			    msg->msg_mntpt, msg->msg_srv, ep->re_char1);
945		else
946			zcmn_err(zoneid, CE_NOTE,
947			    "![NFS4][Server: %s][Mntpt: %s]"
948			    "NFS4: failing over: selecting "
949			    "original server %s", msg->msg_srv, msg->msg_mntpt,
950			    msg->msg_srv);
951		break;
952	case RE_FILE_DIFF:
953		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
954		    "File %s (rnode_pt: %p) on server %s was closed "
955		    "and failed attempted failover since its is different than "
956		    "the original file", msg->msg_srv, msg->msg_mntpt,
957		    ep->re_char1, (void *)ep->re_rp1, msg->msg_srv);
958		break;
959	case RE_LOST_STATE:
960		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
961		    "Lost %s request for fs %s, file %s (rnode_pt: 0x%p), "
962		    "dir %s (0x%p) for server %s", msg->msg_srv, msg->msg_mntpt,
963		    nfs4_op_to_str(ep->re_uint), msg->msg_mntpt,
964		    ep->re_char1, (void *)ep->re_rp1, ep->re_char2,
965		    (void *)ep->re_rp2, msg->msg_srv);
966		break;
967	case RE_OPENS_CHANGED:
968		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
969		    "The number of open files to reopen changed "
970		    "for mount %s mi 0x%p (old %d, new %d) on server %s",
971		    msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt,
972		    (void *)ep->re_mi, ep->re_uint, ep->re_pid, msg->msg_srv);
973		break;
974	case RE_SIGLOST:
975	case RE_SIGLOST_NO_DUMP:
976		if (ep->re_uint)
977			zcmn_err(zoneid, CE_NOTE,
978			    "![NFS4][Server: %s][Mntpt: %s]"
979			    "Process %d lost its locks on "
980			    "file %s (rnode_pt: %p) due to NFS recovery error "
981			    "(%d) on server %s.", msg->msg_srv, msg->msg_mntpt,
982			    ep->re_pid, ep->re_char1, (void *)ep->re_rp1,
983			    ep->re_uint, msg->msg_srv);
984		else
985			zcmn_err(zoneid, CE_NOTE,
986			    "![NFS4][Server: %s][Mntpt: %s]"
987			    "Process %d lost its locks on "
988			    "file %s (rnode_pt: %p) due to NFS recovery error "
989			    "(%s) on server %s.", msg->msg_srv, msg->msg_mntpt,
990			    ep->re_pid, ep->re_char1, (void *)ep->re_rp1,
991			    nfs4_stat_to_str(ep->re_stat4), msg->msg_srv);
992		break;
993	case RE_START:
994		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
995		    "NFS Starting recovery for mount %s "
996		    "(mi 0x%p mi_recovflags [0x%x]) on server %s, "
997		    "rnode_pt1 %s (0x%p), rnode_pt2 %s (0x%p)", msg->msg_srv,
998		    msg->msg_mntpt, msg->msg_mntpt, (void *)ep->re_mi,
999		    ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1,
1000		    ep->re_char2, (void *)ep->re_rp2);
1001		break;
1002	case RE_UNEXPECTED_ACTION:
1003		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1004		    "NFS recovery: unexpected action (%s) on server %s",
1005		    msg->msg_srv, msg->msg_mntpt,
1006		    nfs4_recov_action_to_str(ep->re_uint), msg->msg_srv);
1007		break;
1008	case RE_UNEXPECTED_ERRNO:
1009		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1010		    "NFS recovery: unexpected errno (%d) on server %s",
1011		    msg->msg_srv, msg->msg_mntpt, ep->re_uint, msg->msg_srv);
1012		break;
1013	case RE_UNEXPECTED_STATUS:
1014		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1015		    "NFS recovery: unexpected NFS status code (%s) "
1016		    "on server %s", msg->msg_srv, msg->msg_mntpt,
1017		    nfs4_stat_to_str(ep->re_stat4),
1018		    msg->msg_srv);
1019		break;
1020	case RE_WRONGSEC:
1021		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1022		    "NFS can't recover from NFS4ERR_WRONGSEC."
1023		    "  error %d for server %s: rnode_pt1 %s (0x%p)"
1024		    " rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt,
1025		    ep->re_uint, msg->msg_srv, ep->re_char1, (void *)ep->re_rp1,
1026		    ep->re_char2, (void *)ep->re_rp2);
1027		break;
1028	case RE_LOST_STATE_BAD_OP:
1029		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1030		    "NFS lost state with unrecognized op (%d)."
1031		    "  fs %s, server %s, pid %d, file %s (rnode_pt: 0x%p), "
1032		    "dir %s (0x%p)", msg->msg_srv, msg->msg_mntpt,
1033		    ep->re_uint, msg->msg_mntpt, msg->msg_srv, ep->re_pid,
1034		    ep->re_char1, (void *)ep->re_rp1, ep->re_char2,
1035		    (void *)ep->re_rp2);
1036		break;
1037	default:
1038		zcmn_err(zoneid, CE_WARN,
1039		    "!queue_print_event: illegal event %d", ep->re_type);
1040		break;
1041	}
1042
1043	print_facts(msg, mi);
1044
1045	/*
1046	 * If set this event will not be printed again and is considered
1047	 * dumped.
1048	 */
1049	if (dump)
1050		msg->msg_status = NFS4_MS_NO_DUMP;
1051}
1052
1053/*
1054 * Print a fact message to /var/adm/messages
1055 */
1056static void
1057queue_print_fact(nfs4_debug_msg_t *msg, int dump)
1058{
1059	nfs4_rfact_t	*fp;
1060	zoneid_t	zoneid;
1061
1062	fp = &msg->rmsg_u.msg_fact;
1063	zoneid = getzoneid();
1064
1065	switch (fp->rf_type) {
1066	case RF_BADOWNER:
1067		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1068		    "NFSMAPID_DOMAIN does not match the server: %s domain\n"
1069		    "Please check configuration", msg->msg_srv, msg->msg_mntpt,
1070		    msg->msg_srv);
1071		break;
1072	case RF_ERR:
1073		if (fp->rf_error)
1074			zcmn_err(zoneid, CE_NOTE,
1075			    "![NFS4][Server: %s][Mntpt: %s]NFS op %s got "
1076			    "error %d causing recovery action %s.%s",
1077			    msg->msg_srv, msg->msg_mntpt,
1078			    nfs4_op_to_str(fp->rf_op), fp->rf_error,
1079			    nfs4_recov_action_to_str(fp->rf_action),
1080			    fp->rf_reboot ?
1081			    "  Client also suspects that the server rebooted,"
1082			    " or experienced a network partition." : "");
1083		else
1084			zcmn_err(zoneid, CE_NOTE,
1085			    "![NFS4][Server: %s][Mntpt: %s]NFS op %s got "
1086			    "error %s causing recovery action %s.%s",
1087			    msg->msg_srv, msg->msg_mntpt,
1088			    nfs4_op_to_str(fp->rf_op),
1089			    nfs4_stat_to_str(fp->rf_stat4),
1090			    nfs4_recov_action_to_str(fp->rf_action),
1091			    fp->rf_reboot ?
1092			    "  Client also suspects that the server rebooted,"
1093			    " or experienced a network partition." : "");
1094		break;
1095	case RF_RENEW_EXPIRED:
1096		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1097		    "NFS4 renew thread detected client's "
1098		    "lease has expired. Current open files/locks/IO may fail",
1099		    msg->msg_srv, msg->msg_mntpt);
1100		break;
1101	case RF_SRV_NOT_RESPOND:
1102		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1103		    "NFS server %s not responding; still trying\n",
1104		    msg->msg_srv, msg->msg_mntpt, msg->msg_srv);
1105		break;
1106	case RF_SRV_OK:
1107		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1108		    "NFS server %s ok", msg->msg_srv, msg->msg_mntpt,
1109		    msg->msg_srv);
1110		break;
1111	case RF_SRVS_NOT_RESPOND:
1112		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1113		    "NFS servers %s not responding; still trying", msg->msg_srv,
1114		    msg->msg_mntpt, msg->msg_srv);
1115		break;
1116	case RF_SRVS_OK:
1117		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1118		    "NFS servers %s ok", msg->msg_srv, msg->msg_mntpt,
1119		    msg->msg_srv);
1120		break;
1121	case RF_DELMAP_CB_ERR:
1122		zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]"
1123		    "NFS op %s got error %s when executing delmap on file %s "
1124		    "(rnode_pt 0x%p).",
1125		    msg->msg_srv, msg->msg_mntpt, nfs4_op_to_str(fp->rf_op),
1126		    nfs4_stat_to_str(fp->rf_stat4), fp->rf_char1,
1127		    (void *)fp->rf_rp1);
1128		break;
1129	default:
1130		zcmn_err(zoneid, CE_WARN, "!queue_print_fact: illegal fact %d",
1131		    fp->rf_type);
1132	}
1133
1134	/*
1135	 * If set this fact will not be printed again and is considered
1136	 * dumped.
1137	 */
1138	if (dump)
1139		msg->msg_status = NFS4_MS_NO_DUMP;
1140}
1141
1142/*
1143 * Returns 1 if the entire queue should be dumped, 0 otherwise.
1144 */
1145static int
1146id_to_dump_queue(nfs4_event_type_t id)
1147{
1148	switch (id) {
1149	case RE_DEAD_FILE:
1150	case RE_SIGLOST:
1151	case RE_WRONGSEC:
1152	case RE_CLIENTID:
1153		return (1);
1154	default:
1155		return (0);
1156	}
1157}
1158
1159/*
1160 * Returns 1 if the event (but not the entire queue) should be printed;
1161 * 0 otherwise.
1162 */
1163static int
1164id_to_dump_solo_event(nfs4_event_type_t id)
1165{
1166	switch (id) {
1167	case RE_BAD_SEQID:
1168	case RE_BADHANDLE:
1169	case RE_FAIL_REMAP_LEN:
1170	case RE_FAIL_REMAP_OP:
1171	case RE_FAILOVER:
1172	case RE_OPENS_CHANGED:
1173	case RE_SIGLOST_NO_DUMP:
1174	case RE_UNEXPECTED_ACTION:
1175	case RE_UNEXPECTED_ERRNO:
1176	case RE_UNEXPECTED_STATUS:
1177	case RE_LOST_STATE_BAD_OP:
1178		return (1);
1179	default:
1180		return (0);
1181	}
1182}
1183
1184/*
1185 * Returns 1 if the fact (but not the entire queue) should be printed;
1186 * 0 otherwise.
1187 */
1188static int
1189id_to_dump_solo_fact(nfs4_fact_type_t id)
1190{
1191	switch (id) {
1192	case RF_SRV_NOT_RESPOND:
1193	case RF_SRV_OK:
1194	case RF_SRVS_NOT_RESPOND:
1195	case RF_SRVS_OK:
1196		return (1);
1197	default:
1198		return (0);
1199	}
1200}
1201
1202/*
1203 * Update a kernel stat
1204 */
1205static void
1206update_recov_kstats(nfs4_debug_msg_t *msg, mntinfo4_t *mi)
1207{
1208	rkstat_t	*rsp;
1209
1210	if (!mi->mi_recov_ksp)
1211		return;
1212
1213	rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
1214
1215	if (msg->msg_type == RM_EVENT) {
1216		switch (msg->rmsg_u.msg_event.re_type) {
1217		case RE_BADHANDLE:
1218			rsp->badhandle.value.ul++;
1219			break;
1220		case RE_CLIENTID:
1221			rsp->clientid.value.ul++;
1222			break;
1223		case RE_DEAD_FILE:
1224			rsp->dead_file.value.ul++;
1225			break;
1226		case RE_FAIL_RELOCK:
1227			rsp->fail_relock.value.ul++;
1228			break;
1229		case RE_FILE_DIFF:
1230			rsp->file_diff.value.ul++;
1231			break;
1232		case RE_OPENS_CHANGED:
1233			rsp->opens_changed.value.ul++;
1234			break;
1235		case RE_SIGLOST:
1236		case RE_SIGLOST_NO_DUMP:
1237			rsp->siglost.value.ul++;
1238			break;
1239		case RE_UNEXPECTED_ACTION:
1240			rsp->unexp_action.value.ul++;
1241			break;
1242		case RE_UNEXPECTED_ERRNO:
1243			rsp->unexp_errno.value.ul++;
1244			break;
1245		case RE_UNEXPECTED_STATUS:
1246			rsp->unexp_status.value.ul++;
1247			break;
1248		case RE_WRONGSEC:
1249			rsp->wrongsec.value.ul++;
1250			break;
1251		case RE_LOST_STATE_BAD_OP:
1252			rsp->lost_state_bad_op.value.ul++;
1253			break;
1254		default:
1255			break;
1256		}
1257	} else if (msg->msg_type == RM_FACT) {
1258		switch (msg->rmsg_u.msg_fact.rf_type) {
1259		case RF_BADOWNER:
1260			rsp->badowner.value.ul++;
1261			break;
1262		case RF_SRV_NOT_RESPOND:
1263			rsp->not_responding.value.ul++;
1264			break;
1265		default:
1266			break;
1267		}
1268	}
1269}
1270
1271/*
1272 * Dump the mi's mi_msg_list of recovery messages.
1273 */
1274static void
1275dump_queue(mntinfo4_t *mi, nfs4_debug_msg_t *msg)
1276{
1277	nfs4_debug_msg_t *tmp_msg;
1278
1279	ASSERT(mutex_owned(&mi->mi_msg_list_lock));
1280
1281	/* update kstats */
1282	update_recov_kstats(msg, mi);
1283
1284	/*
1285	 * If we aren't supposed to dump the queue then see if we
1286	 * should just print this single message, then return.
1287	 */
1288	if (!id_to_dump_queue(msg->rmsg_u.msg_event.re_type)) {
1289		if (id_to_dump_solo_event(msg->rmsg_u.msg_event.re_type))
1290			queue_print_event(msg, mi, 0);
1291		return;
1292	}
1293
1294	/*
1295	 * Write all events/facts in the queue that haven't been
1296	 * previously written to disk.
1297	 */
1298	tmp_msg = list_head(&mi->mi_msg_list);
1299	while (tmp_msg) {
1300		if (tmp_msg->msg_status == NFS4_MS_DUMP) {
1301			if (tmp_msg->msg_type == RM_EVENT)
1302				queue_print_event(tmp_msg, mi, 1);
1303			else if (tmp_msg->msg_type == RM_FACT)
1304				queue_print_fact(tmp_msg, 1);
1305		}
1306		tmp_msg = list_next(&mi->mi_msg_list, tmp_msg);
1307	}
1308}
1309
1310/*
1311 * Places the event into mi's debug recovery message queue.  Some of the
1312 * fields can be overloaded to be a generic value, depending on the event
1313 * type.  These include "count", "why".
1314 */
1315void
1316nfs4_queue_event(nfs4_event_type_t id, mntinfo4_t *mi, char *server1,
1317    uint_t count, vnode_t *vp1, vnode_t *vp2, nfsstat4 nfs4_error,
1318    char *why, pid_t pid, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2,
1319    seqid4 seqid1, seqid4 seqid2)
1320{
1321	nfs4_debug_msg_t	*msg;
1322	nfs4_revent_t		*ep;
1323	char			*cur_srv;
1324	rnode4_t		*rp1 = NULL, *rp2 = NULL;
1325	refstr_t		*mntpt;
1326
1327	ASSERT(mi != NULL);
1328	if (vp1)
1329		rp1 = VTOR4(vp1);
1330	if (vp2)
1331		rp2 = VTOR4(vp2);
1332
1333	/*
1334	 * Initialize the message with the relevant server/mount_pt/time
1335	 * information. Also place the relevent event related info.
1336	 */
1337	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
1338	msg->msg_type = RM_EVENT;
1339	msg->msg_status = NFS4_MS_DUMP;
1340	ep = &msg->rmsg_u.msg_event;
1341	ep->re_type = id;
1342	gethrestime(&msg->msg_time);
1343
1344	cur_srv = mi->mi_curr_serv->sv_hostname;
1345	msg->msg_srv = strdup(cur_srv);
1346	mntpt = vfs_getmntpoint(mi->mi_vfsp);
1347	msg->msg_mntpt = strdup(refstr_value(mntpt));
1348	refstr_rele(mntpt);
1349
1350	set_event(id, ep, mi, rp1, rp2, count, pid, nfs4_error, server1,
1351	    why, tag1, tag2, seqid1, seqid2);
1352
1353	mutex_enter(&mi->mi_msg_list_lock);
1354
1355	/* if this event is the same as the last event, drop it */
1356	if (events_same(list_tail(&mi->mi_msg_list), msg, mi)) {
1357		mutex_exit(&mi->mi_msg_list_lock);
1358		nfs4_free_msg(msg);
1359		return;
1360	}
1361
1362	/* queue the message at the end of the list */
1363	list_insert_tail(&mi->mi_msg_list, msg);
1364
1365	dump_queue(mi, msg);
1366
1367	if (mi->mi_msg_count == nfs4_msg_max) {
1368		nfs4_debug_msg_t *rm_msg;
1369
1370		/* remove the queue'd message at the front of the list */
1371		rm_msg = list_head(&mi->mi_msg_list);
1372		list_remove(&mi->mi_msg_list, rm_msg);
1373		mutex_exit(&mi->mi_msg_list_lock);
1374		nfs4_free_msg(rm_msg);
1375	} else {
1376		mi->mi_msg_count++;
1377		mutex_exit(&mi->mi_msg_list_lock);
1378	}
1379}
1380
1381/*
1382 * Places the fact into mi's debug recovery messages queue.
1383 */
1384void
1385nfs4_queue_fact(nfs4_fact_type_t fid, mntinfo4_t *mi, nfsstat4 stat4,
1386    nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, char *srvname,
1387    int error, vnode_t *vp)
1388{
1389	nfs4_debug_msg_t	*msg;
1390	nfs4_rfact_t		*fp;
1391	char			*cur_srv;
1392	refstr_t		*mntpt;
1393
1394	/*
1395	 * Initialize the message with the relevant server/mount_pt/time
1396	 * information. Also place the relevant fact related info.
1397	 */
1398	msg = kmem_zalloc(sizeof (*msg), KM_SLEEP);
1399	msg->msg_type = RM_FACT;
1400	msg->msg_status = NFS4_MS_DUMP;
1401	gethrestime(&msg->msg_time);
1402
1403	if (srvname)
1404		cur_srv = srvname;
1405	else
1406		cur_srv = mi->mi_curr_serv->sv_hostname;
1407
1408	msg->msg_srv = strdup(cur_srv);
1409	mntpt = vfs_getmntpoint(mi->mi_vfsp);
1410	msg->msg_mntpt = strdup(refstr_value(mntpt));
1411	refstr_rele(mntpt);
1412
1413	fp = &msg->rmsg_u.msg_fact;
1414	fp->rf_type = fid;
1415	fp->rf_status = RFS_NO_INSPECT;
1416	set_fact(fid, fp, stat4, raction, op, reboot, error, vp);
1417
1418	update_recov_kstats(msg, mi);
1419
1420	mutex_enter(&mi->mi_msg_list_lock);
1421
1422	/* if this fact is the same as the last fact, drop it */
1423	if (facts_same(list_tail(&mi->mi_msg_list), msg, mi)) {
1424		mutex_exit(&mi->mi_msg_list_lock);
1425		nfs4_free_msg(msg);
1426		return;
1427	}
1428
1429	/* queue the message at the end of the list */
1430	list_insert_tail(&mi->mi_msg_list, msg);
1431
1432	if (id_to_dump_solo_fact(msg->rmsg_u.msg_fact.rf_type))
1433		queue_print_fact(msg, 0);
1434
1435	if (mi->mi_msg_count == nfs4_msg_max) {
1436		nfs4_debug_msg_t *rm_msg;
1437
1438		/* remove the queue'd message at the front of the list */
1439		rm_msg = list_head(&mi->mi_msg_list);
1440		list_remove(&mi->mi_msg_list, rm_msg);
1441		mutex_exit(&mi->mi_msg_list_lock);
1442		nfs4_free_msg(rm_msg);
1443	} else {
1444		mi->mi_msg_count++;
1445		mutex_exit(&mi->mi_msg_list_lock);
1446	}
1447}
1448
1449/*
1450 * Initialize the 'mi_recov_kstat' kstat.
1451 */
1452void
1453nfs4_mnt_recov_kstat_init(vfs_t *vfsp)
1454{
1455	mntinfo4_t *mi = VFTOMI4(vfsp);
1456	kstat_t		*ksp;
1457	zoneid_t	zoneid = mi->mi_zone->zone_id;
1458
1459	/*
1460	 * Create the version specific kstats.
1461	 *
1462	 * PSARC 2001/697 Contract Private Interface
1463	 * All nfs kstats are under SunMC contract
1464	 * Please refer to the PSARC listed above and contact
1465	 * SunMC before making any changes!
1466	 *
1467	 * Changes must be reviewed by Solaris File Sharing
1468	 * Changes must be communicated to contract-2001-697@sun.com
1469	 *
1470	 */
1471
1472	if ((ksp = kstat_create_zone("nfs", getminor(vfsp->vfs_dev),
1473	    "mi_recov_kstat", "misc", KSTAT_TYPE_NAMED,
1474	    sizeof (rkstat_t) / sizeof (kstat_named_t),
1475	    KSTAT_FLAG_WRITABLE, zoneid)) == NULL) {
1476		mi->mi_recov_ksp = NULL;
1477		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1478		    "!mi_recov_kstat for mi %p failed\n",
1479		    (void *)mi);
1480		return;
1481	}
1482	if (zoneid != GLOBAL_ZONEID)
1483		kstat_zone_add(ksp, GLOBAL_ZONEID);
1484	mi->mi_recov_ksp = ksp;
1485	bcopy(&rkstat_template, ksp->ks_data, sizeof (rkstat_t));
1486	kstat_install(ksp);
1487}
1488
1489/*
1490 * Increment the "delay" kstat.
1491 */
1492void
1493nfs4_mi_kstat_inc_delay(mntinfo4_t *mi)
1494{
1495	rkstat_t    *rsp;
1496
1497	if (!mi->mi_recov_ksp)
1498		return;
1499
1500	rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
1501	rsp->delay.value.ul++;
1502}
1503
1504/*
1505 * Increment the "no_grace" kstat.
1506 */
1507void
1508nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi)
1509{
1510	rkstat_t	*rsp;
1511
1512	if (!mi->mi_recov_ksp)
1513		return;
1514
1515	rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
1516	rsp->no_grace.value.ul++;
1517}
1518