1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * Dump memory to NFS swap file after a panic.
28 * We have no timeouts, context switches, etc.
29 */
30
31#include <rpc/types.h>
32#include <sys/param.h>
33#include <sys/errno.h>
34#include <sys/vnode.h>
35#include <sys/bootconf.h>
36#include <nfs/nfs.h>
37#include <rpc/auth.h>
38#include <rpc/xdr.h>
39#include <rpc/rpc_msg.h>
40#include <rpc/clnt.h>
41#include <netinet/in.h>
42#include <sys/tiuser.h>
43#include <nfs/nfs_clnt.h>
44#include <sys/t_kuser.h>
45#include <sys/file.h>
46#include <sys/netconfig.h>
47#include <sys/utsname.h>
48#include <sys/sysmacros.h>
49#include <sys/thread.h>
50#include <sys/cred.h>
51#include <sys/strsubr.h>
52#include <nfs/rnode.h>
53#include <sys/varargs.h>
54#include <sys/cmn_err.h>
55#include <sys/systm.h>
56#include <sys/dumphdr.h>
57#include <sys/debug.h>
58#include <sys/sunddi.h>
59
60#define	TIMEOUT		(2 * hz)
61#define	RETRIES		(5)
62#define	HDR_SIZE	(256)
63
64static struct knetconfig	nfsdump_cf;
65static struct netbuf		nfsdump_addr;
66static fhandle_t		nfsdump_fhandle2;
67static nfs_fh3			nfsdump_fhandle3;
68static int			nfsdump_maxcount;
69static rpcvers_t		nfsdump_version;
70
71/*
72 * nonzero dumplog enables nd_log messages
73 */
74static int 	dumplog = 0;
75
76static int	nd_init(vnode_t *, TIUSER **);
77static int	nd_poll(TIUSER *, int, int *);
78static int	nd_send_data(TIUSER *, caddr_t, int, XDR *, uint32_t *);
79static int	nd_get_reply(TIUSER *, XDR *, uint32_t, int *);
80static int	nd_auth_marshall(XDR *);
81
82static void nd_log(const char *, ...) __KPRINTFLIKE(1);
83
84/*PRINTFLIKE1*/
85static void
86nd_log(const char *fmt, ...)
87{
88	if (dumplog) {
89		va_list adx;
90
91		va_start(adx, fmt);
92		vprintf(fmt, adx);
93		va_end(adx);
94	}
95}
96
97/* ARGSUSED */
98int
99nfs_dump(vnode_t *dumpvp, caddr_t addr, offset_t bn, offset_t count,
100    caller_context_t *ct)
101{
102	static TIUSER	*tiptr;
103	XDR		xdrs;
104	int		reply;
105	int		badmsg;
106	uint32_t	call_xid;
107	int		retry = 0;
108	int		error;
109	int		i;
110
111	nd_log("nfs_dump: addr=%p bn=%lld count=%lld\n",
112	    (void *)addr, bn, count);
113
114	if (error = nd_init(dumpvp, &tiptr))
115		return (error);
116
117	for (i = 0; i < count; i += ptod(1), addr += ptob(1)) {
118		do {
119			error = nd_send_data(tiptr, addr, (int)dbtob(bn + i),
120			    &xdrs, &call_xid);
121			if (error)
122				return (error);
123
124			do {
125				if (error = nd_poll(tiptr, retry, &reply))
126					return (error);
127
128				if (!reply) {
129					retry++;
130					break;
131				}
132				retry = 0;
133
134				error = nd_get_reply(tiptr, &xdrs, call_xid,
135				    &badmsg);
136				if (error)
137					return (error);
138			} while (badmsg);
139		} while (retry);
140	}
141
142	return (0);
143}
144
145static int
146nd_init(vnode_t *dumpvp, TIUSER **tiptr)
147{
148	int 		error;
149
150	if (*tiptr)
151		return (0);
152
153	/*
154	 * If dump info hasn't yet been initialized (because dump
155	 * device was chosen at user-level, rather than at boot time
156	 * in nfs_swapvp) fill it in now.
157	 */
158	if (nfsdump_maxcount == 0) {
159		nfsdump_version = VTOMI(dumpvp)->mi_vers;
160		switch (nfsdump_version) {
161		case NFS_VERSION:
162			nfsdump_fhandle2 = *VTOFH(dumpvp);
163			break;
164		case NFS_V3:
165			nfsdump_fhandle3 = *VTOFH3(dumpvp);
166			break;
167		default:
168			return (EIO);
169		}
170		nfsdump_maxcount = (int)dumpvp_size;
171		nfsdump_addr = VTOMI(dumpvp)->mi_curr_serv->sv_addr;
172		nfsdump_cf = *(VTOMI(dumpvp)->mi_curr_serv->sv_knconf);
173		if (nfsdump_cf.knc_semantics != NC_TPI_CLTS) {
174			int v6 = 1;
175			nd_log("nfs_dump: not connectionless!\n");
176			if ((strcmp(nfsdump_cf.knc_protofmly, NC_INET) == 0) ||
177			    ((v6 = strcmp(nfsdump_cf.knc_protofmly, NC_INET6))\
178			    == 0)) {
179				major_t clone_maj;
180
181				nfsdump_cf.knc_proto = NC_UDP;
182				nfsdump_cf.knc_semantics = NC_TPI_CLTS;
183				nd_log("nfs_dump: grabbing UDP major number\n");
184				clone_maj = ddi_name_to_major("clone");
185				nd_log("nfs_dump: making UDP device\n");
186				nfsdump_cf.knc_rdev = makedevice(clone_maj,
187				    ddi_name_to_major(v6?"udp":"udp6"));
188			} else {
189				error = EIO;
190				nfs_perror(error, "\nnfs_dump: cannot dump over"
191				    " protocol %s: %m\n", nfsdump_cf.knc_proto);
192				return (error);
193			}
194		}
195	}
196
197	nd_log("nfs_dump: calling t_kopen\n");
198
199	if (error = t_kopen(NULL, nfsdump_cf.knc_rdev,
200	    FREAD|FWRITE|FNDELAY, tiptr, CRED())) {
201		nfs_perror(error, "\nnfs_dump: t_kopen failed: %m\n");
202		return (EIO);
203	}
204
205	if ((strcmp(nfsdump_cf.knc_protofmly, NC_INET) == 0) ||
206	    (strcmp(nfsdump_cf.knc_protofmly, NC_INET6) == 0)) {
207		nd_log("nfs_dump: calling bindresvport\n");
208		if (error = bindresvport(*tiptr, NULL, NULL, FALSE)) {
209			nfs_perror(error,
210			    "\nnfs_dump: bindresvport failed: %m\n");
211			return (EIO);
212		}
213	} else {
214		nd_log("nfs_dump: calling t_kbind\n");
215		if ((error = t_kbind(*tiptr, NULL, NULL)) != 0) {
216			nfs_perror(error, "\nnfs_dump: t_kbind failed: %m\n");
217			return (EIO);
218		}
219	}
220	return (0);
221}
222
223static int
224nd_send_data(TIUSER *tiptr, caddr_t addr, int offset, XDR *xdrp, uint32_t *xidp)
225{
226	static struct rpc_msg		call_msg;
227	static uchar_t			header[HDR_SIZE];
228	static struct t_kunitdata	sudata;
229	static uchar_t			*dumpbuf;
230	int				procnum;
231	stable_how			stable = FILE_SYNC;
232	mblk_t				*mblk_p;
233	int				error;
234	int				tsize = ptob(1);
235	uint64				offset3;
236
237	if (!dumpbuf) {
238		call_msg.rm_direction = CALL;
239		call_msg.rm_call.cb_rpcvers = RPC_MSG_VERSION;
240		call_msg.rm_call.cb_prog = NFS_PROGRAM;
241		call_msg.rm_call.cb_vers = nfsdump_version;
242
243		if (!(dumpbuf = kmem_alloc(ptob(1), KM_NOSLEEP))) {
244		cmn_err(CE_WARN, "\tnfs_dump: cannot allocate dump buffer");
245			return (ENOMEM);
246		}
247	}
248
249	nd_log("nfs_dump: calling esballoc for header\n");
250
251	if (!(mblk_p = esballoc(header, HDR_SIZE, BPRI_HI, &frnop))) {
252		cmn_err(CE_WARN, "\tnfs_dump: out of mblks");
253		return (ENOBUFS);
254	}
255
256	xdrmem_create(xdrp, (caddr_t)header, HDR_SIZE, XDR_ENCODE);
257
258	call_msg.rm_xid = alloc_xid();
259	*xidp = call_msg.rm_xid;
260
261	if (!xdr_callhdr(xdrp, &call_msg)) {
262		cmn_err(CE_WARN, "\tnfs_dump: cannot serialize header");
263		return (EIO);
264	}
265
266	if (nfsdump_maxcount) {
267		/*
268		 * Do not extend the dump file if it is also
269		 * the swap file.
270		 */
271		if (offset >= nfsdump_maxcount) {
272			cmn_err(CE_WARN, "\tnfs_dump: end of file");
273			return (EIO);
274		}
275		if (offset + tsize > nfsdump_maxcount)
276			tsize = nfsdump_maxcount - offset;
277	}
278	switch (nfsdump_version) {
279	case NFS_VERSION:
280		procnum = RFS_WRITE;
281		if (!XDR_PUTINT32(xdrp, (int32_t *)&procnum) ||
282		    !nd_auth_marshall(xdrp) ||
283		    !xdr_fhandle(xdrp, &nfsdump_fhandle2) ||
284			/*
285			 *  Following four values are:
286			 *	beginoffset
287			 *	offset
288			 *	length
289			 *	bytes array length
290			 */
291		    !XDR_PUTINT32(xdrp, (int32_t *)&offset) ||
292		    !XDR_PUTINT32(xdrp, (int32_t *)&offset) ||
293		    !XDR_PUTINT32(xdrp, (int32_t *)&tsize) ||
294		    !XDR_PUTINT32(xdrp, (int32_t *)&tsize)) {
295			cmn_err(CE_WARN, "\tnfs_dump: serialization failed");
296			return (EIO);
297		}
298		break;
299	case NFS_V3:
300		procnum = NFSPROC3_WRITE;
301		offset3 = offset;
302		if (!XDR_PUTINT32(xdrp, (int32_t *)&procnum) ||
303		    !nd_auth_marshall(xdrp) ||
304		    !xdr_nfs_fh3(xdrp, &nfsdump_fhandle3) ||
305			/*
306			 *  Following four values are:
307			 *	offset
308			 *	count
309			 *	stable
310			 *	bytes array length
311			 */
312		    !xdr_u_longlong_t(xdrp, &offset3) ||
313		    !XDR_PUTINT32(xdrp, (int32_t *)&tsize) ||
314		    !XDR_PUTINT32(xdrp, (int32_t *)&stable) ||
315		    !XDR_PUTINT32(xdrp, (int32_t *)&tsize)) {
316			cmn_err(CE_WARN, "\tnfs_dump: serialization failed");
317			return (EIO);
318		}
319		break;
320	default:
321		return (EIO);
322	}
323
324	bcopy(addr, (caddr_t)dumpbuf, tsize);
325
326	mblk_p->b_wptr += (int)XDR_GETPOS(xdrp);
327
328	mblk_p->b_cont = esballoc((uchar_t *)dumpbuf, ptob(1), BPRI_HI, &frnop);
329
330	if (!mblk_p->b_cont) {
331		cmn_err(CE_WARN, "\tnfs_dump: out of mblks");
332		return (ENOBUFS);
333	}
334	mblk_p->b_cont->b_wptr += ptob(1);
335
336	sudata.addr = nfsdump_addr;		/* structure copy */
337	sudata.udata.buf = (char *)NULL;
338	sudata.udata.maxlen = 0;
339	sudata.udata.len = 1;			/* needed for t_ksndudata */
340	sudata.udata.udata_mp = mblk_p;
341
342	nd_log("nfs_dump: calling t_ksndudata\n");
343
344	if (error = t_ksndudata(tiptr, &sudata, (frtn_t *)NULL)) {
345		nfs_perror(error, "\nnfs_dump: t_ksndudata failed: %m\n");
346		return (error);
347	}
348	return (0);
349}
350
351static int
352nd_get_reply(TIUSER *tiptr, XDR *xdrp, uint32_t call_xid, int *badmsg)
353{
354	static struct rpc_msg		reply_msg;
355	static struct rpc_err		rpc_err;
356	static struct nfsattrstat	na;
357	static struct WRITE3res		wres;
358	static struct t_kunitdata	rudata;
359	int				uderr;
360	int				type;
361	int				error;
362
363	*badmsg = 0;
364
365	rudata.addr.maxlen = 0;
366	rudata.opt.maxlen = 0;
367	rudata.udata.udata_mp = (mblk_t *)NULL;
368
369	nd_log("nfs_dump: calling t_krcvudata\n");
370
371	if (error = t_krcvudata(tiptr, &rudata, &type, &uderr)) {
372		nfs_perror(error, "\nnfs_dump: t_krcvudata failed: %m\n");
373		return (EIO);
374	}
375	if (type != T_DATA) {
376		cmn_err(CE_WARN, "\tnfs_dump:  received type %d", type);
377		*badmsg = 1;
378		return (0);
379	}
380	if (!rudata.udata.udata_mp) {
381		cmn_err(CE_WARN, "\tnfs_dump: null receive");
382		*badmsg = 1;
383		return (0);
384	}
385
386	/*
387	 * Decode results.
388	 */
389	xdrmblk_init(xdrp, rudata.udata.udata_mp, XDR_DECODE, 0);
390
391	reply_msg.acpted_rply.ar_verf = _null_auth;
392	switch (nfsdump_version) {
393	case NFS_VERSION:
394		reply_msg.acpted_rply.ar_results.where = (caddr_t)&na;
395		reply_msg.acpted_rply.ar_results.proc = xdr_attrstat;
396		break;
397	case NFS_V3:
398		reply_msg.acpted_rply.ar_results.where = (caddr_t)&wres;
399		reply_msg.acpted_rply.ar_results.proc = xdr_WRITE3res;
400		break;
401	default:
402		return (EIO);
403	}
404
405	if (!xdr_replymsg(xdrp, &reply_msg)) {
406		cmn_err(CE_WARN, "\tnfs_dump: xdr_replymsg failed");
407		return (EIO);
408	}
409
410	if (reply_msg.rm_xid != call_xid) {
411		*badmsg = 1;
412		return (0);
413	}
414
415	_seterr_reply(&reply_msg, &rpc_err);
416
417	if (rpc_err.re_status != RPC_SUCCESS) {
418		cmn_err(CE_WARN, "\tnfs_dump: RPC error %d (%s)",
419		    rpc_err.re_status, clnt_sperrno(rpc_err.re_status));
420		return (EIO);
421	}
422
423	switch (nfsdump_version) {
424	case NFS_VERSION:
425		if (na.ns_status) {
426			cmn_err(CE_WARN, "\tnfs_dump: status %d", na.ns_status);
427			return (EIO);
428		}
429		break;
430	case NFS_V3:
431		if (wres.status != NFS3_OK) {
432			cmn_err(CE_WARN, "\tnfs_dump: status %d", wres.status);
433			return (EIO);
434		}
435		break;
436	default:
437		return (EIO);
438	}
439
440	if (reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
441		/* free auth handle */
442		xdrp->x_op = XDR_FREE;
443		(void) xdr_opaque_auth(xdrp, &(reply_msg.acpted_rply.ar_verf));
444	}
445
446	freemsg(rudata.udata.udata_mp);
447
448	return (0);
449}
450
451static int
452nd_poll(TIUSER *tiptr, int retry, int *eventp)
453{
454	clock_t		start_bolt = ddi_get_lbolt();
455	clock_t		timout = TIMEOUT * (retry + 1);
456	int		error;
457
458	nd_log("nfs_dump: calling t_kspoll\n");
459
460	*eventp = 0;
461
462	while (!*eventp && ((ddi_get_lbolt() - start_bolt) < timout)) {
463		/*
464		 * Briefly enable interrupts before checking for a reply;
465		 * the network transports do not yet support do_polled_io.
466		 */
467		int s = spl0();
468		splx(s);
469
470		if (error = t_kspoll(tiptr, 0, READWAIT, eventp)) {
471			nfs_perror(error,
472			    "\nnfs_dump: t_kspoll failed: %m\n");
473			return (EIO);
474		}
475		runqueues();
476	}
477
478	if (retry == RETRIES && !*eventp) {
479		cmn_err(CE_WARN, "\tnfs_dump: server not responding");
480		return (EIO);
481	}
482
483	return (0);
484}
485
486static int
487nd_auth_marshall(XDR *xdrp)
488{
489	int credsize;
490	int32_t *ptr;
491	int hostnamelen;
492
493	hostnamelen = (int)strlen(utsname.nodename);
494	credsize = 4 + 4 + roundup(hostnamelen, 4) + 4 + 4 + 4;
495
496	ptr = XDR_INLINE(xdrp, 4 + 4 + credsize + 4 + 4);
497	if (!ptr) {
498		cmn_err(CE_WARN, "\tnfs_dump: auth_marshall failed");
499		return (0);
500	}
501	/*
502	 * We can do the fast path.
503	 */
504	IXDR_PUT_INT32(ptr, AUTH_UNIX);	/* cred flavor */
505	IXDR_PUT_INT32(ptr, credsize);	/* cred len */
506	IXDR_PUT_INT32(ptr, gethrestime_sec());
507	IXDR_PUT_INT32(ptr, hostnamelen);
508
509	bcopy(utsname.nodename, ptr, hostnamelen);
510	ptr += roundup(hostnamelen, 4) / 4;
511
512	IXDR_PUT_INT32(ptr, 0);		/* uid */
513	IXDR_PUT_INT32(ptr, 0);		/* gid */
514	IXDR_PUT_INT32(ptr, 0);		/* gid list length (empty) */
515	IXDR_PUT_INT32(ptr, AUTH_NULL);	/* verf flavor */
516	IXDR_PUT_INT32(ptr, 0);		/* verf len */
517
518	return (1);
519}
520