uadmin.c revision 9160:1517e6edbc6f
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27
28#include <sys/param.h>
29#include <sys/types.h>
30#include <sys/sysmacros.h>
31#include <sys/systm.h>
32#include <sys/errno.h>
33#include <sys/vfs.h>
34#include <sys/vnode.h>
35#include <sys/swap.h>
36#include <sys/file.h>
37#include <sys/proc.h>
38#include <sys/var.h>
39#include <sys/uadmin.h>
40#include <sys/signal.h>
41#include <sys/time.h>
42#include <vm/seg_kmem.h>
43#include <sys/modctl.h>
44#include <sys/callb.h>
45#include <sys/dumphdr.h>
46#include <sys/debug.h>
47#include <sys/ftrace.h>
48#include <sys/cmn_err.h>
49#include <sys/panic.h>
50#include <sys/ddi.h>
51#include <sys/sunddi.h>
52#include <sys/policy.h>
53#include <sys/zone.h>
54#include <sys/condvar.h>
55#include <sys/thread.h>
56#include <sys/sdt.h>
57
58/*
59 * Administrivia system call.  We provide this in two flavors: one for calling
60 * from the system call path (uadmin), and the other for calling from elsewhere
61 * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
62 * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
63 */
64
65extern ksema_t fsflush_sema;
66kmutex_t ualock;
67kcondvar_t uacond;
68kthread_t *ua_shutdown_thread = NULL;
69
70int sys_shutdown = 0;
71volatile int fastreboot_dryrun = 0;
72
73/*
74 * Kill all user processes in said zone.  A special argument of ALL_ZONES is
75 * passed in when the system as a whole is shutting down.  The lack of per-zone
76 * process lists is likely to make the following a performance bottleneck on a
77 * system with many zones.
78 */
79void
80killall(zoneid_t zoneid)
81{
82	proc_t *p;
83
84	ASSERT(zoneid != GLOBAL_ZONEID);
85	/*
86	 * Kill all processes except kernel daemons and ourself.
87	 * Make a first pass to stop all processes so they won't
88	 * be trying to restart children as we kill them.
89	 */
90	mutex_enter(&pidlock);
91	for (p = practive; p != NULL; p = p->p_next) {
92		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
93		    p->p_exec != NULLVP &&	/* kernel daemons */
94		    p->p_as != &kas &&
95		    p->p_stat != SZOMB) {
96			mutex_enter(&p->p_lock);
97			p->p_flag |= SNOWAIT;
98			sigtoproc(p, NULL, SIGSTOP);
99			mutex_exit(&p->p_lock);
100		}
101	}
102	p = practive;
103	while (p != NULL) {
104		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
105		    p->p_exec != NULLVP &&	/* kernel daemons */
106		    p->p_as != &kas &&
107		    p->p_stat != SIDL &&
108		    p->p_stat != SZOMB) {
109			mutex_enter(&p->p_lock);
110			if (sigismember(&p->p_sig, SIGKILL)) {
111				mutex_exit(&p->p_lock);
112				p = p->p_next;
113			} else {
114				sigtoproc(p, NULL, SIGKILL);
115				mutex_exit(&p->p_lock);
116				(void) cv_timedwait(&p->p_srwchan_cv, &pidlock,
117				    lbolt + hz);
118				p = practive;
119			}
120		} else {
121			p = p->p_next;
122		}
123	}
124	mutex_exit(&pidlock);
125}
126
127int
128kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
129{
130	int error = 0;
131	char *buf;
132	size_t buflen = 0;
133	boolean_t invoke_cb = B_FALSE;
134
135	/*
136	 * We might be called directly by the kernel's fault-handling code, so
137	 * we can't assert that the caller is in the global zone.
138	 */
139
140	/*
141	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
142	 * and that we have appropriate privileges for this action.
143	 */
144	switch (cmd) {
145	case A_FTRACE:
146	case A_SHUTDOWN:
147	case A_REBOOT:
148	case A_REMOUNT:
149	case A_FREEZE:
150	case A_DUMP:
151	case A_SDTTEST:
152	case A_CONFIG:
153		if (secpolicy_sys_config(credp, B_FALSE) != 0)
154			return (EPERM);
155		break;
156
157	default:
158		return (EINVAL);
159	}
160
161	/*
162	 * Serialize these operations on ualock.  If it is held, the
163	 * system should shutdown, reboot, or remount shortly, unless there is
164	 * an error.  We need a cv rather than just a mutex because proper
165	 * functioning of A_REBOOT relies on being able to interrupt blocked
166	 * userland callers.
167	 *
168	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
169	 * Other commands should never return.
170	 */
171	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
172	    cmd == A_CONFIG) {
173		mutex_enter(&ualock);
174		while (ua_shutdown_thread != NULL) {
175			if (cv_wait_sig(&uacond, &ualock) == 0) {
176				/*
177				 * If we were interrupted, leave, and handle
178				 * the signal (or exit, depending on what
179				 * happened)
180				 */
181				mutex_exit(&ualock);
182				return (EINTR);
183			}
184		}
185		ua_shutdown_thread = curthread;
186		mutex_exit(&ualock);
187	}
188
189	switch (cmd) {
190	case A_SHUTDOWN:
191	{
192		proc_t *p = ttoproc(curthread);
193
194		/*
195		 * Release (almost) all of our own resources if we are called
196		 * from a user context, however if we are calling kadmin() from
197		 * a kernel context then we do not release these resources.
198		 */
199		if (p != &p0) {
200			proc_is_exiting(p);
201			if ((error = exitlwps(0)) != 0) {
202				/*
203				 * Another thread in this process also called
204				 * exitlwps().
205				 */
206				mutex_enter(&ualock);
207				ua_shutdown_thread = NULL;
208				cv_signal(&uacond);
209				mutex_exit(&ualock);
210				return (error);
211			}
212			mutex_enter(&p->p_lock);
213			p->p_flag |= SNOWAIT;
214			sigfillset(&p->p_ignore);
215			curthread->t_lwp->lwp_cursig = 0;
216			curthread->t_lwp->lwp_extsig = 0;
217			if (p->p_exec) {
218				vnode_t *exec_vp = p->p_exec;
219				p->p_exec = NULLVP;
220				mutex_exit(&p->p_lock);
221				VN_RELE(exec_vp);
222			} else {
223				mutex_exit(&p->p_lock);
224			}
225
226			pollcleanup();
227			closeall(P_FINFO(curproc));
228			relvm();
229
230		} else {
231			/*
232			 * Reset t_cred if not set because much of the
233			 * filesystem code depends on CRED() being valid.
234			 */
235			if (curthread->t_cred == NULL)
236				curthread->t_cred = kcred;
237		}
238
239		/* indicate shutdown in progress */
240		sys_shutdown = 1;
241
242		/*
243		 * Communcate that init shouldn't be restarted.
244		 */
245		zone_shutdown_global();
246
247		killall(ALL_ZONES);
248		/*
249		 * If we are calling kadmin() from a kernel context then we
250		 * do not release these resources.
251		 */
252		if (ttoproc(curthread) != &p0) {
253			VN_RELE(PTOU(curproc)->u_cdir);
254			if (PTOU(curproc)->u_rdir)
255				VN_RELE(PTOU(curproc)->u_rdir);
256			if (PTOU(curproc)->u_cwd)
257				refstr_rele(PTOU(curproc)->u_cwd);
258
259			PTOU(curproc)->u_cdir = rootdir;
260			PTOU(curproc)->u_rdir = NULL;
261			PTOU(curproc)->u_cwd = NULL;
262		}
263
264		/*
265		 * Allow the reboot/halt/poweroff code a chance to do
266		 * anything it needs to whilst we still have filesystems
267		 * mounted, like loading any modules necessary for later
268		 * performing the actual poweroff.
269		 */
270		if ((mdep != NULL) && (*(char *)mdep == '/')) {
271			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
272			mdpreboot(cmd, fcn, buf);
273		} else
274			mdpreboot(cmd, fcn, mdep);
275
276		/*
277		 * Allow fsflush to finish running and then prevent it
278		 * from ever running again so that vfs_unmountall() and
279		 * vfs_syncall() can acquire the vfs locks they need.
280		 */
281		sema_p(&fsflush_sema);
282		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
283
284		vfs_unmountall();
285		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
286		vfs_syncall();
287
288		dump_ereports();
289		dump_messages();
290
291		invoke_cb = B_TRUE;
292
293		/* FALLTHROUGH */
294	}
295
296	case A_REBOOT:
297		if ((mdep != NULL) && (*(char *)mdep == '/')) {
298			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
299			mdboot(cmd, fcn, buf, invoke_cb);
300		} else
301			mdboot(cmd, fcn, mdep, invoke_cb);
302		/* no return expected */
303		break;
304
305	case A_CONFIG:
306		switch (fcn) {
307		case AD_UPDATE_BOOT_CONFIG:
308#ifndef	__sparc
309		{
310			extern int fastreboot_capable;
311			extern void fastboot_update_config(const char *);
312
313			if (fastreboot_capable)
314				fastboot_update_config(mdep);
315		}
316#endif
317
318			break;
319		}
320		/* Let other threads enter the shutdown path now */
321		mutex_enter(&ualock);
322		ua_shutdown_thread = NULL;
323		cv_signal(&uacond);
324		mutex_exit(&ualock);
325		break;
326
327	case A_REMOUNT:
328		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
329		/* Let other threads enter the shutdown path now */
330		mutex_enter(&ualock);
331		ua_shutdown_thread = NULL;
332		cv_signal(&uacond);
333		mutex_exit(&ualock);
334		break;
335
336	case A_FREEZE:
337	{
338		/*
339		 * This is the entrypoint for all suspend/resume actions.
340		 */
341		extern int cpr(int, void *);
342
343		if (modload("misc", "cpr") == -1)
344			return (ENOTSUP);
345		/* Let the CPR module decide what to do with mdep */
346		error = cpr(fcn, mdep);
347		break;
348	}
349
350	case A_FTRACE:
351	{
352		switch (fcn) {
353		case AD_FTRACE_START:
354			(void) FTRACE_START();
355			break;
356		case AD_FTRACE_STOP:
357			(void) FTRACE_STOP();
358			break;
359		default:
360			error = EINVAL;
361		}
362		break;
363	}
364
365	case A_DUMP:
366	{
367		if (fcn == AD_NOSYNC) {
368			in_sync = 1;
369			break;
370		}
371
372		panic_bootfcn = fcn;
373		panic_forced = 1;
374
375		if ((mdep != NULL) && (*(char *)mdep == '/')) {
376			panic_bootstr = i_convert_boot_device_name(mdep,
377			    NULL, &buflen);
378		} else
379			panic_bootstr = mdep;
380
381#ifndef	__sparc
382		extern int fastreboot_onpanic;
383		if (fcn != AD_FASTREBOOT) {
384			extern void fastboot_update_config(const char *);
385			/*
386			 * If user has explicitly requested reboot to prom,
387			 * or uadmin(1M) was invoked with other functions,
388			 * don't try to fast reboot after dumping.
389			 */
390			fastreboot_onpanic = 0;
391			fastboot_update_config((char *)&fastreboot_onpanic);
392		}
393
394		if (fastreboot_onpanic) {
395			extern void fastboot_load_kernel(char *);
396			fastboot_load_kernel(mdep);
397		}
398#endif
399
400		panic("forced crash dump initiated at user request");
401		/*NOTREACHED*/
402	}
403
404	case A_SDTTEST:
405	{
406		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
407		    int, 6, int, 7);
408		break;
409	}
410
411	default:
412		error = EINVAL;
413	}
414
415	return (error);
416}
417
418int
419uadmin(int cmd, int fcn, uintptr_t mdep)
420{
421	int error = 0, rv = 0;
422	size_t nbytes = 0;
423	cred_t *credp = CRED();
424	char *bootargs = NULL;
425	int reset_status = 0;
426
427	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
428		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
429		    &reset_status);
430		if (reset_status != 0)
431			return (EIO);
432		else
433			return (0);
434	}
435
436	/*
437	 * The swapctl system call doesn't have its own entry point: it uses
438	 * uadmin as a wrapper so we just call it directly from here.
439	 */
440	if (cmd == A_SWAPCTL) {
441		if (get_udatamodel() == DATAMODEL_NATIVE)
442			error = swapctl(fcn, (void *)mdep, &rv);
443#if defined(_SYSCALL32_IMPL)
444		else
445			error = swapctl32(fcn, (void *)mdep, &rv);
446#endif /* _SYSCALL32_IMPL */
447		return (error ? set_errno(error) : rv);
448	}
449
450	/*
451	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
452	 * a boot string.  We pull that in as bootargs, if applicable.
453	 */
454	if (mdep != NULL &&
455	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
456	    cmd == A_FREEZE || cmd == A_CONFIG)) {
457		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
458		if ((error = copyinstr((const char *)mdep, bootargs,
459		    BOOTARGS_MAX, &nbytes)) != 0) {
460			kmem_free(bootargs, BOOTARGS_MAX);
461			return (set_errno(error));
462		}
463	}
464
465	/*
466	 * Invoke the appropriate kadmin() routine.
467	 */
468	if (getzoneid() != GLOBAL_ZONEID)
469		error = zone_kadmin(cmd, fcn, bootargs, credp);
470	else
471		error = kadmin(cmd, fcn, bootargs, credp);
472
473	if (bootargs != NULL)
474		kmem_free(bootargs, BOOTARGS_MAX);
475	return (error ? set_errno(error) : 0);
476}
477