kern_acct.c revision 93593
1122394Sharti/*-
2122394Sharti * Copyright (c) 1994 Christopher G. Demetriou
3122394Sharti * Copyright (c) 1982, 1986, 1989, 1993
4122394Sharti *	The Regents of the University of California.  All rights reserved.
5163820Sharti * (c) UNIX System Laboratories, Inc.
6163820Sharti * All or some portions of this file are derived from material licensed
7163820Sharti * to the University of California by American Telephone and Telegraph
8122394Sharti * Co. or Unix System Laboratories, Inc. and are reproduced herein with
9122394Sharti * the permission of UNIX System Laboratories, Inc.
10133211Sharti *
11133211Sharti * Redistribution and use in source and binary forms, with or without
12133211Sharti * modification, are permitted provided that the following conditions
13133211Sharti * are met:
14133211Sharti * 1. Redistributions of source code must retain the above copyright
15133211Sharti *    notice, this list of conditions and the following disclaimer.
16122394Sharti * 2. Redistributions in binary form must reproduce the above copyright
17122394Sharti *    notice, this list of conditions and the following disclaimer in the
18122394Sharti *    documentation and/or other materials provided with the distribution.
19133211Sharti * 3. All advertising materials mentioning features or use of this software
20133211Sharti *    must display the following acknowledgement:
21133211Sharti *	This product includes software developed by the University of
22133211Sharti *	California, Berkeley and its contributors.
23133211Sharti * 4. Neither the name of the University nor the names of its contributors
24133211Sharti *    may be used to endorse or promote products derived from this software
25133211Sharti *    without specific prior written permission.
26133211Sharti *
27133211Sharti * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28133211Sharti * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29133211Sharti * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30133211Sharti * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31122394Sharti * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32163820Sharti * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33122394Sharti * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34122394Sharti * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35122394Sharti * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36122394Sharti * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37216294Ssyrinx * SUCH DAMAGE.
38122394Sharti *
39122394Sharti *	@(#)kern_acct.c	8.1 (Berkeley) 6/14/93
40163820Sharti * $FreeBSD: head/sys/kern/kern_acct.c 93593 2002-04-01 21:31:13Z jhb $
41122394Sharti */
42122394Sharti
43122394Sharti#include <sys/param.h>
44122394Sharti#include <sys/systm.h>
45122394Sharti#include <sys/lock.h>
46216294Ssyrinx#include <sys/mutex.h>
47122394Sharti#include <sys/sysproto.h>
48122394Sharti#include <sys/proc.h>
49122394Sharti#include <sys/mount.h>
50122394Sharti#include <sys/vnode.h>
51122394Sharti#include <sys/fcntl.h>
52122394Sharti#include <sys/syslog.h>
53122394Sharti#include <sys/kernel.h>
54122394Sharti#include <sys/sysent.h>
55122394Sharti#include <sys/sysctl.h>
56122394Sharti#include <sys/namei.h>
57163820Sharti#include <sys/acct.h>
58163820Sharti#include <sys/resourcevar.h>
59163820Sharti#include <sys/tty.h>
60163820Sharti
/*
 * The routines implemented in this file are described in:
 *      Leffler, et al.: The Design and Implementation of the 4.3BSD
 *	    UNIX Operating System (Addison-Wesley, 1989)
 * on pages 62-63.
 *
 * Arguably, to simplify accounting operations, this mechanism should
 * be replaced by one in which an accounting log file (similar to /dev/klog)
 * is read by a user process, etc.  However, that has its own problems.
 */
71122394Sharti
72122394Sharti/*
73122394Sharti * Internal accounting functions.
74122394Sharti * The former's operation is described in Leffler, et al., and the latter
75122394Sharti * was provided by UCB with the 4.4BSD-Lite release
76122394Sharti */
77122394Shartistatic comp_t	encode_comp_t(u_long, u_long);
78122394Shartistatic void	acctwatch(void *);
79122394Sharti
80122394Sharti/*
81122394Sharti * Accounting callout used for periodic scheduling of acctwatch.
82122394Sharti */
83122394Shartistatic struct	callout acctwatch_callout;
84122394Sharti
85122394Sharti/*
86122394Sharti * Accounting vnode pointer, and saved vnode pointer.
87122394Sharti */
88122394Shartistatic struct	vnode *acctp;
89122394Shartistatic struct	vnode *savacctp;
90122394Sharti
91163820Sharti/*
92163820Sharti * Values associated with enabling and disabling accounting
93122394Sharti */
94163820Shartistatic int acctsuspend = 2;	/* stop accounting when < 2% free space left */
95163820ShartiSYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW,
96163820Sharti	&acctsuspend, 0, "percentage of free disk space below which accounting stops");
97163820Sharti
98122394Shartistatic int acctresume = 4;	/* resume when free space risen to > 4% */
99163820ShartiSYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW,
100122394Sharti	&acctresume, 0, "percentage of free disk space above which accounting resumes");
101122394Sharti
102163820Shartistatic int acctchkfreq = 15;	/* frequency (in seconds) to check space */
103163820ShartiSYSCTL_INT(_kern, OID_AUTO, acct_chkfreq, CTLFLAG_RW,
104122394Sharti	&acctchkfreq, 0, "frequency for checking the free space");
105163820Sharti
106163820Sharti/*
107122394Sharti * Accounting system call.  Written based on the specification and
108163820Sharti * previous implementation done by Mark Tinguely.
109163820Sharti *
110163820Sharti * MPSAFE
111163820Sharti */
112122394Shartiint
113163820Shartiacct(td, uap)
114163820Sharti	struct thread *td;
115163820Sharti	struct acct_args /* {
116163820Sharti		syscallarg(char *) path;
117163820Sharti	} */ *uap;
118163820Sharti{
119122394Sharti	struct nameidata nd;
120163820Sharti	int error, flags;
121163820Sharti
122163820Sharti	mtx_lock(&Giant);
123163820Sharti	if (td != curthread)
124163820Sharti		panic("acct");		/* XXXKSE DIAGNOSTIC */
125163820Sharti	/* Make sure that the caller is root. */
126163820Sharti	error = suser(td);
127163820Sharti	if (error)
128163820Sharti		goto done2;
129163820Sharti
130163820Sharti	/*
131163820Sharti	 * If accounting is to be started to a file, open that file for
132163820Sharti	 * writing and make sure it's a 'normal'.
133163820Sharti	 */
134163820Sharti	if (SCARG(uap, path) != NULL) {
135163820Sharti		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path),
136163820Sharti		       td);
137163820Sharti		flags = FWRITE;
138163820Sharti		error = vn_open(&nd, &flags, 0);
139163820Sharti		if (error)
140163820Sharti			goto done2;
141122394Sharti		NDFREE(&nd, NDF_ONLY_PNBUF);
142122394Sharti		VOP_UNLOCK(nd.ni_vp, 0, td);
143122394Sharti		if (nd.ni_vp->v_type != VREG) {
144216294Ssyrinx			vn_close(nd.ni_vp, FWRITE, td->td_ucred, td);
145216294Ssyrinx			error = EACCES;
146216294Ssyrinx			goto done2;
147216294Ssyrinx		}
148216294Ssyrinx	}
149216294Ssyrinx
150216294Ssyrinx	/*
151122394Sharti	 * If accounting was previously enabled, kill the old space-watcher,
152216294Ssyrinx	 * close the file, and (if no new file was specified, leave).
153216294Ssyrinx	 */
154216294Ssyrinx	if (acctp != NULLVP || savacctp != NULLVP) {
155122394Sharti		callout_stop(&acctwatch_callout);
156216294Ssyrinx		error = vn_close((acctp != NULLVP ? acctp : savacctp), FWRITE,
157216294Ssyrinx		    td->td_ucred, td);
158216294Ssyrinx		acctp = savacctp = NULLVP;
159216294Ssyrinx	}
160216294Ssyrinx	if (SCARG(uap, path) == NULL)
161216294Ssyrinx		goto done2;
162216294Ssyrinx
163216294Ssyrinx	/*
164216294Ssyrinx	 * Save the new accounting file vnode, and schedule the new
165216294Ssyrinx	 * free space watcher.
166216294Ssyrinx	 */
167216294Ssyrinx	acctp = nd.ni_vp;
168216294Ssyrinx	callout_init(&acctwatch_callout, 0);
169216294Ssyrinx	acctwatch(NULL);
170216294Ssyrinxdone2:
171216294Ssyrinx	mtx_unlock(&Giant);
172216294Ssyrinx	return (error);
173216294Ssyrinx}
174216294Ssyrinx
175216294Ssyrinx/*
176216294Ssyrinx * Write out process accounting information, on process exit.
177216294Ssyrinx * Data to be written out is specified in Leffler, et al.
178216294Ssyrinx * and are enumerated below.  (They're also noted in the system
179216294Ssyrinx * "acct.h" header file.)
180216294Ssyrinx */
181216294Ssyrinx
182216294Ssyrinxint
183216294Ssyrinxacct_process(td)
184216294Ssyrinx	struct thread *td;
185216294Ssyrinx{
186216294Ssyrinx	struct proc *p = td->td_proc;
187216294Ssyrinx	struct acct acct;
188216294Ssyrinx	struct rusage *r;
189216294Ssyrinx	struct timeval ut, st, tmp;
190216294Ssyrinx	int t;
191216294Ssyrinx	struct vnode *vp;
192216294Ssyrinx
193216294Ssyrinx	/* If accounting isn't enabled, don't bother */
194216294Ssyrinx	vp = acctp;
195216294Ssyrinx	if (vp == NULLVP)
196216294Ssyrinx		return (0);
197216294Ssyrinx
198216294Ssyrinx	/*
199216294Ssyrinx	 * Get process accounting information.
200216294Ssyrinx	 */
201216294Ssyrinx
202216294Ssyrinx	/* (1) The name of the command that ran */
203216294Ssyrinx	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
204216294Ssyrinx
205216294Ssyrinx	/* (2) The amount of user and system time that was used */
206216294Ssyrinx	mtx_lock_spin(&sched_lock);
207216294Ssyrinx	calcru(p, &ut, &st, NULL);
208216294Ssyrinx	mtx_unlock_spin(&sched_lock);
209216294Ssyrinx	acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec);
210216294Ssyrinx	acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec);
211216294Ssyrinx
212216294Ssyrinx	/* (3) The elapsed time the commmand ran (and its starting time) */
213216294Ssyrinx	acct.ac_btime = p->p_stats->p_start.tv_sec;
214216294Ssyrinx	microtime(&tmp);
215216294Ssyrinx	timevalsub(&tmp, &p->p_stats->p_start);
216216294Ssyrinx	acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec);
217216294Ssyrinx
218216294Ssyrinx	/* (4) The average amount of memory used */
219216294Ssyrinx	r = &p->p_stats->p_ru;
220216294Ssyrinx	tmp = ut;
221216294Ssyrinx	timevaladd(&tmp, &st);
222122394Sharti	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
223122394Sharti	if (t)
224122394Sharti		acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t;
225122394Sharti	else
226122394Sharti		acct.ac_mem = 0;
227122394Sharti
228122394Sharti	/* (5) The number of disk I/O operations done */
229122394Sharti	acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0);
230122394Sharti
231122394Sharti	/* (6) The UID and GID of the process */
232122394Sharti	acct.ac_uid = p->p_ucred->cr_ruid;
233122394Sharti	acct.ac_gid = p->p_ucred->cr_rgid;
234122394Sharti
235122394Sharti	/* (7) The terminal from which the process was started */
236122394Sharti	PROC_LOCK(p);
237122394Sharti	SESS_LOCK(p->p_session);
238122394Sharti	if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp)
239122394Sharti		acct.ac_tty = dev2udev(p->p_pgrp->pg_session->s_ttyp->t_dev);
240122394Sharti	else
241122394Sharti		acct.ac_tty = NOUDEV;
242122394Sharti	SESS_UNLOCK(p->p_session);
243122394Sharti	PROC_UNLOCK(p);
244122394Sharti
245122394Sharti	/* (8) The boolean flags that tell how the process terminated, etc. */
246122394Sharti	acct.ac_flag = p->p_acflag;
247122394Sharti
248122394Sharti	/*
249122394Sharti	 * Eliminate any file size rlimit.
250122394Sharti	 */
251122394Sharti	if (p->p_limit->p_refcnt > 1 &&
252122394Sharti	    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
253122394Sharti		p->p_limit->p_refcnt--;
254122394Sharti		p->p_limit = limcopy(p->p_limit);
255122394Sharti	}
256122394Sharti	p->p_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
257122394Sharti
258122394Sharti	/*
259122394Sharti	 * Write the accounting information to the file.
260122394Sharti	 */
261122394Sharti	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
262122394Sharti	return (vn_rdwr(UIO_WRITE, vp, (caddr_t)&acct, sizeof (acct),
263122394Sharti	    (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, td->td_ucred,
264122394Sharti	    (int *)0, td));
265122394Sharti}
266122394Sharti
267122394Sharti/*
268122394Sharti * Encode_comp_t converts from ticks in seconds and microseconds
269122394Sharti * to ticks in 1/AHZ seconds.  The encoding is described in
270122394Sharti * Leffler, et al., on page 63.
271122394Sharti */
272122394Sharti
273122394Sharti#define	MANTSIZE	13			/* 13 bit mantissa. */
274122394Sharti#define	EXPSIZE		3			/* Base 8 (3 bit) exponent. */
275122394Sharti#define	MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */
276122394Sharti
277122394Shartistatic comp_t
278122394Shartiencode_comp_t(s, us)
279122394Sharti	u_long s, us;
280122394Sharti{
281122394Sharti	int exp, rnd;
282122394Sharti
283122394Sharti	exp = 0;
284122394Sharti	rnd = 0;
285122394Sharti	s *= AHZ;
286122394Sharti	s += us / (1000000 / AHZ);	/* Maximize precision. */
287122394Sharti
288122394Sharti	while (s > MAXFRACT) {
289122394Sharti	rnd = s & (1 << (EXPSIZE - 1));	/* Round up? */
290122394Sharti		s >>= EXPSIZE;		/* Base 8 exponent == 3 bit shift. */
291122394Sharti		exp++;
292122394Sharti	}
293122394Sharti
294122394Sharti	/* If we need to round up, do it (and handle overflow correctly). */
295122394Sharti	if (rnd && (++s > MAXFRACT)) {
296122394Sharti		s >>= EXPSIZE;
297122394Sharti		exp++;
298122394Sharti	}
299122394Sharti
300122394Sharti	/* Clean it up and polish it off. */
301122394Sharti	exp <<= MANTSIZE;		/* Shift the exponent into place */
302122394Sharti	exp += s;			/* and add on the mantissa. */
303122394Sharti	return (exp);
304122394Sharti}
305122394Sharti
306122394Sharti/*
307122394Sharti * Periodically check the file system to see if accounting
308122394Sharti * should be turned on or off.  Beware the case where the vnode
309122394Sharti * has been vgone()'d out from underneath us, e.g. when the file
310122394Sharti * system containing the accounting file has been forcibly unmounted.
311122394Sharti */
312122394Sharti/* ARGSUSED */
313122394Shartistatic void
314122394Shartiacctwatch(a)
315122394Sharti	void *a;
316122394Sharti{
317122394Sharti	struct statfs sb;
318122394Sharti
319122394Sharti	if (savacctp != NULLVP) {
320122394Sharti		if (savacctp->v_type == VBAD) {
321122394Sharti			(void) vn_close(savacctp, FWRITE, NOCRED, NULL);
322122394Sharti			savacctp = NULLVP;
323122394Sharti			return;
324122394Sharti		}
325122394Sharti		(void)VFS_STATFS(savacctp->v_mount, &sb, (struct thread *)0);
326122394Sharti		if (sb.f_bavail > acctresume * sb.f_blocks / 100) {
327122394Sharti			acctp = savacctp;
328122394Sharti			savacctp = NULLVP;
329122394Sharti			log(LOG_NOTICE, "Accounting resumed\n");
330122394Sharti		}
331122394Sharti	} else {
332122394Sharti		if (acctp == NULLVP)
333122394Sharti			return;
334122394Sharti		if (acctp->v_type == VBAD) {
335122394Sharti			(void) vn_close(acctp, FWRITE, NOCRED, NULL);
336122394Sharti			acctp = NULLVP;
337122394Sharti			return;
338122394Sharti		}
339122394Sharti		(void)VFS_STATFS(acctp->v_mount, &sb, (struct thread *)0);
340122394Sharti		if (sb.f_bavail <= acctsuspend * sb.f_blocks / 100) {
341122394Sharti			savacctp = acctp;
342122394Sharti			acctp = NULLVP;
343122394Sharti			log(LOG_NOTICE, "Accounting suspended\n");
344122394Sharti		}
345122394Sharti	}
346122394Sharti	callout_reset(&acctwatch_callout, acctchkfreq * hz, acctwatch, NULL);
347122394Sharti}
348122394Sharti