1/*
2 *  linux/kernel/acct.c
3 *
4 *  BSD Process Accounting for Linux
5 *
6 *  Author: Marco van Wieringen <mvw@planets.elm.net>
7 *
8 *  Some code based on ideas and code from:
9 *  Thomas K. Dyas <tdyas@eden.rutgers.edu>
10 *
11 *  This file implements BSD-style process accounting. Whenever any
12 *  process exits, an accounting record of type "struct acct" is
13 *  written to the file specified with the acct() system call. It is
14 *  up to user-level programs to do useful things with the accounting
15 *  log. The kernel just provides the raw accounting information.
16 *
17 * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V.
18 *
19 *  Plugged two leaks. 1) It didn't return acct_file into the free_filps if
20 *  the file happened to be read-only. 2) If the accounting was suspended
21 *  due to the lack of space it happily allowed to reopen it and completely
22 *  lost the old acct_file. 3/10/98, Al Viro.
23 *
24 *  Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
25 *  XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
26 *
 *  Fixed a nasty interaction with sys_umount(). If the accounting
 *  was suspended we failed to stop it on umount(). Messy.
29 *  Another one: remount to readonly didn't stop accounting.
30 *	Question: what should we do if we have CAP_SYS_ADMIN but not
31 *  CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY
32 *  unless we are messing with the root. In that case we are getting a
33 *  real mess with do_remount_sb(). 9/11/98, AV.
34 *
35 *  Fixed a bunch of races (and pair of leaks). Probably not the best way,
36 *  but this one obviously doesn't introduce deadlocks. Later. BTW, found
37 *  one race (and leak) in BSD implementation.
38 *  OK, that's better. ANOTHER race and leak in BSD variant. There always
39 *  is one more bug... 10/11/98, AV.
40 *
41 *	Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold
42 * ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks
43 * a struct file opened for write. Fixed. 2/6/2000, AV.
44 */
45
46#include <linux/config.h>
47#include <linux/errno.h>
48#include <linux/kernel.h>
49
50#ifdef CONFIG_BSD_PROCESS_ACCT
51#include <linux/mm.h>
52#include <linux/slab.h>
53#include <linux/acct.h>
54#include <linux/smp_lock.h>
55#include <linux/file.h>
56#include <linux/tty.h>
57
58#include <asm/uaccess.h>
59
60/*
61 * These constants control the amount of freespace that suspend and
62 * resume the process accounting system, and the time delay between
63 * each check.
64 * Turned into sysctl-controllable parameters. AV, 12/11/98
65 */
66
/*
 * Tunable parameters, in order: resume threshold (percent of blocks free),
 * suspend threshold (percent of blocks free), and the delay in seconds
 * between two free-space checks.  The macros below give each slot a name.
 */
int acct_parm[3] = {4, 2, 30};
#define RESUME		(acct_parm[0])	/* resume when free space is >= this percentage */
#define SUSPEND		(acct_parm[1])	/* suspend when free space is <= this percentage */
#define ACCT_TIMEOUT	(acct_parm[2])	/* seconds between two free-space checks */
71
72/*
73 * External references and all of the globals.
74 */
75
/* Non-zero while accounting is enabled and not suspended for lack of space. */
static volatile int acct_active;
/* Set by acct_timeout() to request a free-space check on the next record. */
static volatile int acct_needcheck;
/* File that receives the accounting records; NULL when accounting is off. */
static struct file *acct_file;
/* Timer that fires every ACCT_TIMEOUT seconds and sets acct_needcheck. */
static struct timer_list acct_timer;
/* Forward declaration: sys_acct() calls this before its definition. */
static void do_acct_process(long, struct file *);
81
82/*
83 * Called whenever the timer says to check the free space.
84 */
85static void acct_timeout(unsigned long unused)
86{
87	acct_needcheck = 1;
88}
89
90/*
91 * Check the amount of free space and suspend/resume accordingly.
92 */
93static int check_free_space(struct file *file)
94{
95	struct statfs sbuf;
96	int res;
97	int act;
98
99	lock_kernel();
100	res = acct_active;
101	if (!file || !acct_needcheck)
102		goto out;
103	unlock_kernel();
104
105	/* May block */
106	if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf))
107		return res;
108
109	if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100)
110		act = -1;
111	else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100)
112		act = 1;
113	else
114		act = 0;
115
116	/*
117	 * If some joker switched acct_file under us we'ld better be
118	 * silent and _not_ touch anything.
119	 */
120	lock_kernel();
121	if (file != acct_file) {
122		if (act)
123			res = act>0;
124		goto out;
125	}
126
127	if (acct_active) {
128		if (act < 0) {
129			acct_active = 0;
130			printk(KERN_INFO "Process accounting paused\n");
131		}
132	} else {
133		if (act > 0) {
134			acct_active = 1;
135			printk(KERN_INFO "Process accounting resumed\n");
136		}
137	}
138
139	del_timer(&acct_timer);
140	acct_needcheck = 0;
141	acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ;
142	add_timer(&acct_timer);
143	res = acct_active;
144out:
145	unlock_kernel();
146	return res;
147}
148
149/*
150 *  sys_acct() is the only system call needed to implement process
151 *  accounting. It takes the name of the file where accounting records
152 *  should be written. If the filename is NULL, accounting will be
153 *  shutdown.
154 */
155asmlinkage long sys_acct(const char *name)
156{
157	struct file *file = NULL, *old_acct = NULL;
158	char *tmp;
159	int error;
160
161	if (!capable(CAP_SYS_PACCT))
162		return -EPERM;
163
164	if (name) {
165		tmp = getname(name);
166		error = PTR_ERR(tmp);
167		if (IS_ERR(tmp))
168			goto out;
169		/* Difference from BSD - they don't do O_APPEND */
170		file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
171		putname(tmp);
172		if (IS_ERR(file)) {
173			error = PTR_ERR(file);
174			goto out;
175		}
176		error = -EACCES;
177		if (!S_ISREG(file->f_dentry->d_inode->i_mode))
178			goto out_err;
179
180		error = -EIO;
181		if (!file->f_op->write)
182			goto out_err;
183	}
184
185	error = 0;
186	lock_kernel();
187	if (acct_file) {
188		old_acct = acct_file;
189		del_timer(&acct_timer);
190		acct_active = 0;
191		acct_needcheck = 0;
192		acct_file = NULL;
193	}
194	if (name) {
195		acct_file = file;
196		acct_needcheck = 0;
197		acct_active = 1;
198		/* It's been deleted if it was used before so this is safe */
199		init_timer(&acct_timer);
200		acct_timer.function = acct_timeout;
201		acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ;
202		add_timer(&acct_timer);
203	}
204	unlock_kernel();
205	if (old_acct) {
206		do_acct_process(0,old_acct);
207		filp_close(old_acct, NULL);
208	}
209out:
210	return error;
211out_err:
212	filp_close(file, NULL);
213	goto out;
214}
215
216void acct_auto_close(kdev_t dev)
217{
218	lock_kernel();
219	if (acct_file && acct_file->f_dentry->d_inode->i_dev == dev)
220		sys_acct(NULL);
221	unlock_kernel();
222}
223
/*
 *  encode an unsigned long into a comp_t
 *
 *  This routine has been adopted from the encode_comp_t() function in
 *  the kern_acct.c file of the FreeBSD operating system. The encoding
 *  is a 13-bit fraction with a 3-bit (base 8) exponent.
 *
 *  The value is shifted right three bits at a time until it fits in the
 *  mantissa; the last bit shifted out decides whether the mantissa is
 *  rounded up.  Values too large for the 3-bit exponent are saturated
 *  to the largest encodable value (all exponent and mantissa bits set)
 *  instead of letting the exponent overflow into a bogus small result.
 */

#define	MANTSIZE	13			/* 13 bit mantissa. */
#define	EXPSIZE		3			/* Base 8 (3 bit) exponent. */
#define	MAXFRACT	((1 << MANTSIZE) - 1)	/* Maximum fractional value. */

static comp_t encode_comp_t(unsigned long value)
{
	const int max_exp = (1 << EXPSIZE) - 1;	/* largest exponent that fits */
	int exp, rnd;

	exp = rnd = 0;
	while (value > MAXFRACT) {
		rnd = value & (1 << (EXPSIZE - 1));	/* Round up? */
		value >>= EXPSIZE;	/* Base 8 exponent == 3 bit shift. */
		exp++;
	}

	/*
	 * If we need to round up, do it (and handle overflow correctly).
	 */
	if (rnd && (++value > MAXFRACT)) {
		value >>= EXPSIZE;
		exp++;
	}

	/*
	 * A wide unsigned long can need more shifts than the exponent field
	 * can count.  Clamp to the maximum rather than overflow the field.
	 */
	if (exp > max_exp)
		return (max_exp << MANTSIZE) | MAXFRACT;

	/*
	 * Clean it up and polish it off.
	 */
	exp <<= MANTSIZE;		/* Shift the exponent into place */
	exp += value;			/* and add on the mantissa. */
	return exp;
}
262
263/*
264 *  Write an accounting entry for an exiting process
265 *
266 *  The acct_process() call is the workhorse of the process
267 *  accounting system. The struct acct is built here and then written
268 *  into the accounting file. This function should only be called from
269 *  do_exit().
270 */
271
272/*
273 *  do_acct_process does all actual work. Caller holds the reference to file.
274 */
275static void do_acct_process(long exitcode, struct file *file)
276{
277	struct acct ac;
278	mm_segment_t fs;
279	unsigned long vsize;
280	unsigned long flim;
281
282	/*
283	 * First check to see if there is enough free_space to continue
284	 * the process accounting system.
285	 */
286	if (!check_free_space(file))
287		return;
288
289	/*
290	 * Fill the accounting struct with the needed info as recorded
291	 * by the different kernel functions.
292	 */
293	memset((caddr_t)&ac, 0, sizeof(struct acct));
294
295	strncpy(ac.ac_comm, current->comm, ACCT_COMM);
296	ac.ac_comm[ACCT_COMM - 1] = '\0';
297
298	ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ));
299	ac.ac_etime = encode_comp_t(jiffies - current->start_time);
300	ac.ac_utime = encode_comp_t(current->times.tms_utime);
301	ac.ac_stime = encode_comp_t(current->times.tms_stime);
302	ac.ac_uid = current->uid;
303	ac.ac_gid = current->gid;
304	ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0;
305
306	ac.ac_flag = 0;
307	if (current->flags & PF_FORKNOEXEC)
308		ac.ac_flag |= AFORK;
309	if (current->flags & PF_SUPERPRIV)
310		ac.ac_flag |= ASU;
311	if (current->flags & PF_DUMPCORE)
312		ac.ac_flag |= ACORE;
313	if (current->flags & PF_SIGNALED)
314		ac.ac_flag |= AXSIG;
315
316	vsize = 0;
317	if (current->mm) {
318		struct vm_area_struct *vma;
319		down_read(&current->mm->mmap_sem);
320		vma = current->mm->mmap;
321		while (vma) {
322			vsize += vma->vm_end - vma->vm_start;
323			vma = vma->vm_next;
324		}
325		up_read(&current->mm->mmap_sem);
326	}
327	vsize = vsize / 1024;
328	ac.ac_mem = encode_comp_t(vsize);
329	ac.ac_io = encode_comp_t(0 /* current->io_usage */);	/* %% */
330	ac.ac_rw = encode_comp_t(ac.ac_io / 1024);
331	ac.ac_minflt = encode_comp_t(current->min_flt);
332	ac.ac_majflt = encode_comp_t(current->maj_flt);
333	ac.ac_swaps = encode_comp_t(current->nswap);
334	ac.ac_exitcode = exitcode;
335
336	/*
337         * Kernel segment override to datasegment and write it
338         * to the accounting file.
339         */
340	fs = get_fs();
341	set_fs(KERNEL_DS);
342	/*
343 	 * Accounting records are not subject to resource limits.
344 	 */
345	flim = current->rlim[RLIMIT_FSIZE].rlim_cur;
346	current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
347	file->f_op->write(file, (char *)&ac,
348			       sizeof(struct acct), &file->f_pos);
349	current->rlim[RLIMIT_FSIZE].rlim_cur = flim;
350	set_fs(fs);
351}
352
353/*
354 * acct_process - now just a wrapper around do_acct_process
355 */
356int acct_process(long exitcode)
357{
358	struct file *file = NULL;
359	lock_kernel();
360	if (acct_file) {
361		file = acct_file;
362		get_file(file);
363		unlock_kernel();
364		do_acct_process(exitcode, file);
365		fput(file);
366	} else
367		unlock_kernel();
368	return 0;
369}
370
371#else
372/*
373 * Dummy system call when BSD process accounting is not configured
374 * into the kernel.
375 */
376
377asmlinkage long sys_acct(const char * filename)
378{
379	return -ENOSYS;
380}
381#endif
382