1/* 2 * linux/kernel/acct.c 3 * 4 * BSD Process Accounting for Linux 5 * 6 * Author: Marco van Wieringen <mvw@planets.elm.net> 7 * 8 * Some code based on ideas and code from: 9 * Thomas K. Dyas <tdyas@eden.rutgers.edu> 10 * 11 * This file implements BSD-style process accounting. Whenever any 12 * process exits, an accounting record of type "struct acct" is 13 * written to the file specified with the acct() system call. It is 14 * up to user-level programs to do useful things with the accounting 15 * log. The kernel just provides the raw accounting information. 16 * 17 * (C) Copyright 1995 - 1997 Marco van Wieringen - ELM Consultancy B.V. 18 * 19 * Plugged two leaks. 1) It didn't return acct_file into the free_filps if 20 * the file happened to be read-only. 2) If the accounting was suspended 21 * due to the lack of space it happily allowed to reopen it and completely 22 * lost the old acct_file. 3/10/98, Al Viro. 23 * 24 * Now we silently close acct_file on attempt to reopen. Cleaned sys_acct(). 25 * XTerms and EMACS are manifestations of pure evil. 21/10/98, AV. 26 * 27 * Fixed a nasty interaction with with sys_umount(). If the accointing 28 * was suspeneded we failed to stop it on umount(). Messy. 29 * Another one: remount to readonly didn't stop accounting. 30 * Question: what should we do if we have CAP_SYS_ADMIN but not 31 * CAP_SYS_PACCT? Current code does the following: umount returns -EBUSY 32 * unless we are messing with the root. In that case we are getting a 33 * real mess with do_remount_sb(). 9/11/98, AV. 34 * 35 * Fixed a bunch of races (and pair of leaks). Probably not the best way, 36 * but this one obviously doesn't introduce deadlocks. Later. BTW, found 37 * one race (and leak) in BSD implementation. 38 * OK, that's better. ANOTHER race and leak in BSD variant. There always 39 * is one more bug... 10/11/98, AV. 40 * 41 * Oh, fsck... Oopsable SMP race in do_process_acct() - we must hold 42 * ->mmap_sem to walk the vma list of current->mm. Nasty, since it leaks 43 * a struct file opened for write. Fixed. 2/6/2000, AV. 44 */ 45 46#include <linux/config.h> 47#include <linux/errno.h> 48#include <linux/kernel.h> 49 50#ifdef CONFIG_BSD_PROCESS_ACCT 51#include <linux/mm.h> 52#include <linux/slab.h> 53#include <linux/acct.h> 54#include <linux/smp_lock.h> 55#include <linux/file.h> 56#include <linux/tty.h> 57 58#include <asm/uaccess.h> 59 60/* 61 * These constants control the amount of freespace that suspend and 62 * resume the process accounting system, and the time delay between 63 * each check. 64 * Turned into sysctl-controllable parameters. AV, 12/11/98 65 */ 66 67int acct_parm[3] = {4, 2, 30}; 68#define RESUME (acct_parm[0]) /* >foo% free space - resume */ 69#define SUSPEND (acct_parm[1]) /* <foo% free space - suspend */ 70#define ACCT_TIMEOUT (acct_parm[2]) /* foo second timeout between checks */ 71 72/* 73 * External references and all of the globals. 74 */ 75 76static volatile int acct_active; 77static volatile int acct_needcheck; 78static struct file *acct_file; 79static struct timer_list acct_timer; 80static void do_acct_process(long, struct file *); 81 82/* 83 * Called whenever the timer says to check the free space. 84 */ 85static void acct_timeout(unsigned long unused) 86{ 87 acct_needcheck = 1; 88} 89 90/* 91 * Check the amount of free space and suspend/resume accordingly. 92 */ 93static int check_free_space(struct file *file) 94{ 95 struct statfs sbuf; 96 int res; 97 int act; 98 99 lock_kernel(); 100 res = acct_active; 101 if (!file || !acct_needcheck) 102 goto out; 103 unlock_kernel(); 104 105 /* May block */ 106 if (vfs_statfs(file->f_dentry->d_inode->i_sb, &sbuf)) 107 return res; 108 109 if (sbuf.f_bavail <= SUSPEND * sbuf.f_blocks / 100) 110 act = -1; 111 else if (sbuf.f_bavail >= RESUME * sbuf.f_blocks / 100) 112 act = 1; 113 else 114 act = 0; 115 116 /* 117 * If some joker switched acct_file under us we'ld better be 118 * silent and _not_ touch anything. 119 */ 120 lock_kernel(); 121 if (file != acct_file) { 122 if (act) 123 res = act>0; 124 goto out; 125 } 126 127 if (acct_active) { 128 if (act < 0) { 129 acct_active = 0; 130 printk(KERN_INFO "Process accounting paused\n"); 131 } 132 } else { 133 if (act > 0) { 134 acct_active = 1; 135 printk(KERN_INFO "Process accounting resumed\n"); 136 } 137 } 138 139 del_timer(&acct_timer); 140 acct_needcheck = 0; 141 acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ; 142 add_timer(&acct_timer); 143 res = acct_active; 144out: 145 unlock_kernel(); 146 return res; 147} 148 149/* 150 * sys_acct() is the only system call needed to implement process 151 * accounting. It takes the name of the file where accounting records 152 * should be written. If the filename is NULL, accounting will be 153 * shutdown. 154 */ 155asmlinkage long sys_acct(const char *name) 156{ 157 struct file *file = NULL, *old_acct = NULL; 158 char *tmp; 159 int error; 160 161 if (!capable(CAP_SYS_PACCT)) 162 return -EPERM; 163 164 if (name) { 165 tmp = getname(name); 166 error = PTR_ERR(tmp); 167 if (IS_ERR(tmp)) 168 goto out; 169 /* Difference from BSD - they don't do O_APPEND */ 170 file = filp_open(tmp, O_WRONLY|O_APPEND, 0); 171 putname(tmp); 172 if (IS_ERR(file)) { 173 error = PTR_ERR(file); 174 goto out; 175 } 176 error = -EACCES; 177 if (!S_ISREG(file->f_dentry->d_inode->i_mode)) 178 goto out_err; 179 180 error = -EIO; 181 if (!file->f_op->write) 182 goto out_err; 183 } 184 185 error = 0; 186 lock_kernel(); 187 if (acct_file) { 188 old_acct = acct_file; 189 del_timer(&acct_timer); 190 acct_active = 0; 191 acct_needcheck = 0; 192 acct_file = NULL; 193 } 194 if (name) { 195 acct_file = file; 196 acct_needcheck = 0; 197 acct_active = 1; 198 /* It's been deleted if it was used before so this is safe */ 199 init_timer(&acct_timer); 200 acct_timer.function = acct_timeout; 201 acct_timer.expires = jiffies + ACCT_TIMEOUT*HZ; 202 add_timer(&acct_timer); 203 } 204 unlock_kernel(); 205 if (old_acct) { 206 do_acct_process(0,old_acct); 207 filp_close(old_acct, NULL); 208 } 209out: 210 return error; 211out_err: 212 filp_close(file, NULL); 213 goto out; 214} 215 216void acct_auto_close(kdev_t dev) 217{ 218 lock_kernel(); 219 if (acct_file && acct_file->f_dentry->d_inode->i_dev == dev) 220 sys_acct(NULL); 221 unlock_kernel(); 222} 223 224/* 225 * encode an unsigned long into a comp_t 226 * 227 * This routine has been adopted from the encode_comp_t() function in 228 * the kern_acct.c file of the FreeBSD operating system. The encoding 229 * is a 13-bit fraction with a 3-bit (base 8) exponent. 230 */ 231 232#define MANTSIZE 13 /* 13 bit mantissa. */ 233#define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ 234#define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ 235 236static comp_t encode_comp_t(unsigned long value) 237{ 238 int exp, rnd; 239 240 exp = rnd = 0; 241 while (value > MAXFRACT) { 242 rnd = value & (1 << (EXPSIZE - 1)); /* Round up? */ 243 value >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */ 244 exp++; 245 } 246 247 /* 248 * If we need to round up, do it (and handle overflow correctly). 249 */ 250 if (rnd && (++value > MAXFRACT)) { 251 value >>= EXPSIZE; 252 exp++; 253 } 254 255 /* 256 * Clean it up and polish it off. 257 */ 258 exp <<= MANTSIZE; /* Shift the exponent into place */ 259 exp += value; /* and add on the mantissa. */ 260 return exp; 261} 262 263/* 264 * Write an accounting entry for an exiting process 265 * 266 * The acct_process() call is the workhorse of the process 267 * accounting system. The struct acct is built here and then written 268 * into the accounting file. This function should only be called from 269 * do_exit(). 270 */ 271 272/* 273 * do_acct_process does all actual work. Caller holds the reference to file. 274 */ 275static void do_acct_process(long exitcode, struct file *file) 276{ 277 struct acct ac; 278 mm_segment_t fs; 279 unsigned long vsize; 280 unsigned long flim; 281 282 /* 283 * First check to see if there is enough free_space to continue 284 * the process accounting system. 285 */ 286 if (!check_free_space(file)) 287 return; 288 289 /* 290 * Fill the accounting struct with the needed info as recorded 291 * by the different kernel functions. 292 */ 293 memset((caddr_t)&ac, 0, sizeof(struct acct)); 294 295 strncpy(ac.ac_comm, current->comm, ACCT_COMM); 296 ac.ac_comm[ACCT_COMM - 1] = '\0'; 297 298 ac.ac_btime = CT_TO_SECS(current->start_time) + (xtime.tv_sec - (jiffies / HZ)); 299 ac.ac_etime = encode_comp_t(jiffies - current->start_time); 300 ac.ac_utime = encode_comp_t(current->times.tms_utime); 301 ac.ac_stime = encode_comp_t(current->times.tms_stime); 302 ac.ac_uid = current->uid; 303 ac.ac_gid = current->gid; 304 ac.ac_tty = (current->tty) ? kdev_t_to_nr(current->tty->device) : 0; 305 306 ac.ac_flag = 0; 307 if (current->flags & PF_FORKNOEXEC) 308 ac.ac_flag |= AFORK; 309 if (current->flags & PF_SUPERPRIV) 310 ac.ac_flag |= ASU; 311 if (current->flags & PF_DUMPCORE) 312 ac.ac_flag |= ACORE; 313 if (current->flags & PF_SIGNALED) 314 ac.ac_flag |= AXSIG; 315 316 vsize = 0; 317 if (current->mm) { 318 struct vm_area_struct *vma; 319 down_read(¤t->mm->mmap_sem); 320 vma = current->mm->mmap; 321 while (vma) { 322 vsize += vma->vm_end - vma->vm_start; 323 vma = vma->vm_next; 324 } 325 up_read(¤t->mm->mmap_sem); 326 } 327 vsize = vsize / 1024; 328 ac.ac_mem = encode_comp_t(vsize); 329 ac.ac_io = encode_comp_t(0 /* current->io_usage */); /* %% */ 330 ac.ac_rw = encode_comp_t(ac.ac_io / 1024); 331 ac.ac_minflt = encode_comp_t(current->min_flt); 332 ac.ac_majflt = encode_comp_t(current->maj_flt); 333 ac.ac_swaps = encode_comp_t(current->nswap); 334 ac.ac_exitcode = exitcode; 335 336 /* 337 * Kernel segment override to datasegment and write it 338 * to the accounting file. 339 */ 340 fs = get_fs(); 341 set_fs(KERNEL_DS); 342 /* 343 * Accounting records are not subject to resource limits. 344 */ 345 flim = current->rlim[RLIMIT_FSIZE].rlim_cur; 346 current->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; 347 file->f_op->write(file, (char *)&ac, 348 sizeof(struct acct), &file->f_pos); 349 current->rlim[RLIMIT_FSIZE].rlim_cur = flim; 350 set_fs(fs); 351} 352 353/* 354 * acct_process - now just a wrapper around do_acct_process 355 */ 356int acct_process(long exitcode) 357{ 358 struct file *file = NULL; 359 lock_kernel(); 360 if (acct_file) { 361 file = acct_file; 362 get_file(file); 363 unlock_kernel(); 364 do_acct_process(exitcode, file); 365 fput(file); 366 } else 367 unlock_kernel(); 368 return 0; 369} 370 371#else 372/* 373 * Dummy system call when BSD process accounting is not configured 374 * into the kernel. 375 */ 376 377asmlinkage long sys_acct(const char * filename) 378{ 379 return -ENOSYS; 380} 381#endif 382