11541Srgrimes/*-
21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993
31541Srgrimes *	The Regents of the University of California.  All rights reserved.
41541Srgrimes * (c) UNIX System Laboratories, Inc.
5165896Srwatson * Copyright (c) 2005 Robert N. M. Watson
6165896Srwatson * All rights reserved.
7165896Srwatson *
81541Srgrimes * All or some portions of this file are derived from material licensed
91541Srgrimes * to the University of California by American Telephone and Telegraph
101541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with
111541Srgrimes * the permission of UNIX System Laboratories, Inc.
121541Srgrimes *
13165896Srwatson * Redistribution and use in source and binary forms, with or without
14165896Srwatson * modification, are permitted provided that the following conditions
15165896Srwatson * are met:
16165896Srwatson * 1. Redistributions of source code must retain the above copyright
17165896Srwatson *    notice, this list of conditions and the following disclaimer.
18165896Srwatson * 2. Redistributions in binary form must reproduce the above copyright
19165896Srwatson *    notice, this list of conditions and the following disclaimer in the
20165896Srwatson *    documentation and/or other materials provided with the distribution.
21165896Srwatson * 4. Neither the name of the University nor the names of its contributors
22165896Srwatson *    may be used to endorse or promote products derived from this software
23165896Srwatson *    without specific prior written permission.
24165896Srwatson *
25165896Srwatson * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26165896Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27165896Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28165896Srwatson * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29165896Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30165896Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31165896Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32165896Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33165896Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34165896Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35165896Srwatson * SUCH DAMAGE.
36165896Srwatson *
37152328Srwatson * Copyright (c) 1994 Christopher G. Demetriou
38152328Srwatson *
391541Srgrimes * Redistribution and use in source and binary forms, with or without
401541Srgrimes * modification, are permitted provided that the following conditions
411541Srgrimes * are met:
421541Srgrimes * 1. Redistributions of source code must retain the above copyright
431541Srgrimes *    notice, this list of conditions and the following disclaimer.
441541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
451541Srgrimes *    notice, this list of conditions and the following disclaimer in the
461541Srgrimes *    documentation and/or other materials provided with the distribution.
471541Srgrimes * 3. All advertising materials mentioning features or use of this software
481541Srgrimes *    must display the following acknowledgement:
491541Srgrimes *	This product includes software developed by the University of
501541Srgrimes *	California, Berkeley and its contributors.
511541Srgrimes * 4. Neither the name of the University nor the names of its contributors
521541Srgrimes *    may be used to endorse or promote products derived from this software
531541Srgrimes *    without specific prior written permission.
541541Srgrimes *
551541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
561541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
571541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
581541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
591541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
601541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
611541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
621541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
631541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
641541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
651541Srgrimes * SUCH DAMAGE.
661541Srgrimes *
673124Sdg *	@(#)kern_acct.c	8.1 (Berkeley) 6/14/93
681541Srgrimes */
691541Srgrimes
70116182Sobrien#include <sys/cdefs.h>
71116182Sobrien__FBSDID("$FreeBSD$");
72116182Sobrien
731541Srgrimes#include <sys/param.h>
742807Sbde#include <sys/systm.h>
75155262Sjhb#include <sys/acct.h>
76155262Sjhb#include <sys/fcntl.h>
77155262Sjhb#include <sys/kernel.h>
78155431Sjhb#include <sys/kthread.h>
79172023Sdds#include <sys/limits.h>
8076166Smarkm#include <sys/lock.h>
81155262Sjhb#include <sys/mount.h>
8276166Smarkm#include <sys/mutex.h>
83155262Sjhb#include <sys/namei.h>
84164033Srwatson#include <sys/priv.h>
851541Srgrimes#include <sys/proc.h>
86155262Sjhb#include <sys/resourcevar.h>
87155431Sjhb#include <sys/sched.h>
88152328Srwatson#include <sys/sx.h>
89155262Sjhb#include <sys/sysctl.h>
9022521Sdyson#include <sys/sysent.h>
91155262Sjhb#include <sys/syslog.h>
92155262Sjhb#include <sys/sysproto.h>
933124Sdg#include <sys/tty.h>
94155262Sjhb#include <sys/vnode.h>
951541Srgrimes
96163606Srwatson#include <security/mac/mac_framework.h>
97163606Srwatson
/*
 * The routines implemented in this file are described in:
 *      Leffler, et al.: The Design and Implementation of the 4.3BSD
 *	    UNIX Operating System (Addison-Wesley, 1989)
 * on pages 62-63.
 * In May 2007 the historic 3-bit base-8 exponent, 13-bit fraction
 * comp_t representation described in the above reference was replaced
 * with that of IEEE-754 floats.
 *
 * Arguably, to simplify accounting operations, this mechanism should
 * be replaced by one in which an accounting log file (similar to /dev/klog)
 * is read by a user process, etc.  However, that has its own problems.
 */
1113124Sdg
112169857Sdds/* Floating point definitions from <float.h>. */
113169857Sdds#define FLT_MANT_DIG    24              /* p */
114169857Sdds#define FLT_MAX_EXP     128             /* emax */
115169857Sdds
1163124Sdg/*
1173124Sdg * Internal accounting functions.
1183124Sdg * The former's operation is described in Leffler, et al., and the latter
1193124Sdg * was provided by UCB with the 4.4BSD-Lite release
1203124Sdg */
121169857Sddsstatic uint32_t	encode_timeval(struct timeval);
122169857Sddsstatic uint32_t	encode_long(long);
123155431Sjhbstatic void	acctwatch(void);
124155431Sjhbstatic void	acct_thread(void *);
125247321Sjhbstatic int	acct_disable(struct thread *, int);
1263124Sdg
1273124Sdg/*
128100444Sjohan * Accounting vnode pointer, saved vnode pointer, and flags for each.
129152328Srwatson * acct_sx protects against changes to the active vnode and credentials
130152328Srwatson * while accounting records are being committed to disk.
1313124Sdg */
132162370Srwatsonstatic int		 acct_configured;
133152328Srwatsonstatic int		 acct_suspended;
134152328Srwatsonstatic struct vnode	*acct_vp;
135152328Srwatsonstatic struct ucred	*acct_cred;
136252886Smjgstatic struct plimit	*acct_limit;
137152328Srwatsonstatic int		 acct_flags;
138152328Srwatsonstatic struct sx	 acct_sx;
1393124Sdg
140152328SrwatsonSX_SYSINIT(acct, &acct_sx, "acct_sx");
141103208Sarr
/*
 * State of the accounting kthread, kept as a mask of the ACCT_* bits.
 */
#define	ACCT_RUNNING	1	/* Accounting kthread is running. */
#define	ACCT_EXITREQ	2	/* Accounting kthread should exit. */

static int		 acct_state;
149155431Sjhb
150155431Sjhb/*
1513124Sdg * Values associated with enabling and disabling accounting
1523124Sdg */
15312819Sphkstatic int acctsuspend = 2;	/* stop accounting when < 2% free space left */
15412819SphkSYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW,
15562119Snbm	&acctsuspend, 0, "percentage of free disk space below which accounting stops");
1563124Sdg
15712819Sphkstatic int acctresume = 4;	/* resume when free space risen to > 4% */
15812819SphkSYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW,
15962119Snbm	&acctresume, 0, "percentage of free disk space above which accounting resumes");
16012819Sphk
16112819Sphkstatic int acctchkfreq = 15;	/* frequency (in seconds) to check space */
16212819Sphk
163155438Sjhbstatic int
164155438Sjhbsysctl_acct_chkfreq(SYSCTL_HANDLER_ARGS)
165155438Sjhb{
166155438Sjhb	int error, value;
167155438Sjhb
168155438Sjhb	/* Write out the old value. */
169155438Sjhb	error = SYSCTL_OUT(req, &acctchkfreq, sizeof(int));
170155438Sjhb	if (error || req->newptr == NULL)
171155438Sjhb		return (error);
172155438Sjhb
173155438Sjhb	/* Read in and verify the new value. */
174155438Sjhb	error = SYSCTL_IN(req, &value, sizeof(int));
175155438Sjhb	if (error)
176155438Sjhb		return (error);
177155438Sjhb	if (value <= 0)
178155438Sjhb		return (EINVAL);
179155438Sjhb	acctchkfreq = value;
180155438Sjhb	return (0);
181155438Sjhb}
182155438SjhbSYSCTL_PROC(_kern, OID_AUTO, acct_chkfreq, CTLTYPE_INT|CTLFLAG_RW,
183155438Sjhb    &acctchkfreq, 0, sysctl_acct_chkfreq, "I",
184155438Sjhb    "frequency for checking the free space");
185155438Sjhb
186162370SrwatsonSYSCTL_INT(_kern, OID_AUTO, acct_configured, CTLFLAG_RD, &acct_configured, 0,
187162370Srwatson	"Accounting configured or not");
188162370Srwatson
189152328SrwatsonSYSCTL_INT(_kern, OID_AUTO, acct_suspended, CTLFLAG_RD, &acct_suspended, 0,
190152328Srwatson	"Accounting suspended or not");
191152328Srwatson
1923124Sdg/*
193167211Srwatson * Accounting system call.  Written based on the specification and previous
194167211Srwatson * implementation done by Mark Tinguely.
1953124Sdg */
1961549Srgrimesint
197225617Skmacysys_acct(struct thread *td, struct acct_args *uap)
1981541Srgrimes{
1993124Sdg	struct nameidata nd;
200252886Smjg	int error, flags, i, vfslocked, replacing;
2013124Sdg
202164033Srwatson	error = priv_check(td, PRIV_ACCT);
2033308Sphk	if (error)
20494301Sjhb		return (error);
2053124Sdg
2061541Srgrimes	/*
2073124Sdg	 * If accounting is to be started to a file, open that file for
208157232Sjhb	 * appending and make sure it's a 'normal'.
2091541Srgrimes	 */
210107849Salfred	if (uap->path != NULL) {
211159258Srwatson		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE | AUDITVNODE1,
212159258Srwatson		    UIO_USERSPACE, uap->path, td);
21399740Sjohan		flags = FWRITE | O_APPEND;
214170152Skib		error = vn_open(&nd, &flags, 0, NULL);
2153308Sphk		if (error)
216157232Sjhb			return (error);
217157232Sjhb		vfslocked = NDHASGIANT(&nd);
21854655Seivind		NDFREE(&nd, NDF_ONLY_PNBUF);
219106412Srwatson#ifdef MAC
220172930Srwatson		error = mac_system_check_acct(td->td_ucred, nd.ni_vp);
221106412Srwatson		if (error) {
222175294Sattilio			VOP_UNLOCK(nd.ni_vp, 0);
223106412Srwatson			vn_close(nd.ni_vp, flags, td->td_ucred, td);
224157232Sjhb			VFS_UNLOCK_GIANT(vfslocked);
225157232Sjhb			return (error);
226106412Srwatson		}
227106412Srwatson#endif
228175294Sattilio		VOP_UNLOCK(nd.ni_vp, 0);
2293124Sdg		if (nd.ni_vp->v_type != VREG) {
230100444Sjohan			vn_close(nd.ni_vp, flags, td->td_ucred, td);
231157232Sjhb			VFS_UNLOCK_GIANT(vfslocked);
232157232Sjhb			return (EACCES);
2333124Sdg		}
234157232Sjhb		VFS_UNLOCK_GIANT(vfslocked);
235106412Srwatson#ifdef MAC
236106412Srwatson	} else {
237172930Srwatson		error = mac_system_check_acct(td->td_ucred, NULL);
238106412Srwatson		if (error)
239157232Sjhb			return (error);
240106412Srwatson#endif
2413124Sdg	}
2421541Srgrimes
243152328Srwatson	/*
244152328Srwatson	 * Disallow concurrent access to the accounting vnode while we swap
245152328Srwatson	 * it out, in order to prevent access after close.
246152328Srwatson	 */
247152328Srwatson	sx_xlock(&acct_sx);
248126586Sbde
2493124Sdg	/*
250247321Sjhb	 * Don't log spurious disable/enable messages if we are
251247321Sjhb	 * switching from one accounting file to another due to log
252247321Sjhb	 * rotation.
253247321Sjhb	 */
254247321Sjhb	replacing = (acct_vp != NULL && uap->path != NULL);
255247321Sjhb
256247321Sjhb	/*
2573124Sdg	 * If accounting was previously enabled, kill the old space-watcher,
258152328Srwatson	 * close the file, and (if no new file was specified, leave).  Reset
259152328Srwatson	 * the suspended state regardless of whether accounting remains
260152328Srwatson	 * enabled.
2613124Sdg	 */
262152328Srwatson	acct_suspended = 0;
263157232Sjhb	if (acct_vp != NULL) {
264157232Sjhb		vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
265247321Sjhb		error = acct_disable(td, !replacing);
266157232Sjhb		VFS_UNLOCK_GIANT(vfslocked);
267157232Sjhb	}
268107849Salfred	if (uap->path == NULL) {
269155431Sjhb		if (acct_state & ACCT_RUNNING) {
270155431Sjhb			acct_state |= ACCT_EXITREQ;
271155431Sjhb			wakeup(&acct_state);
272155431Sjhb		}
273152328Srwatson		sx_xunlock(&acct_sx);
274157232Sjhb		return (error);
275103244Sarr	}
2761541Srgrimes
2771541Srgrimes	/*
278252886Smjg	 * Create our own plimit object without limits. It will be assigned
279252886Smjg	 * to exiting processes.
280252886Smjg	 */
281252886Smjg	acct_limit = lim_alloc();
282252886Smjg	for (i = 0; i < RLIM_NLIMITS; i++)
283252886Smjg		acct_limit->pl_rlimit[i].rlim_cur =
284252886Smjg		    acct_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
285252886Smjg
286252886Smjg	/*
2873124Sdg	 * Save the new accounting file vnode, and schedule the new
2883124Sdg	 * free space watcher.
2891541Srgrimes	 */
290152328Srwatson	acct_vp = nd.ni_vp;
291152328Srwatson	acct_cred = crhold(td->td_ucred);
292152328Srwatson	acct_flags = flags;
293155431Sjhb	if (acct_state & ACCT_RUNNING)
294155431Sjhb		acct_state &= ~ACCT_EXITREQ;
295155431Sjhb	else {
296155431Sjhb		/*
297155431Sjhb		 * Try to start up an accounting kthread.  We may start more
298155431Sjhb		 * than one, but if so the extras will commit suicide as
299155431Sjhb		 * soon as they start up.
300155431Sjhb		 */
301172836Sjulian		error = kproc_create(acct_thread, NULL, NULL, 0, 0,
302155431Sjhb		    "accounting");
303155431Sjhb		if (error) {
304157232Sjhb			vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
305252886Smjg			(void) acct_disable(td, 0);
306157232Sjhb			VFS_UNLOCK_GIANT(vfslocked);
307155431Sjhb			sx_xunlock(&acct_sx);
308155431Sjhb			log(LOG_NOTICE, "Unable to start accounting thread\n");
309157232Sjhb			return (error);
310155431Sjhb		}
311155431Sjhb	}
312162370Srwatson	acct_configured = 1;
313152328Srwatson	sx_xunlock(&acct_sx);
314247321Sjhb	if (!replacing)
315247321Sjhb		log(LOG_NOTICE, "Accounting enabled\n");
3163124Sdg	return (error);
3171541Srgrimes}
3181541Srgrimes
3191541Srgrimes/*
320155431Sjhb * Disable currently in-progress accounting by closing the vnode, dropping
321155431Sjhb * our reference to the credential, and clearing the vnode's flags.
322155431Sjhb */
323155431Sjhbstatic int
324247321Sjhbacct_disable(struct thread *td, int logging)
325155431Sjhb{
326155431Sjhb	int error;
327155431Sjhb
328155431Sjhb	sx_assert(&acct_sx, SX_XLOCKED);
329155431Sjhb	error = vn_close(acct_vp, acct_flags, acct_cred, td);
330155431Sjhb	crfree(acct_cred);
331252886Smjg	lim_free(acct_limit);
332162370Srwatson	acct_configured = 0;
333155431Sjhb	acct_vp = NULL;
334155431Sjhb	acct_cred = NULL;
335155431Sjhb	acct_flags = 0;
336247321Sjhb	if (logging)
337247321Sjhb		log(LOG_NOTICE, "Accounting disabled\n");
338155431Sjhb	return (error);
339155431Sjhb}
340155431Sjhb
341155431Sjhb/*
3423124Sdg * Write out process accounting information, on process exit.
3433124Sdg * Data to be written out is specified in Leffler, et al.
3443124Sdg * and are enumerated below.  (They're also noted in the system
3453124Sdg * "acct.h" header file.)
3461541Srgrimes */
3473124Sdgint
348152328Srwatsonacct_process(struct thread *td)
3493124Sdg{
350169857Sdds	struct acctv2 acct;
351126586Sbde	struct timeval ut, st, tmp;
352252886Smjg	struct plimit *oldlim;
353126586Sbde	struct proc *p;
354170174Sjeff	struct rusage ru;
355152328Srwatson	int t, ret, vfslocked;
3563124Sdg
357139895Srwatson	/*
358139895Srwatson	 * Lockless check of accounting condition before doing the hard
359139895Srwatson	 * work.
360139895Srwatson	 */
361152328Srwatson	if (acct_vp == NULL || acct_suspended)
362139895Srwatson		return (0);
363139895Srwatson
364152328Srwatson	sx_slock(&acct_sx);
365103208Sarr
366139895Srwatson	/*
367139895Srwatson	 * If accounting isn't enabled, don't bother.  Have to check again
368139895Srwatson	 * once we own the lock in case we raced with disabling of accounting
369139895Srwatson	 * by another thread.
370139895Srwatson	 */
371152328Srwatson	if (acct_vp == NULL || acct_suspended) {
372152328Srwatson		sx_sunlock(&acct_sx);
3733124Sdg		return (0);
374103208Sarr	}
3753124Sdg
376126586Sbde	p = td->td_proc;
377126586Sbde
3783124Sdg	/*
3793124Sdg	 * Get process accounting information.
3803124Sdg	 */
3813124Sdg
382181963Sed	sx_slock(&proctree_lock);
383113624Sjhb	PROC_LOCK(p);
384181963Sed
385181963Sed	/* (1) The terminal from which the process was started */
386181963Sed	if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp)
387181963Sed		acct.ac_tty = tty_udev(p->p_pgrp->pg_session->s_ttyp);
388181963Sed	else
389181963Sed		acct.ac_tty = NODEV;
390181963Sed	sx_sunlock(&proctree_lock);
391181963Sed
392181963Sed	/* (2) The name of the command that ran */
3933124Sdg	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
3943124Sdg
395181963Sed	/* (3) The amount of user and system time that was used */
396170472Sattilio	rufetchcalc(p, &ru, &ut, &st);
397169857Sdds	acct.ac_utime = encode_timeval(ut);
398169857Sdds	acct.ac_stime = encode_timeval(st);
3993124Sdg
400181963Sed	/* (4) The elapsed time the command ran (and its starting time) */
401114434Sdes	tmp = boottime;
402114434Sdes	timevaladd(&tmp, &p->p_stats->p_start);
403114434Sdes	acct.ac_btime = tmp.tv_sec;
404114434Sdes	microuptime(&tmp);
4053124Sdg	timevalsub(&tmp, &p->p_stats->p_start);
406169857Sdds	acct.ac_etime = encode_timeval(tmp);
4073124Sdg
408181963Sed	/* (5) The average amount of memory used */
4093124Sdg	tmp = ut;
4103124Sdg	timevaladd(&tmp, &st);
411169857Sdds	/* Convert tmp (i.e. u + s) into hz units to match ru_i*. */
4123124Sdg	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
4133124Sdg	if (t)
414170174Sjeff		acct.ac_mem = encode_long((ru.ru_ixrss + ru.ru_idrss +
415170174Sjeff		    + ru.ru_isrss) / t);
4163124Sdg	else
4173124Sdg		acct.ac_mem = 0;
4183124Sdg
419181963Sed	/* (6) The number of disk I/O operations done */
420170174Sjeff	acct.ac_io = encode_long(ru.ru_inblock + ru.ru_oublock);
4213124Sdg
422181963Sed	/* (7) The UID and GID of the process */
42377183Srwatson	acct.ac_uid = p->p_ucred->cr_ruid;
42477183Srwatson	acct.ac_gid = p->p_ucred->cr_rgid;
4253124Sdg
4263124Sdg	/* (8) The boolean flags that tell how the process terminated, etc. */
427169857Sdds	acct.ac_flagx = p->p_acflag;
4283124Sdg
429169857Sdds	/* Setup ancillary structure fields. */
430169857Sdds	acct.ac_flagx |= ANVER;
431169857Sdds	acct.ac_zero = 0;
432169857Sdds	acct.ac_version = 2;
433169857Sdds	acct.ac_len = acct.ac_len2 = sizeof(acct);
434169857Sdds
4353124Sdg	/*
436252886Smjg	 * Eliminate rlimits (file size limit in particular).
4373124Sdg	 */
438125454Sjhb	oldlim = p->p_limit;
439252886Smjg	p->p_limit = lim_hold(acct_limit);
440125454Sjhb	PROC_UNLOCK(p);
441125454Sjhb	lim_free(oldlim);
44236676Sdg
443126586Sbde	/*
444126586Sbde	 * Write the accounting information to the file.
445126586Sbde	 */
446152328Srwatson	vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
447152328Srwatson	ret = vn_rdwr(UIO_WRITE, acct_vp, (caddr_t)&acct, sizeof (acct),
448152328Srwatson	    (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, acct_cred, NOCRED,
449194296Skib	    NULL, td);
450152328Srwatson	VFS_UNLOCK_GIANT(vfslocked);
451152328Srwatson	sx_sunlock(&acct_sx);
452103208Sarr	return (ret);
4533124Sdg}
4543124Sdg
455169857Sdds/* FLOAT_CONVERSION_START (Regression testing; don't remove this line.) */
456169857Sdds
/* Convert timevals and longs into IEEE-754 bit patterns. */

/*
 * Mask selecting the stored mantissa bits.  The MSB is implied, so one
 * bit fewer than FLT_MANT_DIG is kept.
 */
#define	MANT_MASK	((1 << (FLT_MANT_DIG - 1)) - 1)

/*
 * We calculate integer values to a precision of approximately
 * 28 bits.
 * This is high-enough precision to fill the 24 float bits
 * and low-enough to avoid overflowing the 32 int bits.
 */
#define	CALC_BITS	28

/* log_2(1000000), rounded up. */
#define	LOG2_1M		20
4723124Sdg
473169857Sdds/*
474169857Sdds * Convert the elements of a timeval into a 32-bit word holding
475169857Sdds * the bits of a IEEE-754 float.
476169857Sdds * The float value represents the timeval's value in microsecond units.
477169857Sdds */
478169857Sddsstatic uint32_t
479169857Sddsencode_timeval(struct timeval tv)
4803124Sdg{
481169857Sdds	int log2_s;
482169857Sdds	int val, exp;	/* Unnormalized value and exponent */
483169857Sdds	int norm_exp;	/* Normalized exponent */
484169857Sdds	int shift;
4853124Sdg
486169857Sdds	/*
487169857Sdds	 * First calculate value and exponent to about CALC_BITS precision.
488169857Sdds	 * Note that the following conditionals have been ordered so that
489169857Sdds	 * the most common cases appear first.
490169857Sdds	 */
491169857Sdds	if (tv.tv_sec == 0) {
492169857Sdds		if (tv.tv_usec == 0)
493169857Sdds			return (0);
494169857Sdds		exp = 0;
495169857Sdds		val = tv.tv_usec;
496169857Sdds	} else {
497169857Sdds		/*
498169857Sdds		 * Calculate the value to a precision of approximately
499169857Sdds		 * CALC_BITS.
500169857Sdds		 */
501169857Sdds		log2_s = fls(tv.tv_sec) - 1;
502169857Sdds		if (log2_s + LOG2_1M < CALC_BITS) {
503169857Sdds			exp = 0;
504169857Sdds			val = 1000000 * tv.tv_sec + tv.tv_usec;
505169857Sdds		} else {
506169857Sdds			exp = log2_s + LOG2_1M - CALC_BITS;
507209390Sed			val = (unsigned int)(((uint64_t)1000000 * tv.tv_sec +
508169857Sdds			    tv.tv_usec) >> exp);
509169857Sdds		}
5103124Sdg	}
511169857Sdds	/* Now normalize and pack the value into an IEEE-754 float. */
512169857Sdds	norm_exp = fls(val) - 1;
513169857Sdds	shift = FLT_MANT_DIG - norm_exp - 1;
514169857Sdds#ifdef ACCT_DEBUG
515169857Sdds	printf("val=%d exp=%d shift=%d log2(val)=%d\n",
516169857Sdds	    val, exp, shift, norm_exp);
517169857Sdds	printf("exp=%x mant=%x\n", FLT_MAX_EXP - 1 + exp + norm_exp,
518169857Sdds	    ((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK));
519169857Sdds#endif
520169857Sdds	return (((FLT_MAX_EXP - 1 + exp + norm_exp) << (FLT_MANT_DIG - 1)) |
521169857Sdds	    ((shift > 0 ? val << shift : val >> -shift) & MANT_MASK));
522169857Sdds}
5233124Sdg
524169857Sdds/*
525169857Sdds * Convert a non-negative long value into the bit pattern of
526169857Sdds * an IEEE-754 float value.
527169857Sdds */
528169857Sddsstatic uint32_t
529169857Sddsencode_long(long val)
530169857Sdds{
531169857Sdds	int norm_exp;	/* Normalized exponent */
532169857Sdds	int shift;
5333124Sdg
534169857Sdds	if (val == 0)
535169857Sdds		return (0);
536172023Sdds	if (val < 0) {
537172023Sdds		log(LOG_NOTICE,
538172024Sdds		    "encode_long: negative value %ld in accounting record\n",
539172023Sdds		    val);
540172023Sdds		val = LONG_MAX;
541172023Sdds	}
542169857Sdds	norm_exp = fls(val) - 1;
543169857Sdds	shift = FLT_MANT_DIG - norm_exp - 1;
544169857Sdds#ifdef ACCT_DEBUG
545169857Sdds	printf("val=%d shift=%d log2(val)=%d\n",
546169857Sdds	    val, shift, norm_exp);
547169857Sdds	printf("exp=%x mant=%x\n", FLT_MAX_EXP - 1 + exp + norm_exp,
548169857Sdds	    ((shift > 0 ? (val << shift) : (val >> -shift)) & MANT_MASK));
549169857Sdds#endif
550169857Sdds	return (((FLT_MAX_EXP - 1 + norm_exp) << (FLT_MANT_DIG - 1)) |
551169857Sdds	    ((shift > 0 ? val << shift : val >> -shift) & MANT_MASK));
5523124Sdg}
5533124Sdg
554169857Sdds/* FLOAT_CONVERSION_END (Regression testing; don't remove this line.) */
555169857Sdds
5561541Srgrimes/*
55796755Strhodes * Periodically check the filesystem to see if accounting
5583124Sdg * should be turned on or off.  Beware the case where the vnode
5593124Sdg * has been vgone()'d out from underneath us, e.g. when the file
5603124Sdg * system containing the accounting file has been forcibly unmounted.
5611541Srgrimes */
5621541Srgrimes/* ARGSUSED */
56312819Sphkstatic void
564155431Sjhbacctwatch(void)
5651541Srgrimes{
5661541Srgrimes	struct statfs sb;
567152328Srwatson	int vfslocked;
5681541Srgrimes
569155431Sjhb	sx_assert(&acct_sx, SX_XLOCKED);
570155431Sjhb
571155431Sjhb	/*
572155431Sjhb	 * If accounting was disabled before our kthread was scheduled,
573155431Sjhb	 * then acct_vp might be NULL.  If so, just ask our kthread to
574155431Sjhb	 * exit and return.
575155431Sjhb	 */
576155431Sjhb	if (acct_vp == NULL) {
577155431Sjhb		acct_state |= ACCT_EXITREQ;
578155431Sjhb		return;
579155431Sjhb	}
580155431Sjhb
581155431Sjhb	/*
582155431Sjhb	 * If our vnode is no longer valid, tear it down and signal the
583155431Sjhb	 * accounting thread to die.
584155431Sjhb	 */
585152328Srwatson	vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount);
586152328Srwatson	if (acct_vp->v_type == VBAD) {
587247321Sjhb		(void) acct_disable(NULL, 1);
588152328Srwatson		VFS_UNLOCK_GIANT(vfslocked);
589155431Sjhb		acct_state |= ACCT_EXITREQ;
590152328Srwatson		return;
591152328Srwatson	}
592155431Sjhb
593103244Sarr	/*
594152328Srwatson	 * Stopping here is better than continuing, maybe it will be VBAD
595152328Srwatson	 * next time around.
596112209Sjhb	 */
597191990Sattilio	if (VFS_STATFS(acct_vp->v_mount, &sb) < 0) {
598152328Srwatson		VFS_UNLOCK_GIANT(vfslocked);
599152328Srwatson		return;
600152328Srwatson	}
601152328Srwatson	VFS_UNLOCK_GIANT(vfslocked);
602152328Srwatson	if (acct_suspended) {
603152328Srwatson		if (sb.f_bavail > (int64_t)(acctresume * sb.f_blocks /
604152328Srwatson		    100)) {
605152328Srwatson			acct_suspended = 0;
6061541Srgrimes			log(LOG_NOTICE, "Accounting resumed\n");
6071541Srgrimes		}
60822521Sdyson	} else {
609152328Srwatson		if (sb.f_bavail <= (int64_t)(acctsuspend * sb.f_blocks /
610152328Srwatson		    100)) {
611152328Srwatson			acct_suspended = 1;
6121541Srgrimes			log(LOG_NOTICE, "Accounting suspended\n");
6131541Srgrimes		}
61422521Sdyson	}
615155431Sjhb}
616155431Sjhb
617155431Sjhb/*
618155431Sjhb * The main loop for the dedicated kernel thread that periodically calls
619155431Sjhb * acctwatch().
620155431Sjhb */
621155431Sjhbstatic void
622155431Sjhbacct_thread(void *dummy)
623155431Sjhb{
624155431Sjhb	u_char pri;
625155431Sjhb
626155431Sjhb	/* This is a low-priority kernel thread. */
627155431Sjhb	pri = PRI_MAX_KERN;
628170307Sjeff	thread_lock(curthread);
629155431Sjhb	sched_prio(curthread, pri);
630170307Sjeff	thread_unlock(curthread);
631155431Sjhb
632155431Sjhb	/* If another accounting kthread is already running, just die. */
633155431Sjhb	sx_xlock(&acct_sx);
634155431Sjhb	if (acct_state & ACCT_RUNNING) {
635155431Sjhb		sx_xunlock(&acct_sx);
636172836Sjulian		kproc_exit(0);
637155431Sjhb	}
638155431Sjhb	acct_state |= ACCT_RUNNING;
639155431Sjhb
640155431Sjhb	/* Loop until we are asked to exit. */
641155431Sjhb	while (!(acct_state & ACCT_EXITREQ)) {
642155431Sjhb
643155431Sjhb		/* Perform our periodic checks. */
644155431Sjhb		acctwatch();
645155431Sjhb
646155431Sjhb		/*
647155431Sjhb		 * We check this flag again before sleeping since the
648155431Sjhb		 * acctwatch() might have shut down accounting and asked us
649155431Sjhb		 * to exit.
650155431Sjhb		 */
651155431Sjhb		if (!(acct_state & ACCT_EXITREQ)) {
652167389Sjhb			sx_sleep(&acct_state, &acct_sx, 0, "-",
653167389Sjhb			    acctchkfreq * hz);
654155431Sjhb		}
655155431Sjhb	}
656155431Sjhb
657155431Sjhb	/*
658155431Sjhb	 * Acknowledge the exit request and shutdown.  We clear both the
659155431Sjhb	 * exit request and running flags.
660155431Sjhb	 */
661155431Sjhb	acct_state = 0;
662152328Srwatson	sx_xunlock(&acct_sx);
663172836Sjulian	kproc_exit(0);
6641541Srgrimes}
665