machine.c revision 1.40
1/* $OpenBSD: machine.c,v 1.40 2004/06/11 01:32:11 deraadt Exp $	 */
2
3/*-
4 * Copyright (c) 1994 Thorsten Lockert <tholo@sigmasoft.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
20 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
21 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * AUTHOR:  Thorsten Lockert <tholo@sigmasoft.com>
30 *          Adapted from BSD4.4 by Christos Zoulas <christos@ee.cornell.edu>
31 *          Patch for process wait display by Jarl F. Greipsland <jarle@idt.unit.no>
32 *	    Patch for -DORDER by Kenneth Stailey <kstailey@disclosure.com>
33 *	    Patch for new swapctl(2) by Tobias Weingartner <weingart@openbsd.org>
34 */
35
36#include <sys/types.h>
37#include <sys/signal.h>
38#include <sys/param.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <limits.h>
43#include <err.h>
44#include <math.h>
45#include <unistd.h>
46#include <sys/errno.h>
47#include <sys/sysctl.h>
48#include <sys/dir.h>
49#include <sys/dkstat.h>
50#include <sys/file.h>
51#include <sys/time.h>
52#include <sys/resource.h>
53#include <sys/swap.h>
54#include <err.h>
55
56#include "top.h"
57#include "display.h"
58#include "machine.h"
59#include "utils.h"
60#include "loadavg.h"
61
/* forward declaration: swapmode() is defined later in this file */
static int	swapmode(int *, int *);

/* get_process_info passes back a handle.  This is what it looks like: */

/*
 * A cursor over the pref[] array of selected processes.
 * get_process_info() initializes both members; format_next_process()
 * advances next_proc and decrements remaining once per call.
 */
struct handle {
	struct kinfo_proc2 **next_proc;	/* points to next valid proc pointer */
	int		remaining;	/* number of pointers remaining */
};

/* what we consider to be process size: */
/* the sum of the p_vm_tsize, p_vm_dsize and p_vm_ssize members of *pp */
#define PROCSIZE(pp) ((pp)->p_vm_tsize + (pp)->p_vm_dsize + (pp)->p_vm_ssize)
73
/*
 *  These definitions control the format of the per-process area
 */
/*
 * Column header line.  The "X" marks the start of the user column;
 * format_header() overwrites the text beginning at that position.
 */
static char header[] =
	"  PID X        PRI NICE  SIZE   RES STATE    WAIT     TIME    CPU COMMAND";

/* 0123456   -- field to fill in starts at header+6 */
#define UNAME_START 6

/*
 * printf-style format for one process line; format_next_process() passes
 * it to snprintf() with eleven arguments, in the column order of header[].
 */
#define Proc_format \
	"%5d %-8.8s %3d %4d %5s %5s %-8s %-6.6s %6s %5.2f%% %.11s"
85
/* process state names for the "STATE" column of the display */
/*
 * The table is indexed by a process's p_stat value.  state_abbr() picks
 * the name and appends a slash and the processor number to it in its own
 * buffer; the strings here are never modified.
 */

char	*state_abbrev[] = {
	"", "start", "run", "sleep", "stop", "zomb", "dead", "onproc"
};
95
/* value returned by getstathz(); format_next_process() divides ticks by it */
static int      stathz;

/* these are for calculating cpu state percentages */
/*
 * cp_time is refilled from the kern.cp_time sysctl on every call to
 * get_system_info(); all three arrays are then handed to percentages().
 */
static long     cp_time[CPUSTATES];
static long     cp_old[CPUSTATES];
static long     cp_diff[CPUSTATES];
102
/*
 * these are for detailing the process states
 *
 * process_states[] is indexed directly by a process's p_stat value in
 * get_process_info().  The state name table in this file lists eight
 * states (indices 0 through 7, ending in "dead" and "onproc"), so the
 * counter array must have eight slots; with only seven, counting a
 * process in state 7 wrote past the end of the array.
 *
 * procstatenames[] is NULL-terminated and labels only the first six
 * counters; the remaining counters are tallied but have no label here.
 */
int process_states[8];
char *procstatenames[] = {
	"", " starting, ", " running, ", " idle, ",
	" stopped, ", " zombie, ",
	NULL
};
110
/* these are for detailing the cpu states */
/*
 * cpu_states[] is filled in by percentages() from get_system_info();
 * cpustatenames[] is the NULL-terminated list of matching labels.
 */
int cpu_states[CPUSTATES];
char *cpustatenames[] = {
	"user", "nice", "system", "interrupt", "idle", NULL
};

/* these are for detailing the memory statistics */
/*
 * memory_stats[i] pairs with memorynames[i].  get_system_info() stores -1
 * in slots 0, 3 and 5, whose names ("Real: ", "Free: ", "Swap: ") are
 * labels only; the other slots hold values in kilobytes.
 */
int memory_stats[8];
char *memorynames[] = {
	"Real: ", "K/", "K act/tot  ", "Free: ", "K  ",
	"Swap: ", "K/", "K used/tot",
	NULL
};

/* these are names given to allowed sorting orders -- first is default */
/* listed in the same order as the functions in proc_compares[] */
char	*ordernames[] = {
	"cpu", "size", "res", "time", "pri", NULL
};
129
/* these are for keeping track of the proc array */
static int      nproc;		/* entry count reported by getprocs() */
static int      onproc = -1;	/* number of pointers pref[] has room for */
static int      pref_len;	/* number of selected entries in pref[] */
static struct kinfo_proc2 *pbase;	/* process array from getprocs() */
static struct kinfo_proc2 **pref;	/* pointers to the selected entries */

/* these are for getting the memory statistics */
/*
 * machine_init() computes log2(pagesize) and then subtracts LOG1024, so
 * the value kept here is the shift that turns a page count into kilobytes.
 */
static int      pageshift;	/* log base 2 of the pagesize */

/* define pagetok in terms of pageshift */
#define pagetok(size) ((size) << pageshift)

/*
 * Filled from the vm.maxslp sysctl by getprocs(); format_next_process()
 * shows a sleeping process as "idle" once its p_slptime exceeds it.
 */
unsigned int	maxslp;
144
/*
 * getstathz - query the kernel clock information (CTL_KERN,
 * KERN_CLOCKRATE) and return its stathz member.
 *
 * Returns -1 when the sysctl call fails.
 */
static int
getstathz(void)
{
	int name[2] = { CTL_KERN, KERN_CLOCKRATE };
	struct clockinfo ci;
	size_t len = sizeof(ci);

	if (sysctl(name, 2, &ci, &len, NULL, 0) == -1)
		return (-1);

	return (ci.stathz);
}
158
159int
160machine_init(struct statics *statics)
161{
162	int pagesize;
163
164	stathz = getstathz();
165	if (stathz == -1)
166		return (-1);
167
168	pbase = NULL;
169	pref = NULL;
170	onproc = -1;
171	nproc = 0;
172
173	/*
174	 * get the page size with "getpagesize" and calculate pageshift from
175	 * it
176	 */
177	pagesize = getpagesize();
178	pageshift = 0;
179	while (pagesize > 1) {
180		pageshift++;
181		pagesize >>= 1;
182	}
183
184	/* we only need the amount of log(2)1024 for our conversion */
185	pageshift -= LOG1024;
186
187	/* fill in the statics information */
188	statics->procstate_names = procstatenames;
189	statics->cpustate_names = cpustatenames;
190	statics->memory_names = memorynames;
191	statics->order_names = ordernames;
192	return (0);
193}
194
195char *
196format_header(char *uname_field)
197{
198	char *ptr;
199
200	ptr = header + UNAME_START;
201	while (*uname_field != '\0')
202		*ptr++ = *uname_field++;
203	return (header);
204}
205
/*
 * get_system_info - collect the system-wide statistics for one display
 * update and store them in *si.
 *
 * Fills in si->load_avg (three values), si->cpustates, si->memory and
 * si->last_pid (always -1 here).  Each sysctl failure is reported with
 * warn() and the affected figures fall back to zero instead of being
 * computed from uninitialized memory.
 */
void
get_system_info(struct system_info *si)
{
	static int sysload_mib[] = {CTL_VM, VM_LOADAVG};
	static int vmtotal_mib[] = {CTL_VM, VM_METER};
	static int cp_time_mib[] = {CTL_KERN, KERN_CPTIME};
	struct loadavg sysload;
	struct vmtotal vmtotal;
	double *infoloadp;
	size_t size;
	int i;

	/* on failure cp_time keeps the counts from the previous call */
	size = sizeof(cp_time);
	if (sysctl(cp_time_mib, 2, &cp_time, &size, NULL, 0) < 0)
		warn("sysctl kern.cp_time failed");

	size = sizeof(sysload);
	if (sysctl(sysload_mib, 2, &sysload, &size, NULL, 0) < 0) {
		warn("sysctl failed");
		/* never read an uninitialized structure below */
		memset(&sysload, 0, sizeof(sysload));
	}
	infoloadp = si->load_avg;
	for (i = 0; i < 3; i++) {
		/* a zero scale factor (failed sysctl) yields a load of 0 */
		if (sysload.fscale != 0)
			*infoloadp++ =
			    ((double) sysload.ldavg[i]) / sysload.fscale;
		else
			*infoloadp++ = 0.0;
	}

	/* convert cp_time counts to percentages */
	(void) percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);

	/* get total -- systemwide main memory usage structure */
	size = sizeof(vmtotal);
	if (sysctl(vmtotal_mib, 2, &vmtotal, &size, NULL, 0) < 0) {
		warn("sysctl failed");
		memset(&vmtotal, 0, sizeof(vmtotal));
	}
	/* convert memory stats to Kbytes; -1 marks the label-only slots */
	memory_stats[0] = -1;
	memory_stats[1] = pagetok(vmtotal.t_arm);
	memory_stats[2] = pagetok(vmtotal.t_rm);
	memory_stats[3] = -1;
	memory_stats[4] = pagetok(vmtotal.t_free);
	memory_stats[5] = -1;

	/* swapmode() returns 0 when it could not produce any figures */
	if (!swapmode(&memory_stats[6], &memory_stats[7])) {
		memory_stats[6] = 0;
		memory_stats[7] = 0;
	}

	/* set arrays and strings */
	si->cpustates = cpu_states;
	si->memory = memory_stats;
	si->last_pid = -1;
}
256
/* the single handle that get_process_info() hands back to its caller */
static struct handle handle;

/*
 * getprocs - fetch the kernel process table through the KERN_PROC2 sysctl.
 *
 * op, arg - selector and its argument, placed in mib[2] and mib[3]
 * cnt     - receives the number of entries returned
 *
 * Also refreshes the global maxslp from the vm.maxslp sysctl.
 *
 * Returns a pointer to an array of *cnt entries, or NULL on failure.  The
 * array lives in a static buffer that this function owns: it is freed and
 * reallocated on every call, so the caller must not free it and must not
 * use it after calling getprocs() again.
 */
static struct kinfo_proc2 *
getprocs(int op, int arg, int *cnt)
{
	size_t size;
	int mib[6] = {CTL_KERN, KERN_PROC2, 0, 0, sizeof(struct kinfo_proc2), 0};
	static int maxslp_mib[] = {CTL_VM, VM_MAXSLP};
	static struct kinfo_proc2 *procbase;
	int st;

	mib[2] = op;
	mib[3] = arg;

	size = sizeof(maxslp);
	if (sysctl(maxslp_mib, 2, &maxslp, &size, NULL, 0) < 0) {
		warn("sysctl vm.maxslp failed");
		return (NULL);
	}
retry:
	/*
	 * Drop the previous buffer.  The pointer is cleared so that an early
	 * return below cannot leave a dangling value to be freed a second
	 * time on the next call.
	 */
	free(procbase);
	procbase = NULL;

	/* first pass: a NULL buffer asks the kernel how much space is needed */
	st = sysctl(mib, 6, NULL, &size, NULL, 0);
	if (st == -1) {
		warn("sysctl kern.proc2 failed");
		return (NULL);
	}
	size = 5 * size / 4;			/* extra slop */
	if ((procbase = malloc(size)) == NULL)
		return (NULL);
	mib[5] = (int)(size / sizeof(struct kinfo_proc2));

	/* second pass: fetch the entries themselves */
	st = sysctl(mib, 6, procbase, &size, NULL, 0);
	if (st == -1) {
		/* the table outgrew the buffer in the meantime: size it again */
		if (errno == ENOMEM)
			goto retry;
		warn("sysctl kern.proc2 failed");
		return (NULL);
	}
	*cnt = (int)(size / sizeof(struct kinfo_proc2));
	return (procbase);
}
297
/*
 * get_process_info - fetch the process table, count processes per state
 * and build the list of processes to display.
 *
 * si      - receives the state counters (procstates) and the total and
 *           selected process counts (p_total, p_active)
 * sel     - selection flags: idle, system, and uid, where (uid_t)-1
 *           means "do not filter by uid"
 * compare - qsort() comparison function, or NULL to leave the list in
 *           process-table order
 *
 * Returns the address of the file-static handle, cast to caddr_t, for use
 * with format_next_process().  Calls quit(23) when the process table
 * cannot be read or memory runs out.
 */
caddr_t
get_process_info(struct system_info *si, struct process_select *sel,
    int (*compare) (const void *, const void *))
{
	const unsigned int nstates =
	    sizeof(process_states) / sizeof(process_states[0]);
	int show_idle, show_system, show_uid;
	int total_procs, active_procs, i;
	struct kinfo_proc2 **prefp, *pp;
	unsigned int stat;

	if ((pbase = getprocs(KERN_PROC_KTHREAD, 0, &nproc)) == NULL) {
		quit(23);
	}
	/* grow the pointer array when the table no longer fits in it */
	if (nproc > onproc)
		pref = realloc(pref,
		    sizeof(struct kinfo_proc2 *) * (onproc = nproc));
	if (pref == NULL) {
		warnx("Out of memory.");
		quit(23);
	}
	/* get a pointer to the states summary array */
	si->procstates = process_states;

	/* set up flags which define what we are going to select */
	show_idle = sel->idle;
	show_system = sel->system;
	show_uid = sel->uid != (uid_t)-1;

	/* count up process states and get pointers to interesting procs */
	total_procs = 0;
	active_procs = 0;
	memset(process_states, 0, sizeof(process_states));
	prefp = pref;
	for (pp = pbase, i = 0; i < nproc; pp++, i++) {
		/*
		 *  Place pointers to each valid proc structure in pref[].
		 *  Process slots that are actually in use have a non-zero
		 *  status field.  Processes with P_SYSTEM set are system
		 *  processes---these get ignored unless show_system is set.
		 */
		if (pp->p_stat != 0 &&
		    (show_system || (pp->p_flag & P_SYSTEM) == 0)) {
			total_procs++;
			/*
			 * Count the state only when it has a slot; a state
			 * number beyond the array must not be used as an
			 * index into it.
			 */
			stat = (unsigned char) pp->p_stat;
			if (stat < nstates)
				process_states[stat]++;
			if (pp->p_stat != SZOMB &&
			    (show_idle || pp->p_pctcpu != 0 ||
			    pp->p_stat == SRUN) &&
			    (!show_uid || pp->p_ruid == sel->uid)) {
				*prefp++ = pp;
				active_procs++;
			}
		}
	}

	/* if requested, sort the "interesting" processes */
	if (compare != NULL)
		qsort(pref, active_procs,
		    sizeof(struct kinfo_proc2 *), compare);
	/* remember active and total counts */
	si->p_total = total_procs;
	si->p_active = pref_len = active_procs;

	/* pass back a handle */
	handle.next_proc = pref;
	handle.remaining = active_procs;
	return ((caddr_t) & handle);
}
364
365char fmt[MAX_COLS];	/* static area where result is built */
366
367char *
368state_abbr(struct kinfo_proc2 *pp)
369{
370	static char buf[10];
371
372	snprintf(buf, sizeof buf, "%s/%d",
373	    state_abbrev[(unsigned char)pp->p_stat], pp->p_cpuid);
374	return buf;
375}
376
/*
 * format_next_process - format the display line for the next process
 * referenced by a handle.
 *
 * handle     - value returned by get_process_info(); its cursor is
 *              advanced by one entry
 * get_userid - maps a uid to the string shown in the user column
 *
 * The caller must not ask for more lines than the handle has remaining;
 * that is not checked here.
 *
 * Returns a pointer to the global fmt buffer, which the next call
 * overwrites.  For a process without P_INMEM set, the p_comm field of the
 * process entry itself is rewritten to "<name>".
 */
char *
format_next_process(caddr_t handle, char *(*get_userid)(uid_t))
{
	char *p_wait, waddr[sizeof(void *) * 2 + 3];	/* Hexify void pointer */
	struct kinfo_proc2 *pp;
	struct handle *hp;
	int cputime;
	double pct;

	/* find and remember the next proc structure */
	hp = (struct handle *) handle;
	pp = *(hp->next_proc++);
	hp->remaining--;

	if ((pp->p_flag & P_INMEM) == 0) {
		/*
		 * Print swapped processes as <pname>
		 */
		char buf[sizeof(pp->p_comm)];

		(void) strlcpy(buf, pp->p_comm, sizeof(buf));
		(void) snprintf(pp->p_comm, sizeof(pp->p_comm), "<%s>", buf);
	}
	/* accumulated ticks divided by the tick rate saved by machine_init() */
	cputime = (pp->p_uticks + pp->p_sticks + pp->p_iticks) / stathz;

	/* calculate the base for cpu percentages */
	pct = pctdouble(pp->p_pctcpu);

	if (pp->p_wchan) {
		/*
		 * p_wmesg is a character array embedded in the process entry
		 * (NOTE(review): per struct kinfo_proc2 in <sys/sysctl.h> --
		 * confirm), so testing its address is always true.  Test its
		 * first character instead, so that the wait channel address
		 * is shown when there is no wait message.
		 */
		if (pp->p_wmesg[0] != '\0')
			p_wait = pp->p_wmesg;
		else {
			snprintf(waddr, sizeof(waddr), "%llx",
			    (unsigned long long)(pp->p_wchan & ~KERNBASE));
			p_wait = waddr;
		}
	} else
		p_wait = "-";

	/* format this entry */
	snprintf(fmt, sizeof fmt, Proc_format,
	    pp->p_pid, (*get_userid)(pp->p_ruid),
	    pp->p_priority - PZERO, pp->p_nice - NZERO,
	    format_k(pagetok(PROCSIZE(pp))),
	    format_k(pagetok(pp->p_vm_rssize)),
	    /* a process asleep for longer than maxslp is shown as idle */
	    (pp->p_stat == SSLEEP && pp->p_slptime > maxslp) ?
	    "idle" : state_abbr(pp),
	    p_wait, format_time(cputime), 100.0 * pct,
	    printable(pp->p_comm));

	/* return the result */
	return (fmt);
}
430
/*
 * comparison routine for qsort
 *
 * Rank of each process state, indexed by p_stat and used by
 * ORDERKEY_STATE; a higher rank sorts earlier.  The table has one entry
 * for every state named in this file (0 through 7), so that a "dead" or
 * "onproc" process no longer causes a read past the end of the table.
 */
static unsigned char sorted_state[] =
{
	0,			/* not used		 */
	4,			/* start		 */
	5,			/* run			 */
	2,			/* sleep		 */
	3,			/* stop			 */
	1,			/* zombie		 */
	0,			/* dead			 */
	6			/* onproc		 */
};
441
/*
 *  proc_compares - comparison functions for "qsort"
 */

/*
 * First, the possible comparison keys.  These are defined in such a way
 * that they can be merely listed in the source code to define the actual
 * desired ordering.
 *
 * Each macro expands to an "if" (two nested ones for ORDERKEY_CPUTIME)
 * with no body of its own, so whatever follows -- the next key, or the
 * terminating ";" -- becomes the body and runs only when this key compares
 * equal.  The macros expect the locals p1, p2 and result to be in scope,
 * plus lresult for ORDERKEY_PCTCPU.  result is positive when p2's value
 * is the larger one.
 */

/* compares p_pctcpu; the difference is reduced to -1, 0 or 1 */
#define ORDERKEY_PCTCPU \
	if (lresult = (pctcpu)p2->p_pctcpu - (pctcpu)p1->p_pctcpu, \
	    (result = lresult > 0 ? 1 : lresult < 0 ? -1 : 0) == 0)
/* compares p_rtime_sec first, then p_rtime_usec */
#define ORDERKEY_CPUTIME \
	if ((result = p2->p_rtime_sec - p1->p_rtime_sec) == 0) \
		if ((result = p2->p_rtime_usec - p1->p_rtime_usec) == 0)
/* compares the sorted_state[] rank of each p_stat */
#define ORDERKEY_STATE \
	if ((result = sorted_state[(unsigned char)p2->p_stat] - \
	    sorted_state[(unsigned char)p1->p_stat])  == 0)
/* compares p_priority */
#define ORDERKEY_PRIO \
	if ((result = p2->p_priority - p1->p_priority) == 0)
/* compares p_vm_rssize */
#define ORDERKEY_RSSIZE \
	if ((result = p2->p_vm_rssize - p1->p_vm_rssize) == 0)
/* compares PROCSIZE() */
#define ORDERKEY_MEM \
	if ((result = PROCSIZE(p2) - PROCSIZE(p1)) == 0)
467
468/* compare_cpu - the comparison function for sorting by cpu percentage */
469static int
470compare_cpu(const void *v1, const void *v2)
471{
472	struct proc **pp1 = (struct proc **) v1;
473	struct proc **pp2 = (struct proc **) v2;
474	struct kinfo_proc2 *p1, *p2;
475	pctcpu lresult;
476	int result;
477
478	/* remove one level of indirection */
479	p1 = *(struct kinfo_proc2 **) pp1;
480	p2 = *(struct kinfo_proc2 **) pp2;
481
482	ORDERKEY_PCTCPU
483	ORDERKEY_CPUTIME
484	ORDERKEY_STATE
485	ORDERKEY_PRIO
486	ORDERKEY_RSSIZE
487	ORDERKEY_MEM
488		;
489	return (result);
490}
491
492/* compare_size - the comparison function for sorting by total memory usage */
493static int
494compare_size(const void *v1, const void *v2)
495{
496	struct proc **pp1 = (struct proc **) v1;
497	struct proc **pp2 = (struct proc **) v2;
498	struct kinfo_proc2 *p1, *p2;
499	pctcpu lresult;
500	int result;
501
502	/* remove one level of indirection */
503	p1 = *(struct kinfo_proc2 **) pp1;
504	p2 = *(struct kinfo_proc2 **) pp2;
505
506	ORDERKEY_MEM
507	ORDERKEY_RSSIZE
508	ORDERKEY_PCTCPU
509	ORDERKEY_CPUTIME
510	ORDERKEY_STATE
511	ORDERKEY_PRIO
512		;
513	return (result);
514}
515
516/* compare_res - the comparison function for sorting by resident set size */
517static int
518compare_res(const void *v1, const void *v2)
519{
520	struct proc **pp1 = (struct proc **) v1;
521	struct proc **pp2 = (struct proc **) v2;
522	struct kinfo_proc2 *p1, *p2;
523	pctcpu lresult;
524	int result;
525
526	/* remove one level of indirection */
527	p1 = *(struct kinfo_proc2 **) pp1;
528	p2 = *(struct kinfo_proc2 **) pp2;
529
530	ORDERKEY_RSSIZE
531	ORDERKEY_MEM
532	ORDERKEY_PCTCPU
533	ORDERKEY_CPUTIME
534	ORDERKEY_STATE
535	ORDERKEY_PRIO
536		;
537	return (result);
538}
539
540/* compare_time - the comparison function for sorting by CPU time */
541static int
542compare_time(const void *v1, const void *v2)
543{
544	struct proc **pp1 = (struct proc **) v1;
545	struct proc **pp2 = (struct proc **) v2;
546	struct kinfo_proc2 *p1, *p2;
547	pctcpu lresult;
548	int result;
549
550	/* remove one level of indirection */
551	p1 = *(struct kinfo_proc2 **) pp1;
552	p2 = *(struct kinfo_proc2 **) pp2;
553
554	ORDERKEY_CPUTIME
555	ORDERKEY_PCTCPU
556	ORDERKEY_STATE
557	ORDERKEY_PRIO
558	ORDERKEY_MEM
559	ORDERKEY_RSSIZE
560		;
561	return (result);
562}
563
564/* compare_prio - the comparison function for sorting by CPU time */
565static int
566compare_prio(const void *v1, const void *v2)
567{
568	struct proc   **pp1 = (struct proc **) v1;
569	struct proc   **pp2 = (struct proc **) v2;
570	struct kinfo_proc2 *p1, *p2;
571	pctcpu lresult;
572	int result;
573
574	/* remove one level of indirection */
575	p1 = *(struct kinfo_proc2 **) pp1;
576	p2 = *(struct kinfo_proc2 **) pp2;
577
578	ORDERKEY_PRIO
579	ORDERKEY_PCTCPU
580	ORDERKEY_CPUTIME
581	ORDERKEY_STATE
582	ORDERKEY_RSSIZE
583	ORDERKEY_MEM
584		;
585	return (result);
586}
587
/*
 * NULL-terminated table of the comparison functions, listed in the same
 * order as the names in ordernames[] ("cpu", "size", "res", "time", "pri").
 */
int (*proc_compares[])(const void *, const void *) = {
	compare_cpu,
	compare_size,
	compare_res,
	compare_time,
	compare_prio,
	NULL
};
596
597/*
598 * proc_owner(pid) - returns the uid that owns process "pid", or -1 if
599 *		the process does not exist.
600 *		It is EXTREMLY IMPORTANT that this function work correctly.
601 *		If top runs setuid root (as in SVR4), then this function
602 *		is the only thing that stands in the way of a serious
603 *		security problem.  It validates requests for the "kill"
604 *		and "renice" commands.
605 */
606uid_t
607proc_owner(pid_t pid)
608{
609	struct kinfo_proc2 **prefp, *pp;
610	int cnt;
611
612	prefp = pref;
613	cnt = pref_len;
614	while (--cnt >= 0) {
615		pp = *prefp++;
616		if (pp->p_pid == pid)
617			return ((uid_t)pp->p_ruid);
618	}
619	return (uid_t)(-1);
620}
621
622/*
623 * swapmode is rewritten by Tobias Weingartner <weingart@openbsd.org>
624 * to be based on the new swapctl(2) system call.
625 */
626static int
627swapmode(int *used, int *total)
628{
629	struct swapent *swdev;
630	int nswap, rnswap, i;
631
632	nswap = swapctl(SWAP_NSWAP, 0, 0);
633	if (nswap == 0)
634		return 0;
635
636	swdev = malloc(nswap * sizeof(*swdev));
637	if (swdev == NULL)
638		return 0;
639
640	rnswap = swapctl(SWAP_STATS, swdev, nswap);
641	if (rnswap == -1)
642		return 0;
643
644	/* if rnswap != nswap, then what? */
645
646	/* Total things up */
647	*total = *used = 0;
648	for (i = 0; i < nswap; i++) {
649		if (swdev[i].se_flags & SWF_ENABLE) {
650			*used += (swdev[i].se_inuse / (1024 / DEV_BSIZE));
651			*total += (swdev[i].se_nblks / (1024 / DEV_BSIZE));
652		}
653	}
654	free(swdev);
655	return 1;
656}
657