machine.c revision 1.21
1/*	$OpenBSD: machine.c,v 1.21 2001/02/17 22:55:07 deraadt Exp $	*/
2
3/*
4 * top - a top users display for Unix
5 *
6 * SYNOPSIS:  For an OpenBSD system
7 *
8 * DESCRIPTION:
9 * This is the machine-dependent module for OpenBSD
10 * Tested on:
11 *	i386
12 *
13 * LIBS: -lkvm
14 *
15 * TERMCAP: -ltermlib
16 *
17 * CFLAGS: -DHAVE_GETOPT -DORDER
18 *
19 * AUTHOR:  Thorsten Lockert <tholo@sigmasoft.com>
20 *          Adapted from BSD4.4 by Christos Zoulas <christos@ee.cornell.edu>
21 *          Patch for process wait display by Jarl F. Greipsland <jarle@idt.unit.no>
22 *	    Patch for -DORDER by Kenneth Stailey <kstailey@disclosure.com>
23 *	    Patch for new swapctl(2) by Tobias Weingartner <weingart@openbsd.org>
24 */
25
26#include <sys/types.h>
27#include <sys/signal.h>
28#include <sys/param.h>
29
30#define DOSWAP
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <string.h>
35#include <limits.h>
36#include <err.h>
37#include <math.h>
38#include <kvm.h>
39#include <unistd.h>
40#include <sys/errno.h>
41#include <sys/sysctl.h>
42#include <sys/dir.h>
43#include <sys/dkstat.h>
44#include <sys/file.h>
45#include <sys/time.h>
46#include <sys/resource.h>
47
48#ifdef DOSWAP
49#include <sys/swap.h>
50#include <err.h>
51#endif
52
53static int getkval __P((unsigned long, int *, int, char *));
54static int swapmode __P((int *, int *));
55
56#include "top.h"
57#include "display.h"
58#include "machine.h"
59#include "utils.h"
60
/*
 * The handle that get_process_info passes back to its caller and that
 * format_next_process later consumes: a cursor over the array of
 * selected process pointers.
 */

struct handle {
	struct kinfo_proc **next_proc;	/* next process pointer to hand out */
	int     remaining;	/* how many pointers are still left */
};
67
68/* declarations for load_avg */
69#include "loadavg.h"
70
/*
 * Field accessors for a struct kinfo_proc pointer:
 *   PP - a member of the kp_proc part
 *   EP - a member of the kp_eproc part
 *   VP - a member of kp_eproc.e_vm
 */
#define PP(pp, field) ((pp)->kp_proc.field)
#define EP(pp, field) ((pp)->kp_eproc.field)
#define VP(pp, field) ((pp)->kp_eproc.e_vm.field)

/* process size as displayed: text + data + stack segment sizes */
#define PROCSIZE(pp) \
	(VP((pp), vm_tsize) + VP((pp), vm_dsize) + VP((pp), vm_ssize))
77
/*
 *  Layout of the per-process area.  The column header below carries an
 *  "X" placeholder where format_header() writes the user column title.
 */
static char header[] =
"  PID X        PRI NICE  SIZE   RES STATE WAIT     TIME    CPU COMMAND";
/* 0123456   -- the placeholder begins at header+6 */
#define UNAME_START 6

/* printf-style format for one process line; columns follow the header */
#define Proc_format \
	"%5d %-8.8s %3d %4d %5s %5s %-5s %-6.6s %6s %5.2f%% %.14s"
88
89
/*
 * Short process state names shown in the "STATE" column, indexed by the
 * process status value.  The "run" entry is padded with extra NULs so a
 * slash and a processor number can be added when needed.
 */

char *state_abbrev[] = {
	"",
	"start",
	"run\0\0\0",
	"sleep",
	"stop",
	"zomb",
};
97
98
99static kvm_t *kd;
100
101/* these are retrieved from the kernel in _init */
102
103static int stathz;
104
105/* these are offsets obtained via nlist and used in the get_ functions */
106
107static unsigned long cp_time_offset;
108
109/* these are for calculating cpu state percentages */
110static long cp_time[CPUSTATES];
111static long cp_old[CPUSTATES];
112static long cp_diff[CPUSTATES];
113
114/* these are for detailing the process states */
115int     process_states[7];
116char   *procstatenames[] = {
117	"", " starting, ", " running, ", " idle, ", " stopped, ", " zombie, ",
118	NULL
119};
120
121/* these are for detailing the cpu states */
122int     cpu_states[CPUSTATES];
123char   *cpustatenames[] = {
124	"user", "nice", "system", "interrupt", "idle", NULL
125};
126
127/* these are for detailing the memory statistics */
128int     memory_stats[8];
129char   *memorynames[] = {
130	"Real: ", "K/", "K act/tot  ", "Free: ", "K  ",
131#ifdef DOSWAP
132	"Swap: ", "K/", "K used/tot",
133#endif
134	NULL
135};
136
#ifdef ORDER
/* names of the sort orders that may be selected; the first is the default */
char   *ordernames[] = {
	"cpu",
	"size",
	"res",
	"time",
	"pri",
	NULL
};
#endif
141
/* bookkeeping for the process array returned by kvm_getprocs */
static int nproc;		/* entries in the latest snapshot */
static int onproc = -1;		/* capacity pref[] was last sized for */
static int pref_len;		/* pointers in pref[] that are in use */
static struct kinfo_proc *pbase;	/* the latest snapshot itself */
static struct kinfo_proc **pref;	/* pointers to the selected entries */

/* used when converting page counts for the memory statistics */
static int pageshift;		/* log base 2 of the pagesize, less LOG1024 */

/* pagetok - convert a page count using pageshift */
#define pagetok(size) ((size) << pageshift)
154
155int
156getstathz()
157{
158	struct clockinfo cinf;
159	size_t  size = sizeof(cinf);
160	int     mib[2];
161
162	mib[0] = CTL_KERN;
163	mib[1] = KERN_CLOCKRATE;
164	if (sysctl(mib, 2, &cinf, &size, NULL, 0) == -1)
165		return (-1);
166	return (cinf.stathz);
167}
168
169int
170machine_init(statics)
171	struct statics *statics;
172{
173	char    errbuf[_POSIX2_LINE_MAX];
174	int pagesize, i = 0;
175
176	if ((kd = kvm_openfiles(NULL, NULL, NULL, O_RDONLY, errbuf)) == NULL) {
177		warnx("%s", errbuf);
178		return (-1);
179	}
180	setegid(getgid());
181	setgid(getgid());
182
183	stathz = getstathz();
184	if (stathz == -1)
185		return (-1);
186
187	pbase = NULL;
188	pref = NULL;
189	onproc = -1;
190	nproc = 0;
191
192	/* get the page size with "getpagesize" and calculate pageshift from
193	 * it */
194	pagesize = getpagesize();
195	pageshift = 0;
196	while (pagesize > 1) {
197		pageshift++;
198		pagesize >>= 1;
199	}
200
201	/* we only need the amount of log(2)1024 for our conversion */
202	pageshift -= LOG1024;
203
204	/* fill in the statics information */
205	statics->procstate_names = procstatenames;
206	statics->cpustate_names = cpustatenames;
207	statics->memory_names = memorynames;
208#ifdef ORDER
209	statics->order_names = ordernames;
210#endif
211	return (0);
212}
213
214char *
215format_header(uname_field)
216	char   *uname_field;
217{
218	char *ptr;
219
220	ptr = header + UNAME_START;
221	while (*uname_field != '\0') {
222		*ptr++ = *uname_field++;
223	}
224	return (header);
225}
226
227void
228get_system_info(si)
229	struct system_info *si;
230{
231	static int sysload_mib[] = {CTL_VM, VM_LOADAVG};
232	static int vmtotal_mib[] = {CTL_VM, VM_METER};
233	static int cp_time_mib[] = { CTL_KERN, KERN_CPTIME };
234	struct loadavg sysload;
235	struct vmtotal vmtotal;
236	double *infoloadp;
237	int total, i;
238	size_t  size;
239
240#if 1
241	size = sizeof(cp_time);
242	if (sysctl(cp_time_mib, 2, &cp_time, &size, NULL, 0) < 0) {
243		warn("sysctl kern.cp_time failed");
244		total = 0;
245	}
246#else
247	/* get the cp_time array */
248	(void) getkval(cp_time_offset, (int *) cp_time, sizeof(cp_time),
249	    "_cp_time");
250#endif
251
252	size = sizeof(sysload);
253	if (sysctl(sysload_mib, 2, &sysload, &size, NULL, 0) < 0) {
254		warn("sysctl failed");
255		bzero(&total, sizeof(total));
256	}
257	infoloadp = si->load_avg;
258	for (i = 0; i < 3; i++)
259		*infoloadp++ = ((double) sysload.ldavg[i]) / sysload.fscale;
260
261	/* convert cp_time counts to percentages */
262	total = percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
263
264	/* get total -- systemwide main memory usage structure */
265	size = sizeof(vmtotal);
266	if (sysctl(vmtotal_mib, 2, &vmtotal, &size, NULL, 0) < 0) {
267		warn("sysctl failed");
268		bzero(&vmtotal, sizeof(vmtotal));
269	}
270	/* convert memory stats to Kbytes */
271	memory_stats[0] = -1;
272	memory_stats[1] = pagetok(vmtotal.t_arm);
273	memory_stats[2] = pagetok(vmtotal.t_rm);
274	memory_stats[3] = -1;
275	memory_stats[4] = pagetok(vmtotal.t_free);
276	memory_stats[5] = -1;
277#ifdef DOSWAP
278	if (!swapmode(&memory_stats[6], &memory_stats[7])) {
279		memory_stats[6] = 0;
280		memory_stats[7] = 0;
281	}
282#endif
283
284	/* set arrays and strings */
285	si->cpustates = cpu_states;
286	si->memory = memory_stats;
287	si->last_pid = -1;
288}
289
290static struct handle handle;
291
292caddr_t
293get_process_info(si, sel, compare)
294	struct system_info *si;
295	struct process_select *sel;
296	int (*compare) __P((const void *, const void *));
297
298{
299	int show_idle, show_system, show_uid, show_command;
300	int total_procs, active_procs, i;
301	struct kinfo_proc **prefp, *pp;
302
303	if ((pbase = kvm_getprocs(kd, KERN_PROC_KTHREAD, 0, &nproc)) == NULL) {
304		warnx("%s", kvm_geterr(kd));
305		quit(23);
306	}
307	if (nproc > onproc)
308		pref = (struct kinfo_proc **) realloc(pref, sizeof(struct kinfo_proc *)
309		    * (onproc = nproc));
310	if (pref == NULL) {
311		warnx("Out of memory.");
312		quit(23);
313	}
314	/* get a pointer to the states summary array */
315	si->procstates = process_states;
316
317	/* set up flags which define what we are going to select */
318	show_idle = sel->idle;
319	show_system = sel->system;
320	show_uid = sel->uid != -1;
321	show_command = sel->command != NULL;
322
323	/* count up process states and get pointers to interesting procs */
324	total_procs = 0;
325	active_procs = 0;
326	memset((char *) process_states, 0, sizeof(process_states));
327	prefp = pref;
328	for (pp = pbase, i = 0; i < nproc; pp++, i++) {
329		/*
330		 *  Place pointers to each valid proc structure in pref[].
331		 *  Process slots that are actually in use have a non-zero
332		 *  status field.  Processes with SSYS set are system
333		 *  processes---these get ignored unless show_sysprocs is set.
334		 */
335		if (PP(pp, p_stat) != 0 &&
336		    (show_system || ((PP(pp, p_flag) & P_SYSTEM) == 0))) {
337			total_procs++;
338			process_states[(unsigned char) PP(pp, p_stat)]++;
339			if ((PP(pp, p_stat) != SZOMB) &&
340			    (show_idle || (PP(pp, p_pctcpu) != 0) ||
341				(PP(pp, p_stat) == SRUN)) &&
342			    (!show_uid || EP(pp, e_pcred.p_ruid) == (uid_t) sel->uid)) {
343				*prefp++ = pp;
344				active_procs++;
345			}
346		}
347	}
348
349	/* if requested, sort the "interesting" processes */
350	if (compare != NULL) {
351		qsort((char *) pref, active_procs, sizeof(struct kinfo_proc *), compare);
352	}
353	/* remember active and total counts */
354	si->p_total = total_procs;
355	si->p_active = pref_len = active_procs;
356
357	/* pass back a handle */
358	handle.next_proc = pref;
359	handle.remaining = active_procs;
360	return ((caddr_t) & handle);
361}
362
363char    fmt[MAX_COLS];		/* static area where result is built */
364
365char *
366format_next_process(handle, get_userid)
367	caddr_t handle;
368	char *(*get_userid)();
369
370{
371	char waddr[sizeof(void *) * 2 + 3];	/* Hexify void pointer */
372	struct kinfo_proc *pp;
373	struct handle *hp;
374	char *p_wait;
375	int cputime;
376	double pct;
377
378	/* find and remember the next proc structure */
379	hp = (struct handle *) handle;
380	pp = *(hp->next_proc++);
381	hp->remaining--;
382
383	/* get the process's user struct and set cputime */
384	if ((PP(pp, p_flag) & P_INMEM) == 0) {
385		/*
386		 * Print swapped processes as <pname>
387		 */
388		char   *comm = PP(pp, p_comm);
389#define COMSIZ sizeof(PP(pp, p_comm))
390		char    buf[COMSIZ];
391		(void) strncpy(buf, comm, COMSIZ);
392		comm[0] = '<';
393		(void) strncpy(&comm[1], buf, COMSIZ - 2);
394		comm[COMSIZ - 2] = '\0';
395		(void) strncat(comm, ">", COMSIZ - 1);
396		comm[COMSIZ - 1] = '\0';
397	}
398	cputime = (PP(pp, p_uticks) + PP(pp, p_sticks) + PP(pp, p_iticks)) / stathz;
399
400	/* calculate the base for cpu percentages */
401	pct = pctdouble(PP(pp, p_pctcpu));
402
403	if (PP(pp, p_wchan))
404		if (PP(pp, p_wmesg))
405			p_wait = EP(pp, e_wmesg);
406		else {
407			snprintf(waddr, sizeof(waddr), "%lx",
408			    (unsigned long) (PP(pp, p_wchan)) & ~KERNBASE);
409			p_wait = waddr;
410		}
411	else
412		p_wait = "-";
413
414	/* format this entry */
415	snprintf(fmt, MAX_COLS,
416	    Proc_format,
417	    PP(pp, p_pid),
418	    (*get_userid) (EP(pp, e_pcred.p_ruid)),
419	    PP(pp, p_priority) - PZERO,
420	    PP(pp, p_nice) - NZERO,
421	    format_k(pagetok(PROCSIZE(pp))),
422	    format_k(pagetok(VP(pp, vm_rssize))),
423	    (PP(pp, p_stat) == SSLEEP && PP(pp, p_slptime) > MAXSLP)
424	    ? "idle" : state_abbrev[(unsigned char) PP(pp, p_stat)],
425	    p_wait,
426	    format_time(cputime),
427	    100.0 * pct,
428	    printable(PP(pp, p_comm)));
429
430	/* return the result */
431	return (fmt);
432}
433
/*
 * Ranking of the process states for the qsort comparison code, indexed
 * by process status value; a larger rank sorts earlier.
 */
static unsigned char sorted_state[] =
{
	0,			/* not used		 */
	4,			/* start		 */
	5,			/* run			 */
	2,			/* sleep		 */
	3,			/* stop			 */
	1			/* zombie		 */
};
444#ifdef ORDER
445
/*
 *  proc_compares - comparison functions for "qsort"
 */

/*
 * First, the possible comparison keys.  These are defined in such a way
 * that they can be merely listed in the source code to define the actual
 * desired ordering.
 *
 * Each ORDERKEY_* macro expands to an "if" that stores the outcome for
 * its key in "result" and lets the statement that follows run only when
 * the two processes tie on that key.  The macros expect "p1" and "p2"
 * (struct kinfo_proc pointers) and an int "result" to be in scope.
 *
 * Keys are compared with relational operators rather than by
 * subtraction: a difference of unsigned or wider-than-int values does
 * not reliably keep its sign once it is stored in an int.
 */

/* three-way compare: 1 if v2 > v1, -1 if v2 < v1, 0 if they are equal */
#define ORDERKEY_CMP(v2, v1)	(((v2) > (v1)) - ((v2) < (v1)))

#define ORDERKEY_PCTCPU \
	if ((result = ORDERKEY_CMP(PP(p2, p_pctcpu), PP(p1, p_pctcpu))) == 0)
#define ORDERKEY_CPUTIME \
	if ((result = ORDERKEY_CMP(PP(p2, p_rtime.tv_sec), \
	    PP(p1, p_rtime.tv_sec))) == 0) \
		if ((result = ORDERKEY_CMP(PP(p2, p_rtime.tv_usec), \
		    PP(p1, p_rtime.tv_usec))) == 0)
#define ORDERKEY_STATE \
	if ((result = ORDERKEY_CMP(sorted_state[(unsigned char) PP(p2, p_stat)], \
	    sorted_state[(unsigned char) PP(p1, p_stat)])) == 0)
#define ORDERKEY_PRIO \
	if ((result = ORDERKEY_CMP(PP(p2, p_priority), PP(p1, p_priority))) == 0)
#define ORDERKEY_RSSIZE \
	if ((result = ORDERKEY_CMP(VP(p2, vm_rssize), VP(p1, vm_rssize))) == 0)
#define ORDERKEY_MEM \
	if ((result = ORDERKEY_CMP(PROCSIZE(p2), PROCSIZE(p1))) == 0)
473
474
475/* compare_cpu - the comparison function for sorting by cpu percentage */
476int
477compare_cpu(v1, v2)
478	const void *v1, *v2;
479{
480	struct proc **pp1 = (struct proc **) v1;
481	struct proc **pp2 = (struct proc **) v2;
482	struct kinfo_proc *p1;
483	struct kinfo_proc *p2;
484	int result;
485	pctcpu lresult;
486
487	/* remove one level of indirection */
488	p1 = *(struct kinfo_proc **) pp1;
489	p2 = *(struct kinfo_proc **) pp2;
490
491	ORDERKEY_PCTCPU
492	    ORDERKEY_CPUTIME
493	    ORDERKEY_STATE
494	    ORDERKEY_PRIO
495	    ORDERKEY_RSSIZE
496	    ORDERKEY_MEM
497	    ;
498	return (result);
499}
500
501/* compare_size - the comparison function for sorting by total memory usage */
502int
503compare_size(v1, v2)
504	const void *v1, *v2;
505{
506	struct proc **pp1 = (struct proc **) v1;
507	struct proc **pp2 = (struct proc **) v2;
508	struct kinfo_proc *p1;
509	struct kinfo_proc *p2;
510	int result;
511	pctcpu lresult;
512
513	/* remove one level of indirection */
514	p1 = *(struct kinfo_proc **) pp1;
515	p2 = *(struct kinfo_proc **) pp2;
516
517	ORDERKEY_MEM
518	    ORDERKEY_RSSIZE
519	    ORDERKEY_PCTCPU
520	    ORDERKEY_CPUTIME
521	    ORDERKEY_STATE
522	    ORDERKEY_PRIO
523	    ;
524	return (result);
525}
526
527/* compare_res - the comparison function for sorting by resident set size */
528int
529compare_res(v1, v2)
530	const void *v1, *v2;
531{
532	struct proc **pp1 = (struct proc **) v1;
533	struct proc **pp2 = (struct proc **) v2;
534	struct kinfo_proc *p1;
535	struct kinfo_proc *p2;
536	int result;
537	pctcpu lresult;
538
539	/* remove one level of indirection */
540	p1 = *(struct kinfo_proc **) pp1;
541	p2 = *(struct kinfo_proc **) pp2;
542
543	ORDERKEY_RSSIZE
544	    ORDERKEY_MEM
545	    ORDERKEY_PCTCPU
546	    ORDERKEY_CPUTIME
547	    ORDERKEY_STATE
548	    ORDERKEY_PRIO
549	    ;
550	return (result);
551}
552
553/* compare_time - the comparison function for sorting by CPU time */
554int
555compare_time(v1, v2)
556	const void *v1, *v2;
557{
558	struct proc **pp1 = (struct proc **) v1;
559	struct proc **pp2 = (struct proc **) v2;
560	struct kinfo_proc *p1;
561	struct kinfo_proc *p2;
562	int result;
563	pctcpu lresult;
564
565	/* remove one level of indirection */
566	p1 = *(struct kinfo_proc **) pp1;
567	p2 = *(struct kinfo_proc **) pp2;
568
569	ORDERKEY_CPUTIME
570	    ORDERKEY_PCTCPU
571	    ORDERKEY_STATE
572	    ORDERKEY_PRIO
573	    ORDERKEY_MEM
574	    ORDERKEY_RSSIZE
575	    ;
576	return (result);
577}
578
579/* compare_prio - the comparison function for sorting by CPU time */
580int
581compare_prio(v1, v2)
582	const void *v1, *v2;
583{
584	struct proc **pp1 = (struct proc **) v1;
585	struct proc **pp2 = (struct proc **) v2;
586	struct kinfo_proc *p1;
587	struct kinfo_proc *p2;
588	int result;
589	pctcpu lresult;
590
591	/* remove one level of indirection */
592	p1 = *(struct kinfo_proc **) pp1;
593	p2 = *(struct kinfo_proc **) pp2;
594
595	ORDERKEY_PRIO
596	    ORDERKEY_PCTCPU
597	    ORDERKEY_CPUTIME
598	    ORDERKEY_STATE
599	    ORDERKEY_RSSIZE
600	    ORDERKEY_MEM
601	    ;
602	return (result);
603}
604
605int     (*proc_compares[]) () = {
606	compare_cpu,
607	compare_size,
608	compare_res,
609	compare_time,
610	compare_prio,
611	NULL
612};
613#else
614/*
615 *  proc_compare - comparison function for "qsort"
616 *	Compares the resource consumption of two processes using five
617 *  	distinct keys.  The keys (in descending order of importance) are:
618 *  	percent cpu, cpu ticks, state, resident set size, total virtual
619 *  	memory usage.  The process states are ordered as follows (from least
620 *  	to most important):  zombie, sleep, stop, start, run.  The array
621 *  	declaration below maps a process state index into a number that
622 *  	reflects this ordering.
623 */
624int
625proc_compare(v1, v2)
626	const void *v1, *v2;
627{
628	struct proc **pp1 = (struct proc **) v1;
629	struct proc **pp2 = (struct proc **) v2;
630	struct kinfo_proc *p1;
631	struct kinfo_proc *p2;
632	int result;
633	pctcpu lresult;
634
635	/* remove one level of indirection */
636	p1 = *(struct kinfo_proc **) pp1;
637	p2 = *(struct kinfo_proc **) pp2;
638
639	/* compare percent cpu (pctcpu) */
640	if ((lresult = PP(p2, p_pctcpu) - PP(p1, p_pctcpu)) == 0) {
641		/* use CPU usage to break the tie */
642		if ((result = PP(p2, p_rtime).tv_sec - PP(p1, p_rtime).tv_sec) == 0) {
643			/* use process state to break the tie */
644			if ((result = sorted_state[(unsigned char) PP(p2, p_stat)] -
645				sorted_state[(unsigned char) PP(p1, p_stat)]) == 0) {
646				/* use priority to break the tie */
647				if ((result = PP(p2, p_priority) - PP(p1, p_priority)) == 0) {
648					/* use resident set size (rssize) to
649					 * break the tie */
650					if ((result = VP(p2, vm_rssize) - VP(p1, vm_rssize)) == 0) {
651						/* use total memory to break
652						 * the tie */
653						result = PROCSIZE(p2) - PROCSIZE(p1);
654					}
655				}
656			}
657		}
658	} else {
659		result = lresult < 0 ? -1 : 1;
660	}
661
662	return (result);
663}
664#endif
665
666/*
667 * proc_owner(pid) - returns the uid that owns process "pid", or -1 if
668 *		the process does not exist.
669 *		It is EXTREMLY IMPORTANT that this function work correctly.
670 *		If top runs setuid root (as in SVR4), then this function
671 *		is the only thing that stands in the way of a serious
672 *		security problem.  It validates requests for the "kill"
673 *		and "renice" commands.
674 */
675int
676proc_owner(pid)
677	pid_t   pid;
678{
679	struct kinfo_proc **prefp, *pp;
680	int cnt;
681
682	prefp = pref;
683	cnt = pref_len;
684	while (--cnt >= 0) {
685		pp = *prefp++;
686		if (PP(pp, p_pid) == pid) {
687			return ((int) EP(pp, e_pcred.p_ruid));
688		}
689	}
690	return (-1);
691}
692#ifdef DOSWAP
693/*
694 * swapmode is rewritten by Tobias Weingartner <weingart@openbsd.org>
695 * to be based on the new swapctl(2) system call.
696 */
697static int
698swapmode(used, total)
699	int    *used;
700	int    *total;
701{
702	int     nswap, rnswap, i;
703	struct swapent *swdev;
704
705	nswap = swapctl(SWAP_NSWAP, 0, 0);
706	if (nswap == 0)
707		return 0;
708
709	swdev = malloc(nswap * sizeof(*swdev));
710	if (swdev == NULL)
711		return 0;
712
713	rnswap = swapctl(SWAP_STATS, swdev, nswap);
714	if (rnswap == -1)
715		return 0;
716
717	/* if rnswap != nswap, then what? */
718
719	/* Total things up */
720	*total = *used = 0;
721	for (i = 0; i < nswap; i++) {
722		if (swdev[i].se_flags & SWF_ENABLE) {
723			*used += (swdev[i].se_inuse / (1024 / DEV_BSIZE));
724			*total += (swdev[i].se_nblks / (1024 / DEV_BSIZE));
725		}
726	}
727
728	free(swdev);
729	return 1;
730}
731#endif
732