machine.c revision 1.41
1/* $OpenBSD: machine.c,v 1.41 2004/06/11 05:29:28 deraadt Exp $	 */
2
3/*-
4 * Copyright (c) 1994 Thorsten Lockert <tholo@sigmasoft.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
20 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
21 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * AUTHOR:  Thorsten Lockert <tholo@sigmasoft.com>
30 *          Adapted from BSD4.4 by Christos Zoulas <christos@ee.cornell.edu>
31 *          Patch for process wait display by Jarl F. Greipsland <jarle@idt.unit.no>
32 *	    Patch for -DORDER by Kenneth Stailey <kstailey@disclosure.com>
33 *	    Patch for new swapctl(2) by Tobias Weingartner <weingart@openbsd.org>
34 */
35
36#include <sys/types.h>
37#include <sys/signal.h>
38#include <sys/param.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <limits.h>
43#include <err.h>
44#include <math.h>
45#include <unistd.h>
46#include <sys/errno.h>
47#include <sys/sysctl.h>
48#include <sys/dir.h>
49#include <sys/dkstat.h>
50#include <sys/file.h>
51#include <sys/time.h>
52#include <sys/resource.h>
53#include <sys/swap.h>
54#include <err.h>
55
56#include "top.h"
57#include "display.h"
58#include "machine.h"
59#include "utils.h"
60#include "loadavg.h"
61
62static int	swapmode(int *, int *);	/* forward declaration; defined at the end of this file */
63
64/*
 * get_process_info passes back a handle.  This is what it looks like:
 * a cursor over the pref[] pointer array.  format_next_process() reads
 * *next_proc, then advances next_proc and decrements remaining.
 */
65
66struct handle {
67	struct kinfo_proc2 **next_proc;	/* points to next valid proc pointer */
68	int		remaining;	/* number of pointers remaining */
69};
70
71/*
 * what we consider to be process size: the sum of the text, data and
 * stack size fields.  The result is passed through pagetok() before it
 * is displayed.
 */
72#define PROCSIZE(pp) ((pp)->p_vm_tsize + (pp)->p_vm_dsize + (pp)->p_vm_ssize)
73
74/*
75 *  These definitions control the format of the per-process area.
 *  The "X" in header[] marks the start of the user-name column;
 *  format_header() overwrites the text in place starting at
 *  header + UNAME_START.
76 */
77static char header[] =
78	"  PID X        PRI NICE  SIZE   RES STATE    WAIT     TIME    CPU COMMAND";
79
80/* 0123456   -- field to fill in starts at header+6 */
81#define UNAME_START 6
82
/* snprintf format used by format_next_process() for one process line */
83#define Proc_format \
84	"%5d %-8.8s %3d %4d %5s %5s %-8s %-6.6s %6s %5.2f%% %.11s"
85
86/* process state names for the "STATE" column of the display */
87/*
88 * indexed by p_stat; when ncpu > 1, state_abbr() appends a slash and the
89 * processor number with snprintf, so the strings need no padding here
90 */
91
92char	*state_abbrev[] = {
93	"", "start", "run", "sleep", "stop", "zomb", "dead", "onproc"
94};
95
96static int      stathz;	/* set by machine_init() from getstathz(); divides tick counts in format_next_process() */
97
98/*
 * these are for calculating cpu state percentages:
 * cp_time is filled by the kern.cp_time sysctl in get_system_info();
 * cp_old and cp_diff are handed to percentages() together with cp_time.
 */
99static long     cp_time[CPUSTATES];
100static long     cp_old[CPUSTATES];
101static long     cp_diff[CPUSTATES];
102
/*
 * these are for detailing the process states
 *
 * process_states[] is indexed directly by p_stat in get_process_info(),
 * so it must have one slot for every state that state_abbrev[] lists
 * (indices 0 through 7, "onproc" being the highest).  With only seven
 * slots a process in state 7 would be counted past the end of the array.
 *
 * procstatenames[] runs parallel to process_states[] and is terminated
 * by NULL; the entry for the unused state 0 is the empty string.
 */
int process_states[8];
char *procstatenames[] = {
	"", " starting, ", " running, ", " idle, ",
	" stopped, ", " zombie, ", " dead, ", " on processor, ",
	NULL
};
110
111/* these are for detailing the cpu states; cpu_states is filled by percentages() in get_system_info() */
112int cpu_states[CPUSTATES];
113char *cpustatenames[] = {
114	"user", "nice", "system", "interrupt", "idle", NULL
115};
116
117/*
 * these are for detailing the memory statistics.
 * memory_stats[] runs parallel to memorynames[]; get_system_info() stores
 * -1 in slots 0, 3 and 5 (the "Real: ", "Free: " and "Swap: " labels) and
 * Kbyte values in the other slots.
 */
118int memory_stats[8];
119char *memorynames[] = {
120	"Real: ", "K/", "K act/tot  ", "Free: ", "K  ",
121	"Swap: ", "K/", "K used/tot",
122	NULL
123};
124
125/*
 * these are names given to allowed sorting orders -- first is default.
 * NOTE(review): the order matches proc_compares[] further down; presumably
 * the two tables are indexed in parallel by the caller -- confirm.
 */
126char	*ordernames[] = {
127	"cpu", "size", "res", "time", "pri", NULL
128};
129
130/* these are for keeping track of the proc array */
131static int      nproc;	/* entry count reported by the last getprocs() call */
132static int      onproc = -1;	/* largest nproc that pref[] has been sized for; -1 before the first allocation */
133static int      pref_len;	/* number of pointers in pref[] selected by the last get_process_info() */
134static struct kinfo_proc2 *pbase;	/* process array returned by getprocs() */
135static struct kinfo_proc2 **pref;	/* pointers into pbase[] for the processes to display */
136
137/* these are for getting the memory statistics */
138static int      pageshift;	/* log base 2 of the pagesize, less LOG1024 (see machine_init) */
139
140/* define pagetok in terms of pageshift: converts a page count to Kbytes */
141#define pagetok(size) ((size) << pageshift)
142
143int		ncpu;	/* read from the hw.ncpu sysctl in machine_init() */
144
145unsigned int	maxslp;	/* read from the vm.maxslp sysctl in getprocs(); sleepers past it are shown as "idle" */
146
147static int
148getstathz(void)
149{
150	struct clockinfo cinf;
151	size_t size = sizeof(cinf);
152	int mib[2];
153
154	mib[0] = CTL_KERN;
155	mib[1] = KERN_CLOCKRATE;
156	if (sysctl(mib, 2, &cinf, &size, NULL, 0) == -1)
157		return (-1);
158	return (cinf.stathz);
159}
160
161int
162machine_init(struct statics *statics)
163{
164	size_t size = sizeof(ncpu);
165	int mib[2], pagesize;
166
167	mib[0] = CTL_HW;
168	mib[1] = HW_NCPU;
169	if (sysctl(mib, 2, &ncpu, &size, NULL, 0) == -1)
170		return (-1);
171
172	stathz = getstathz();
173	if (stathz == -1)
174		return (-1);
175
176	pbase = NULL;
177	pref = NULL;
178	onproc = -1;
179	nproc = 0;
180
181	/*
182	 * get the page size with "getpagesize" and calculate pageshift from
183	 * it
184	 */
185	pagesize = getpagesize();
186	pageshift = 0;
187	while (pagesize > 1) {
188		pageshift++;
189		pagesize >>= 1;
190	}
191
192	/* we only need the amount of log(2)1024 for our conversion */
193	pageshift -= LOG1024;
194
195	/* fill in the statics information */
196	statics->procstate_names = procstatenames;
197	statics->cpustate_names = cpustatenames;
198	statics->memory_names = memorynames;
199	statics->order_names = ordernames;
200	return (0);
201}
202
203char *
204format_header(char *uname_field)
205{
206	char *ptr;
207
208	ptr = header + UNAME_START;
209	while (*uname_field != '\0')
210		*ptr++ = *uname_field++;
211	return (header);
212}
213
214void
215get_system_info(struct system_info *si)
216{
217	static int sysload_mib[] = {CTL_VM, VM_LOADAVG};
218	static int vmtotal_mib[] = {CTL_VM, VM_METER};
219	static int cp_time_mib[] = {CTL_KERN, KERN_CPTIME};
220	struct loadavg sysload;
221	struct vmtotal vmtotal;
222	double *infoloadp;
223	size_t size;
224	int i;
225
226	size = sizeof(cp_time);
227	if (sysctl(cp_time_mib, 2, &cp_time, &size, NULL, 0) < 0)
228		warn("sysctl kern.cp_time failed");
229
230	size = sizeof(sysload);
231	if (sysctl(sysload_mib, 2, &sysload, &size, NULL, 0) < 0)
232		warn("sysctl failed");
233	infoloadp = si->load_avg;
234	for (i = 0; i < 3; i++)
235		*infoloadp++ = ((double) sysload.ldavg[i]) / sysload.fscale;
236
237	/* convert cp_time counts to percentages */
238	(void) percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
239
240	/* get total -- systemwide main memory usage structure */
241	size = sizeof(vmtotal);
242	if (sysctl(vmtotal_mib, 2, &vmtotal, &size, NULL, 0) < 0) {
243		warn("sysctl failed");
244		bzero(&vmtotal, sizeof(vmtotal));
245	}
246	/* convert memory stats to Kbytes */
247	memory_stats[0] = -1;
248	memory_stats[1] = pagetok(vmtotal.t_arm);
249	memory_stats[2] = pagetok(vmtotal.t_rm);
250	memory_stats[3] = -1;
251	memory_stats[4] = pagetok(vmtotal.t_free);
252	memory_stats[5] = -1;
253
254	if (!swapmode(&memory_stats[6], &memory_stats[7])) {
255		memory_stats[6] = 0;
256		memory_stats[7] = 0;
257	}
258
259	/* set arrays and strings */
260	si->cpustates = cpu_states;
261	si->memory = memory_stats;
262	si->last_pid = -1;
263}
264
265static struct handle handle;	/* the single handle whose address get_process_info() returns */
266
267static struct kinfo_proc2 *
268getprocs(int op, int arg, int *cnt)
269{
270	size_t size;
271	int mib[6] = {CTL_KERN, KERN_PROC2, 0, 0, sizeof(struct kinfo_proc2), 0};
272	static int maxslp_mib[] = {CTL_VM, VM_MAXSLP};
273	static struct kinfo_proc2 *procbase;
274	int st;
275
276	mib[2] = op;
277	mib[3] = arg;
278
279	size = sizeof(maxslp);
280	if (sysctl(maxslp_mib, 2, &maxslp, &size, NULL, 0) < 0) {
281		warn("sysctl vm.maxslp failed");
282		return (0);
283	}
284    retry:
285	free(procbase);
286	st = sysctl(mib, 6, NULL, &size, NULL, 0);
287	if (st == -1) {
288		/* _kvm_syserr(kd, kd->program, "kvm_getproc2"); */
289		return (0);
290	}
291	size = 5 * size / 4;			/* extra slop */
292	if ((procbase = malloc(size)) == NULL)
293		return (0);
294	mib[5] = (int)(size / sizeof(struct kinfo_proc2));
295	st = sysctl(mib, 6, procbase, &size, NULL, 0);
296	if (st == -1) {
297		if (errno == ENOMEM)
298			goto retry;
299		/* _kvm_syserr(kd, kd->program, "kvm_getproc2"); */
300		return (0);
301	}
302	*cnt = (int)(size / sizeof(struct kinfo_proc2));
303	return (procbase);
304}
305
306caddr_t
307get_process_info(struct system_info *si, struct process_select *sel,
308    int (*compare) (const void *, const void *))
309{
310	int show_idle, show_system, show_uid;
311	int total_procs, active_procs, i;
312	struct kinfo_proc2 **prefp, *pp;
313
314	if ((pbase = getprocs(KERN_PROC_KTHREAD, 0, &nproc)) == NULL) {
315		/* warnx("%s", kvm_geterr(kd)); */
316		quit(23);
317	}
318	if (nproc > onproc)
319		pref = (struct kinfo_proc2 **)realloc(pref,
320		    sizeof(struct kinfo_proc2 *) * (onproc = nproc));
321	if (pref == NULL) {
322		warnx("Out of memory.");
323		quit(23);
324	}
325	/* get a pointer to the states summary array */
326	si->procstates = process_states;
327
328	/* set up flags which define what we are going to select */
329	show_idle = sel->idle;
330	show_system = sel->system;
331	show_uid = sel->uid != (uid_t)-1;
332
333	/* count up process states and get pointers to interesting procs */
334	total_procs = 0;
335	active_procs = 0;
336	memset((char *) process_states, 0, sizeof(process_states));
337	prefp = pref;
338	for (pp = pbase, i = 0; i < nproc; pp++, i++) {
339		/*
340		 *  Place pointers to each valid proc structure in pref[].
341		 *  Process slots that are actually in use have a non-zero
342		 *  status field.  Processes with SSYS set are system
343		 *  processes---these get ignored unless show_sysprocs is set.
344		 */
345		if (pp->p_stat != 0 &&
346		    (show_system || (pp->p_flag & P_SYSTEM) == 0)) {
347			total_procs++;
348			process_states[(unsigned char) pp->p_stat]++;
349			if (pp->p_stat != SZOMB &&
350			    (show_idle || pp->p_pctcpu != 0 ||
351			    pp->p_stat == SRUN) &&
352			    (!show_uid || pp->p_ruid == sel->uid)) {
353				*prefp++ = pp;
354				active_procs++;
355			}
356		}
357	}
358
359	/* if requested, sort the "interesting" processes */
360	if (compare != NULL)
361		qsort((char *) pref, active_procs,
362		    sizeof(struct kinfo_proc2 *), compare);
363	/* remember active and total counts */
364	si->p_total = total_procs;
365	si->p_active = pref_len = active_procs;
366
367	/* pass back a handle */
368	handle.next_proc = pref;
369	handle.remaining = active_procs;
370	return ((caddr_t) & handle);
371}
372
373char fmt[MAX_COLS];	/* static area where result is built; format_next_process() overwrites and returns it */
374
375char *
376state_abbr(struct kinfo_proc2 *pp)
377{
378	static char buf[10];
379
380	if (ncpu > 1)
381		snprintf(buf, sizeof buf, "%s/%d",
382		    state_abbrev[(unsigned char)pp->p_stat], pp->p_cpuid);
383	else
384		snprintf(buf, sizeof buf, "%s",
385		    state_abbrev[(unsigned char)pp->p_stat]);
386	return buf;
387}
388
389char *
390format_next_process(caddr_t handle, char *(*get_userid)(uid_t))
391{
392	char *p_wait, waddr[sizeof(void *) * 2 + 3];	/* Hexify void pointer */
393	struct kinfo_proc2 *pp;
394	struct handle *hp;
395	int cputime;
396	double pct;
397
398	/* find and remember the next proc structure */
399	hp = (struct handle *) handle;
400	pp = *(hp->next_proc++);
401	hp->remaining--;
402
403	if ((pp->p_flag & P_INMEM) == 0) {
404		/*
405		 * Print swapped processes as <pname>
406		 */
407		char buf[sizeof(pp->p_comm)];
408
409		(void) strlcpy(buf, pp->p_comm, sizeof(buf));
410		(void) snprintf(pp->p_comm, sizeof(pp->p_comm), "<%s>", buf);
411	}
412	cputime = (pp->p_uticks + pp->p_sticks + pp->p_iticks) / stathz;
413
414	/* calculate the base for cpu percentages */
415	pct = pctdouble(pp->p_pctcpu);
416
417	if (pp->p_wchan) {
418		if (pp->p_wmesg)
419			p_wait = pp->p_wmesg;
420		else {
421			snprintf(waddr, sizeof(waddr), "%llx",
422			    pp->p_wchan & ~KERNBASE);
423			p_wait = waddr;
424		}
425	} else
426		p_wait = "-";
427
428	/* format this entry */
429	snprintf(fmt, sizeof fmt, Proc_format,
430	    pp->p_pid, (*get_userid)(pp->p_ruid),
431	    pp->p_priority - PZERO, pp->p_nice - NZERO,
432	    format_k(pagetok(PROCSIZE(pp))),
433	    format_k(pagetok(pp->p_vm_rssize)),
434	    (pp->p_stat == SSLEEP && pp->p_slptime > maxslp) ?
435	    "idle" : state_abbr(pp),
436	    p_wait, format_time(cputime), 100.0 * pct,
437	    printable(pp->p_comm));
438
439	/* return the result */
440	return (fmt);
441}
442
/*
 * Ranking of process states used by ORDERKEY_STATE; a larger value sorts
 * earlier.  The table is indexed directly by p_stat, so it must cover
 * every state listed in state_abbrev[] (indices 0 through 7); with fewer
 * entries the comparison would read past the end of the array for
 * "dead" and "onproc" processes.
 */
static unsigned char sorted_state[] =
{
	0,			/* not used		 */
	4,			/* start		 */
	5,			/* run			 */
	2,			/* sleep		 */
	3,			/* stop			 */
	1,			/* zombie		 */
	0,			/* dead			 */
	6			/* onproc		 */
};
453
454/*
455 *  proc_compares - comparison functions for "qsort"
456 */
457
458/*
459 * First, the possible comparison keys.  These are defined in such a way
460 * that they can be merely listed in the source code to define the actual
461 * desired ordering.
 *
 * Each ORDERKEY_* macro expands to an "if (... == 0)" with no body, so
 * listing several in a row nests them: the next key is evaluated only
 * when every earlier key compared equal.  The list is closed by a lone
 * ";" in each compare function.
 *
 * The macros expect the locals p1, p2 and result to be in scope, and
 * ORDERKEY_PCTCPU additionally needs lresult.  Every key subtracts p1's
 * value from p2's, so larger values sort first.
462 */
463
464#define ORDERKEY_PCTCPU \
465	if (lresult = (pctcpu)p2->p_pctcpu - (pctcpu)p1->p_pctcpu, \
466	    (result = lresult > 0 ? 1 : lresult < 0 ? -1 : 0) == 0)
467#define ORDERKEY_CPUTIME \
468	if ((result = p2->p_rtime_sec - p1->p_rtime_sec) == 0) \
469		if ((result = p2->p_rtime_usec - p1->p_rtime_usec) == 0)
470#define ORDERKEY_STATE \
471	if ((result = sorted_state[(unsigned char)p2->p_stat] - \
472	    sorted_state[(unsigned char)p1->p_stat])  == 0)
473#define ORDERKEY_PRIO \
474	if ((result = p2->p_priority - p1->p_priority) == 0)
475#define ORDERKEY_RSSIZE \
476	if ((result = p2->p_vm_rssize - p1->p_vm_rssize) == 0)
477#define ORDERKEY_MEM \
478	if ((result = PROCSIZE(p2) - PROCSIZE(p1)) == 0)
479
480/* compare_cpu - the comparison function for sorting by cpu percentage */
481static int
482compare_cpu(const void *v1, const void *v2)
483{
484	struct proc **pp1 = (struct proc **) v1;
485	struct proc **pp2 = (struct proc **) v2;
486	struct kinfo_proc2 *p1, *p2;
487	pctcpu lresult;
488	int result;
489
490	/* remove one level of indirection */
491	p1 = *(struct kinfo_proc2 **) pp1;
492	p2 = *(struct kinfo_proc2 **) pp2;
493
494	ORDERKEY_PCTCPU
495	ORDERKEY_CPUTIME
496	ORDERKEY_STATE
497	ORDERKEY_PRIO
498	ORDERKEY_RSSIZE
499	ORDERKEY_MEM
500		;
501	return (result);
502}
503
504/* compare_size - the comparison function for sorting by total memory usage */
505static int
506compare_size(const void *v1, const void *v2)
507{
508	struct proc **pp1 = (struct proc **) v1;
509	struct proc **pp2 = (struct proc **) v2;
510	struct kinfo_proc2 *p1, *p2;
511	pctcpu lresult;
512	int result;
513
514	/* remove one level of indirection */
515	p1 = *(struct kinfo_proc2 **) pp1;
516	p2 = *(struct kinfo_proc2 **) pp2;
517
518	ORDERKEY_MEM
519	ORDERKEY_RSSIZE
520	ORDERKEY_PCTCPU
521	ORDERKEY_CPUTIME
522	ORDERKEY_STATE
523	ORDERKEY_PRIO
524		;
525	return (result);
526}
527
528/* compare_res - the comparison function for sorting by resident set size */
529static int
530compare_res(const void *v1, const void *v2)
531{
532	struct proc **pp1 = (struct proc **) v1;
533	struct proc **pp2 = (struct proc **) v2;
534	struct kinfo_proc2 *p1, *p2;
535	pctcpu lresult;
536	int result;
537
538	/* remove one level of indirection */
539	p1 = *(struct kinfo_proc2 **) pp1;
540	p2 = *(struct kinfo_proc2 **) pp2;
541
542	ORDERKEY_RSSIZE
543	ORDERKEY_MEM
544	ORDERKEY_PCTCPU
545	ORDERKEY_CPUTIME
546	ORDERKEY_STATE
547	ORDERKEY_PRIO
548		;
549	return (result);
550}
551
552/* compare_time - the comparison function for sorting by CPU time */
553static int
554compare_time(const void *v1, const void *v2)
555{
556	struct proc **pp1 = (struct proc **) v1;
557	struct proc **pp2 = (struct proc **) v2;
558	struct kinfo_proc2 *p1, *p2;
559	pctcpu lresult;
560	int result;
561
562	/* remove one level of indirection */
563	p1 = *(struct kinfo_proc2 **) pp1;
564	p2 = *(struct kinfo_proc2 **) pp2;
565
566	ORDERKEY_CPUTIME
567	ORDERKEY_PCTCPU
568	ORDERKEY_STATE
569	ORDERKEY_PRIO
570	ORDERKEY_MEM
571	ORDERKEY_RSSIZE
572		;
573	return (result);
574}
575
576/* compare_prio - the comparison function for sorting by CPU time */
577static int
578compare_prio(const void *v1, const void *v2)
579{
580	struct proc   **pp1 = (struct proc **) v1;
581	struct proc   **pp2 = (struct proc **) v2;
582	struct kinfo_proc2 *p1, *p2;
583	pctcpu lresult;
584	int result;
585
586	/* remove one level of indirection */
587	p1 = *(struct kinfo_proc2 **) pp1;
588	p2 = *(struct kinfo_proc2 **) pp2;
589
590	ORDERKEY_PRIO
591	ORDERKEY_PCTCPU
592	ORDERKEY_CPUTIME
593	ORDERKEY_STATE
594	ORDERKEY_RSSIZE
595	ORDERKEY_MEM
596		;
597	return (result);
598}
599
/*
 * NULL-terminated table of the comparison functions above.  The entries
 * appear in the same order as the names in ordernames[] ("cpu", "size",
 * "res", "time", "pri").
 * NOTE(review): presumably the caller indexes both tables with the same
 * order number -- confirm against the user of proc_compares.
 */
600int (*proc_compares[])(const void *, const void *) = {
601	compare_cpu,
602	compare_size,
603	compare_res,
604	compare_time,
605	compare_prio,
606	NULL
607};
608
609/*
610 * proc_owner(pid) - returns the uid that owns process "pid", or -1 if
611 *		the process does not exist.
612 *		It is EXTREMLY IMPORTANT that this function work correctly.
613 *		If top runs setuid root (as in SVR4), then this function
614 *		is the only thing that stands in the way of a serious
615 *		security problem.  It validates requests for the "kill"
616 *		and "renice" commands.
617 */
618uid_t
619proc_owner(pid_t pid)
620{
621	struct kinfo_proc2 **prefp, *pp;
622	int cnt;
623
624	prefp = pref;
625	cnt = pref_len;
626	while (--cnt >= 0) {
627		pp = *prefp++;
628		if (pp->p_pid == pid)
629			return ((uid_t)pp->p_ruid);
630	}
631	return (uid_t)(-1);
632}
633
634/*
635 * swapmode is rewritten by Tobias Weingartner <weingart@openbsd.org>
636 * to be based on the new swapctl(2) system call.
637 */
638static int
639swapmode(int *used, int *total)
640{
641	struct swapent *swdev;
642	int nswap, rnswap, i;
643
644	nswap = swapctl(SWAP_NSWAP, 0, 0);
645	if (nswap == 0)
646		return 0;
647
648	swdev = malloc(nswap * sizeof(*swdev));
649	if (swdev == NULL)
650		return 0;
651
652	rnswap = swapctl(SWAP_STATS, swdev, nswap);
653	if (rnswap == -1)
654		return 0;
655
656	/* if rnswap != nswap, then what? */
657
658	/* Total things up */
659	*total = *used = 0;
660	for (i = 0; i < nswap; i++) {
661		if (swdev[i].se_flags & SWF_ENABLE) {
662			*used += (swdev[i].se_inuse / (1024 / DEV_BSIZE));
663			*total += (swdev[i].se_nblks / (1024 / DEV_BSIZE));
664		}
665	}
666	free(swdev);
667	return 1;
668}
669