machine.c revision 1.38
1/* $OpenBSD: machine.c,v 1.38 2004/05/09 22:14:15 deraadt Exp $	 */
2
3/*-
4 * Copyright (c) 1994 Thorsten Lockert <tholo@sigmasoft.com>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. The name of the author may not be used to endorse or promote products
16 *    derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
20 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
21 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
26 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
27 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * AUTHOR:  Thorsten Lockert <tholo@sigmasoft.com>
30 *          Adapted from BSD4.4 by Christos Zoulas <christos@ee.cornell.edu>
31 *          Patch for process wait display by Jarl F. Greipsland <jarle@idt.unit.no>
32 *	    Patch for -DORDER by Kenneth Stailey <kstailey@disclosure.com>
33 *	    Patch for new swapctl(2) by Tobias Weingartner <weingart@openbsd.org>
34 */
35
36#include <sys/types.h>
37#include <sys/signal.h>
38#include <sys/param.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <limits.h>
43#include <err.h>
44#include <math.h>
45#include <unistd.h>
46#include <sys/errno.h>
47#include <sys/sysctl.h>
48#include <sys/dir.h>
49#include <sys/dkstat.h>
50#include <sys/file.h>
51#include <sys/time.h>
52#include <sys/resource.h>
53#include <sys/swap.h>
54#include <err.h>
55
56#include "top.h"
57#include "display.h"
58#include "machine.h"
59#include "utils.h"
60#include "loadavg.h"
61
/* forward declaration; swapmode() is defined later in this file */
static int	swapmode(int *, int *);

/* get_process_info passes back a handle.  This is what it looks like: */

/*
 * The handle is a cursor over the array of selected process pointers:
 * format_next_process() reads *next_proc, advances it and decrements
 * "remaining" on every call.
 */
struct handle {
	struct kinfo_proc2 **next_proc;	/* points to next valid proc pointer */
	int		remaining;	/* number of pointers remaining */
};

/*
 * what we consider to be process size: the sum of the three p_vm_*size
 * fields (presumably text, data and stack).  The result is passed through
 * pagetok() before it is displayed.
 */
#define PROCSIZE(pp) ((pp)->p_vm_tsize + (pp)->p_vm_dsize + (pp)->p_vm_ssize)
73
/*
 *  These definitions control the format of the per-process area
 */

/*
 * Column header line.  It is a writable array (not a string literal
 * pointer) because format_header() overwrites the placeholder that
 * starts at the 'X' with the caller's user-column title.
 */
static char header[] =
	"  PID X        PRI NICE  SIZE   RES STATE WAIT     TIME    CPU COMMAND";

/* 0123456   -- field to fill in starts at header+6 */
#define UNAME_START 6

/*
 * printf format for one process line, used by format_next_process().
 * Arguments, in order: pid, user name, priority, nice, size, resident
 * size, state, wait message, cpu time, cpu percentage, command name.
 */
#define Proc_format \
	"%5d %-8.8s %3d %4d %5s %5s %-5s %-6.6s %6s %5.2f%% %.14s"
85
/* process state names for the "STATE" column of the display */
/*
 * the extra nulls in the string "run" are for adding a slash and the
 * processor number when needed
 */
/*
 * The table is indexed directly by a process's p_stat value in
 * format_next_process(); it has six entries (indices 0 through 5) and
 * no bounds check is made there.
 */

char	*state_abbrev[] = {
	"", "start", "run\0\0\0", "sleep", "stop", "zomb",
};
95
/*
 * the kernel's "stathz" value, read from kern.clockrate by machine_init();
 * format_next_process() divides a process's tick counts by it
 */
static int      stathz;

/* these are for calculating cpu state percentages */
/*
 * cp_time is filled from the kern.cp_time sysctl in get_system_info();
 * all three arrays are handed to percentages() (cp_old and cp_diff are
 * presumably its previous-sample and delta work areas).
 */
static long     cp_time[CPUSTATES];
static long     cp_old[CPUSTATES];
static long     cp_diff[CPUSTATES];
102
/* these are for detailing the process states */
/*
 * process_states[] is zeroed and then incremented per process, indexed by
 * p_stat, in get_process_info().  procstatenames[] is the NULL-terminated
 * list of labels exported through statics->procstate_names.
 */
int process_states[7];
char *procstatenames[] = {
	"", " starting, ", " running, ", " idle, ",
	" stopped, ", " zombie, ",
	NULL
};

/* these are for detailing the cpu states */
/*
 * cpu_states[] receives the output of percentages() in get_system_info();
 * cpustatenames[] is the matching NULL-terminated label list.
 */
int cpu_states[CPUSTATES];
char *cpustatenames[] = {
	"user", "nice", "system", "interrupt", "idle", NULL
};

/* these are for detailing the memory statistics */
/*
 * memory_stats[] and memorynames[] run in parallel (eight slots each).
 * get_system_info() stores -1 in slots 0, 3 and 5, whose names are pure
 * labels ("Real: ", "Free: ", "Swap: "), and Kbyte values in the others.
 */
int memory_stats[8];
char *memorynames[] = {
	"Real: ", "K/", "K act/tot  ", "Free: ", "K  ",
	"Swap: ", "K/", "K used/tot",
	NULL
};

/* these are names given to allowed sorting orders -- first is default */
/*
 * The order here matches the order of the functions in proc_compares[]
 * (compare_cpu, compare_size, compare_res, compare_time, compare_prio).
 */
char	*ordernames[] = {
	"cpu", "size", "res", "time", "pri", NULL
};
129
/* these are for keeping track of the proc array */
static int      nproc;		/* entries returned by the last getprocs() */
static int      onproc = -1;	/* capacity (in pointers) of pref[] */
static int      pref_len;	/* selected entries in pref[] after last scan */
static struct kinfo_proc2 *pbase;	/* process array from getprocs() */
static struct kinfo_proc2 **pref;	/* pointers to the selected processes */

/* these are for getting the memory statistics */
/*
 * machine_init() computes log2(pagesize) and then subtracts LOG1024, so
 * after initialisation this is the shift that turns pages into Kbytes.
 */
static int      pageshift;	/* log base 2 of the pagesize */

/* define pagetok in terms of pageshift */
#define pagetok(size) ((size) << pageshift)

/*
 * value of the vm.maxslp sysctl, refreshed by getprocs(); a sleeping
 * process whose p_slptime exceeds it is displayed as "idle"
 */
unsigned int	maxslp;
143unsigned int	maxslp;
144
/*
 * getstathz - query the kern.clockrate sysctl and return the "stathz"
 *		member of the resulting clockinfo structure.
 *		Returns -1 if the sysctl call fails.
 */
static int
getstathz(void)
{
	int name[2] = { CTL_KERN, KERN_CLOCKRATE };
	struct clockinfo ci;
	size_t len = sizeof(ci);

	if (sysctl(name, 2, &ci, &len, NULL, 0) == -1)
		return (-1);

	return (ci.stathz);
}
158
159int
160machine_init(struct statics *statics)
161{
162	int pagesize;
163
164	stathz = getstathz();
165	if (stathz == -1)
166		return (-1);
167
168	pbase = NULL;
169	pref = NULL;
170	onproc = -1;
171	nproc = 0;
172
173	/*
174	 * get the page size with "getpagesize" and calculate pageshift from
175	 * it
176	 */
177	pagesize = getpagesize();
178	pageshift = 0;
179	while (pagesize > 1) {
180		pageshift++;
181		pagesize >>= 1;
182	}
183
184	/* we only need the amount of log(2)1024 for our conversion */
185	pageshift -= LOG1024;
186
187	/* fill in the statics information */
188	statics->procstate_names = procstatenames;
189	statics->cpustate_names = cpustatenames;
190	statics->memory_names = memorynames;
191	statics->order_names = ordernames;
192	return (0);
193}
194
195char *
196format_header(char *uname_field)
197{
198	char *ptr;
199
200	ptr = header + UNAME_START;
201	while (*uname_field != '\0')
202		*ptr++ = *uname_field++;
203	return (header);
204}
205
/*
 * get_system_info(si) - collect the system-wide statistics.
 *		Fills si->load_avg with the three load averages, and points
 *		si->cpustates and si->memory at the static cpu_states[] and
 *		memory_stats[] arrays after refreshing them.  si->last_pid
 *		is always set to -1.
 *
 *		A failing sysctl is reported with warn() and the affected
 *		values fall back to zero (the cpu counters keep their
 *		previous contents) instead of being read uninitialised.
 */
void
get_system_info(struct system_info *si)
{
	static int sysload_mib[] = {CTL_VM, VM_LOADAVG};
	static int vmtotal_mib[] = {CTL_VM, VM_METER};
	static int cp_time_mib[] = {CTL_KERN, KERN_CPTIME};
	struct loadavg sysload;
	struct vmtotal vmtotal;
	double *infoloadp;
	size_t size;
	int i;

	size = sizeof(cp_time);
	if (sysctl(cp_time_mib, 2, &cp_time, &size, NULL, 0) < 0)
		warn("sysctl kern.cp_time failed");

	size = sizeof(sysload);
	if (sysctl(sysload_mib, 2, &sysload, &size, NULL, 0) < 0) {
		warn("sysctl failed");
		/* never compute load averages from stack garbage */
		bzero(&sysload, sizeof(sysload));
	}
	infoloadp = si->load_avg;
	for (i = 0; i < 3; i++) {
		/* fscale is zero only after the failure path above */
		if (sysload.fscale != 0)
			*infoloadp++ =
			    ((double) sysload.ldavg[i]) / sysload.fscale;
		else
			*infoloadp++ = 0.0;
	}

	/* convert cp_time counts to percentages */
	(void) percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);

	/* get total -- systemwide main memory usage structure */
	size = sizeof(vmtotal);
	if (sysctl(vmtotal_mib, 2, &vmtotal, &size, NULL, 0) < 0) {
		warn("sysctl failed");
		bzero(&vmtotal, sizeof(vmtotal));
	}
	/*
	 * convert memory stats to Kbytes; slots 0, 3 and 5 pair with the
	 * label-only entries of memorynames[] and are set to -1
	 */
	memory_stats[0] = -1;
	memory_stats[1] = pagetok(vmtotal.t_arm);
	memory_stats[2] = pagetok(vmtotal.t_rm);
	memory_stats[3] = -1;
	memory_stats[4] = pagetok(vmtotal.t_free);
	memory_stats[5] = -1;

	/* swapmode() returns 0 when it could not produce swap totals */
	if (!swapmode(&memory_stats[6], &memory_stats[7])) {
		memory_stats[6] = 0;
		memory_stats[7] = 0;
	}

	/* set arrays and strings */
	si->cpustates = cpu_states;
	si->memory = memory_stats;
	si->last_pid = -1;
}
256
/*
 * the one handle instance; get_process_info() fills it in and returns
 * its address, so a new call invalidates the previous handle
 */
static struct handle handle;
258
/*
 * getprocs(op, arg, cnt) - fetch the process table with the
 *		kern.proc2 sysctl, selecting processes by "op"/"arg", and
 *		refresh the global "maxslp" from vm.maxslp.
 *
 *		Returns a pointer to an array of struct kinfo_proc2 and
 *		stores the number of entries in *cnt, or returns NULL on
 *		failure (after reporting the error with warn()).
 *
 *		The array lives in a static buffer owned by this function:
 *		it is freed and reallocated by the next call, so callers
 *		must not free it or keep pointers into it across calls.
 */
static struct kinfo_proc2 *
getprocs(int op, int arg, int *cnt)
{
	size_t size;
	int mib[6] = {CTL_KERN, KERN_PROC2, 0, 0, sizeof(struct kinfo_proc2), 0};
	static int maxslp_mib[] = {CTL_VM, VM_MAXSLP};
	static struct kinfo_proc2 *procbase;
	int st;

	mib[2] = op;
	mib[3] = arg;

	size = sizeof(maxslp);
	if (sysctl(maxslp_mib, 2, &maxslp, &size, NULL, 0) < 0) {
		warn("sysctl vm.maxslp failed");
		return (NULL);
	}
    retry:
	/*
	 * Drop the previous buffer.  The pointer is cleared right away so
	 * that an error return below cannot leave a dangling static
	 * pointer to be freed a second time by the next call.
	 */
	free(procbase);
	procbase = NULL;

	/* first pass: ask only for the size needed */
	st = sysctl(mib, 6, NULL, &size, NULL, 0);
	if (st == -1) {
		warn("sysctl kern.proc2 failed");
		return (NULL);
	}
	size = 5 * size / 4;			/* extra slop */
	if ((procbase = malloc(size)) == NULL) {
		warn("malloc");
		return (NULL);
	}
	/* mib[5] is the number of elements the buffer can hold */
	mib[5] = (int)(size / sizeof(struct kinfo_proc2));
	st = sysctl(mib, 6, procbase, &size, NULL, 0);
	if (st == -1) {
		/* table grew beyond the slop between the two calls */
		if (errno == ENOMEM)
			goto retry;
		warn("sysctl kern.proc2 failed");
		return (NULL);
	}
	*cnt = (int)(size / sizeof(struct kinfo_proc2));
	return (procbase);
}
297
/*
 * get_process_info(si, sel, compare) - take a fresh snapshot of the
 *		process table and select the processes to display.
 *
 *		"sel" chooses whether idle and system processes are shown
 *		and optionally restricts the list to one uid (a uid of -1
 *		means no restriction).  If "compare" is not NULL the
 *		selected processes are sorted with it.
 *
 *		Stores the per-state counts in si->procstates and the
 *		total/selected counts in si->p_total and si->p_active.
 *		Returns the address of the static handle, to be passed to
 *		format_next_process().  Calls quit(23) if the process table
 *		cannot be read or memory runs out.
 */
caddr_t
get_process_info(struct system_info *si, struct process_select *sel,
    int (*compare) (const void *, const void *))
{
	struct kinfo_proc2 **slot, *kp;
	int want_idle, want_system, want_uid;
	int ntotal, nactive, idx;

	pbase = getprocs(KERN_PROC_KTHREAD, 0, &nproc);
	if (pbase == NULL) {
		/* warnx("%s", kvm_geterr(kd)); */
		quit(23);
	}

	/* the pointer array only ever grows */
	if (nproc > onproc) {
		onproc = nproc;
		pref = realloc(pref, sizeof(struct kinfo_proc2 *) * onproc);
	}
	if (pref == NULL) {
		warnx("Out of memory.");
		quit(23);
	}

	/* the caller reads the state summary straight from our array */
	si->procstates = process_states;

	/* translate the selection request into simple flags */
	want_idle = sel->idle;
	want_system = sel->system;
	want_uid = sel->uid != (uid_t)-1;

	/* tally process states and collect pointers to interesting procs */
	ntotal = 0;
	nactive = 0;
	memset(process_states, 0, sizeof(process_states));
	slot = pref;
	for (idx = 0; idx < nproc; idx++) {
		kp = &pbase[idx];

		/* a slot that is in use has a non-zero status */
		if (kp->p_stat == 0)
			continue;
		/* P_SYSTEM processes count only when want_system is set */
		if (!want_system && (kp->p_flag & P_SYSTEM) != 0)
			continue;

		ntotal++;
		process_states[(unsigned char) kp->p_stat]++;

		/* zombies are counted above but never listed */
		if (kp->p_stat == SZOMB)
			continue;
		/* without want_idle, list only procs using cpu or runnable */
		if (!want_idle && kp->p_pctcpu == 0 && kp->p_stat != SRUN)
			continue;
		/* honour the optional uid filter */
		if (want_uid && kp->p_ruid != sel->uid)
			continue;

		*slot++ = kp;
		nactive++;
	}

	/* if requested, sort the "interesting" processes */
	if (compare != NULL)
		qsort(pref, nactive, sizeof(struct kinfo_proc2 *), compare);

	/* remember active and total counts */
	pref_len = nactive;
	si->p_total = ntotal;
	si->p_active = nactive;

	/* pass back a handle */
	handle.remaining = nactive;
	handle.next_proc = pref;
	return ((caddr_t) &handle);
}
364
char fmt[MAX_COLS];	/* static area where result is built */

/*
 * format_next_process(handle, get_userid) - format the display line for
 *		the next process referenced by "handle" (as returned by
 *		get_process_info) and advance the handle by one entry.
 *		"get_userid" maps a uid to the string shown in the user
 *		column.
 *
 *		Returns a pointer to the static buffer "fmt", which is
 *		overwritten by every call.  No check is made against
 *		hp->remaining: the caller must not ask for more lines than
 *		the handle holds.
 *
 *		Side effect: the p_comm of a process that is not in memory
 *		is rewritten in place as "<name>".
 */
char *
format_next_process(caddr_t handle, char *(*get_userid)(uid_t))
{
	char *p_wait, waddr[sizeof(void *) * 2 + 3];	/* Hexify void pointer */
	struct kinfo_proc2 *pp;
	struct handle *hp;
	int cputime;
	double pct;

	/* find and remember the next proc structure */
	hp = (struct handle *) handle;
	pp = *(hp->next_proc++);
	hp->remaining--;

	if ((pp->p_flag & P_INMEM) == 0) {
		/*
		 * Print swapped processes as <pname>
		 */
		char buf[sizeof(pp->p_comm)];

		(void) strlcpy(buf, pp->p_comm, sizeof(buf));
		(void) snprintf(pp->p_comm, sizeof(pp->p_comm), "<%s>", buf);
	}
	/* accumulated user + system + interrupt ticks, in seconds */
	cputime = (pp->p_uticks + pp->p_sticks + pp->p_iticks) / stathz;

	/* calculate the base for cpu percentages */
	pct = pctdouble(pp->p_pctcpu);

	if (pp->p_wchan) {
		/*
		 * p_wmesg is a character array inside struct kinfo_proc2,
		 * so it must be tested for an empty string; testing the
		 * array itself is always true and would make the address
		 * fallback below unreachable.
		 */
		if (pp->p_wmesg[0] != '\0')
			p_wait = pp->p_wmesg;
		else {
			/* cast so the argument matches "%llx" everywhere */
			snprintf(waddr, sizeof(waddr), "%llx",
			    (unsigned long long)(pp->p_wchan & ~KERNBASE));
			p_wait = waddr;
		}
	} else
		p_wait = "-";

	/* format this entry */
	snprintf(fmt, sizeof fmt, Proc_format,
	    pp->p_pid, (*get_userid)(pp->p_ruid),
	    pp->p_priority - PZERO, pp->p_nice - NZERO,
	    format_k(pagetok(PROCSIZE(pp))),
	    format_k(pagetok(pp->p_vm_rssize)),
	    /* long sleepers are shown as "idle" rather than "sleep" */
	    (pp->p_stat == SSLEEP && pp->p_slptime > maxslp) ?
	    "idle" : state_abbrev[(unsigned char)pp->p_stat],
	    p_wait, format_time(cputime), 100.0 * pct,
	    printable(pp->p_comm));

	/* return the result */
	return (fmt);
}
420
/*
 * rank table used by the qsort comparison routines: it is indexed by a
 * process's p_stat value, and ORDERKEY_STATE subtracts the rank of the
 * first process from that of the second, so a higher rank sorts earlier
 */
static unsigned char sorted_state[] =
{
	0,			/* not used		 */
	4,			/* start		 */
	5,			/* run			 */
	2,			/* sleep		 */
	3,			/* stop			 */
	1			/* zombie		 */
};
431
/*
 *  proc_compares - comparison functions for "qsort"
 */

/*
 * First, the possible comparison keys.  These are defined in such a way
 * that they can be merely listed in the source code to define the actual
 * desired ordering.
 *
 * Each macro expands to an "if" without a body: it stores the comparison
 * of its key in "result" and, only when the key compares equal, falls
 * through to whatever follows -- normally the next key.  A list of keys
 * must therefore be terminated by a lone ';'.  All keys compute
 * "p2 minus p1", so larger values sort first.
 *
 * The macros expect these locals in the expanding function: p1 and p2
 * (struct kinfo_proc2 *), result (int) and, for ORDERKEY_PCTCPU,
 * lresult (pctcpu).
 */

/* cpu percentage; the difference is reduced to -1/0/1 before use */
#define ORDERKEY_PCTCPU \
	if (lresult = (pctcpu)p2->p_pctcpu - (pctcpu)p1->p_pctcpu, \
	    (result = lresult > 0 ? 1 : lresult < 0 ? -1 : 0) == 0)
/* accumulated run time: seconds first, then microseconds */
#define ORDERKEY_CPUTIME \
	if ((result = p2->p_rtime_sec - p1->p_rtime_sec) == 0) \
		if ((result = p2->p_rtime_usec - p1->p_rtime_usec) == 0)
/* process state, ranked through sorted_state[] */
#define ORDERKEY_STATE \
	if ((result = sorted_state[(unsigned char)p2->p_stat] - \
	    sorted_state[(unsigned char)p1->p_stat])  == 0)
/* scheduling priority */
#define ORDERKEY_PRIO \
	if ((result = p2->p_priority - p1->p_priority) == 0)
/* resident set size */
#define ORDERKEY_RSSIZE \
	if ((result = p2->p_vm_rssize - p1->p_vm_rssize) == 0)
/* total process size as defined by PROCSIZE() */
#define ORDERKEY_MEM \
	if ((result = PROCSIZE(p2) - PROCSIZE(p1)) == 0)
457
458/* compare_cpu - the comparison function for sorting by cpu percentage */
459static int
460compare_cpu(const void *v1, const void *v2)
461{
462	struct proc **pp1 = (struct proc **) v1;
463	struct proc **pp2 = (struct proc **) v2;
464	struct kinfo_proc2 *p1, *p2;
465	pctcpu lresult;
466	int result;
467
468	/* remove one level of indirection */
469	p1 = *(struct kinfo_proc2 **) pp1;
470	p2 = *(struct kinfo_proc2 **) pp2;
471
472	ORDERKEY_PCTCPU
473	ORDERKEY_CPUTIME
474	ORDERKEY_STATE
475	ORDERKEY_PRIO
476	ORDERKEY_RSSIZE
477	ORDERKEY_MEM
478		;
479	return (result);
480}
481
482/* compare_size - the comparison function for sorting by total memory usage */
483static int
484compare_size(const void *v1, const void *v2)
485{
486	struct proc **pp1 = (struct proc **) v1;
487	struct proc **pp2 = (struct proc **) v2;
488	struct kinfo_proc2 *p1, *p2;
489	pctcpu lresult;
490	int result;
491
492	/* remove one level of indirection */
493	p1 = *(struct kinfo_proc2 **) pp1;
494	p2 = *(struct kinfo_proc2 **) pp2;
495
496	ORDERKEY_MEM
497	ORDERKEY_RSSIZE
498	ORDERKEY_PCTCPU
499	ORDERKEY_CPUTIME
500	ORDERKEY_STATE
501	ORDERKEY_PRIO
502		;
503	return (result);
504}
505
506/* compare_res - the comparison function for sorting by resident set size */
507static int
508compare_res(const void *v1, const void *v2)
509{
510	struct proc **pp1 = (struct proc **) v1;
511	struct proc **pp2 = (struct proc **) v2;
512	struct kinfo_proc2 *p1, *p2;
513	pctcpu lresult;
514	int result;
515
516	/* remove one level of indirection */
517	p1 = *(struct kinfo_proc2 **) pp1;
518	p2 = *(struct kinfo_proc2 **) pp2;
519
520	ORDERKEY_RSSIZE
521	ORDERKEY_MEM
522	ORDERKEY_PCTCPU
523	ORDERKEY_CPUTIME
524	ORDERKEY_STATE
525	ORDERKEY_PRIO
526		;
527	return (result);
528}
529
530/* compare_time - the comparison function for sorting by CPU time */
531static int
532compare_time(const void *v1, const void *v2)
533{
534	struct proc **pp1 = (struct proc **) v1;
535	struct proc **pp2 = (struct proc **) v2;
536	struct kinfo_proc2 *p1, *p2;
537	pctcpu lresult;
538	int result;
539
540	/* remove one level of indirection */
541	p1 = *(struct kinfo_proc2 **) pp1;
542	p2 = *(struct kinfo_proc2 **) pp2;
543
544	ORDERKEY_CPUTIME
545	ORDERKEY_PCTCPU
546	ORDERKEY_STATE
547	ORDERKEY_PRIO
548	ORDERKEY_MEM
549	ORDERKEY_RSSIZE
550		;
551	return (result);
552}
553
554/* compare_prio - the comparison function for sorting by CPU time */
555static int
556compare_prio(const void *v1, const void *v2)
557{
558	struct proc   **pp1 = (struct proc **) v1;
559	struct proc   **pp2 = (struct proc **) v2;
560	struct kinfo_proc2 *p1, *p2;
561	pctcpu lresult;
562	int result;
563
564	/* remove one level of indirection */
565	p1 = *(struct kinfo_proc2 **) pp1;
566	p2 = *(struct kinfo_proc2 **) pp2;
567
568	ORDERKEY_PRIO
569	ORDERKEY_PCTCPU
570	ORDERKEY_CPUTIME
571	ORDERKEY_STATE
572	ORDERKEY_RSSIZE
573	ORDERKEY_MEM
574		;
575	return (result);
576}
577
/*
 * NULL-terminated table of the comparison functions, in the same order
 * as the names in ordernames[] ("cpu", "size", "res", "time", "pri").
 */
int (*proc_compares[])(const void *, const void *) = {
	compare_cpu,
	compare_size,
	compare_res,
	compare_time,
	compare_prio,
	NULL
};
586
587/*
588 * proc_owner(pid) - returns the uid that owns process "pid", or -1 if
589 *		the process does not exist.
590 *		It is EXTREMLY IMPORTANT that this function work correctly.
591 *		If top runs setuid root (as in SVR4), then this function
592 *		is the only thing that stands in the way of a serious
593 *		security problem.  It validates requests for the "kill"
594 *		and "renice" commands.
595 */
596uid_t
597proc_owner(pid_t pid)
598{
599	struct kinfo_proc2 **prefp, *pp;
600	int cnt;
601
602	prefp = pref;
603	cnt = pref_len;
604	while (--cnt >= 0) {
605		pp = *prefp++;
606		if (pp->p_pid == pid)
607			return ((uid_t)pp->p_ruid);
608	}
609	return (uid_t)(-1);
610}
611
612/*
613 * swapmode is rewritten by Tobias Weingartner <weingart@openbsd.org>
614 * to be based on the new swapctl(2) system call.
615 */
616static int
617swapmode(int *used, int *total)
618{
619	struct swapent *swdev;
620	int nswap, rnswap, i;
621
622	nswap = swapctl(SWAP_NSWAP, 0, 0);
623	if (nswap == 0)
624		return 0;
625
626	swdev = malloc(nswap * sizeof(*swdev));
627	if (swdev == NULL)
628		return 0;
629
630	rnswap = swapctl(SWAP_STATS, swdev, nswap);
631	if (rnswap == -1)
632		return 0;
633
634	/* if rnswap != nswap, then what? */
635
636	/* Total things up */
637	*total = *used = 0;
638	for (i = 0; i < nswap; i++) {
639		if (swdev[i].se_flags & SWF_ENABLE) {
640			*used += (swdev[i].se_inuse / (1024 / DEV_BSIZE));
641			*total += (swdev[i].se_nblks / (1024 / DEV_BSIZE));
642		}
643	}
644	free(swdev);
645	return 1;
646}
647