machine.c revision 27340
1/*
2 * top - a top users display for Unix
3 *
4 * SYNOPSIS:  For FreeBSD-2.x system
5 *
6 * DESCRIPTION:
7 * Originally written for BSD4.4 system by Christos Zoulas.
8 * Ported to FreeBSD 2.x by Steven Wallace && Wolfram Schneider
9 *
10 * This is the machine-dependent module for FreeBSD 2.2
11 * Works for:
12 *	FreeBSD 2.2, and probably FreeBSD 2.1.x
13 *
14 * LIBS: -lkvm
15 *
16 * AUTHOR:  Christos Zoulas <christos@ee.cornell.edu>
17 *          Steven Wallace  <swallace@freebsd.org>
18 *          Wolfram Schneider <wosch@FreeBSD.org>
19 *
20 * $Id: machine.c,v 1.3 1997/04/21 13:53:47 ache Exp $
21 */
22
23
24#include <sys/types.h>
25#include <sys/signal.h>
26#include <sys/param.h>
27
28#include "os.h"
29#include <stdio.h>
30#include <nlist.h>
31#include <math.h>
32#include <kvm.h>
33#include <sys/errno.h>
34#include <sys/sysctl.h>
35#include <sys/dkstat.h>
36#include <sys/file.h>
37#include <sys/time.h>
38#include <sys/proc.h>
39#include <sys/user.h>
40#include <sys/vmmeter.h>
41
42/* Swap */
43#include <stdlib.h>
44#include <sys/rlist.h>
45#include <sys/conf.h>
46
47#include <osreldate.h> /* for changes in kernel structures */
48
49#include "top.h"
50#include "machine.h"
51
52static int check_nlist __P((struct nlist *));
53static int getkval __P((unsigned long, int *, int, char *));
54extern char* printable __P((char *));
55int swapmode __P((int *retavail, int *retfree));
56static int smpmode;
57
58
59
/*
 *  The handle passed back by get_process_info() and consumed, one entry
 *  per call, by format_next_process().
 */

struct handle
{
    /* next entry of pref[] to be formatted */
    struct kinfo_proc **next_proc;
    /* how many entries are still left to format */
    int remaining;
};
67
68/* declarations for load_avg */
69#include "loadavg.h"
70
/* field accessors for a "struct kinfo_proc *" */
#define PP(pp, field) ((pp)->kp_proc . field)		/* kp_proc member */
#define EP(pp, field) ((pp)->kp_eproc . field)		/* kp_eproc member */
#define VP(pp, field) ((pp)->kp_eproc.e_vm . field)	/* kp_eproc.e_vm member */

/*
 * weighted cpu: pct divided by (1 - exp(p_swtime * logcpu)), or 0.0 while
 * p_swtime is still zero.
 */
#define weighted_cpu(pct, pp) \
    (PP((pp), p_swtime) == 0 \
	? 0.0 \
	: ((pct) / (1.0 - exp(PP((pp), p_swtime) * logcpu))))

/* what we consider to be process size: vm_tsize + vm_dsize + vm_ssize */
#define PROCSIZE(pp) \
    (VP((pp), vm_tsize) + VP((pp), vm_dsize) + VP((pp), vm_ssize))
81
82/* definitions for indices in the nlist array */
83
84
85static struct nlist nlst[] = {
86#define X_CCPU		0
87    { "_ccpu" },		/* 0 */
88#define X_CP_TIME	1
89    { "_cp_time" },		/* 1 */
90#define X_HZ		2
91    { "_hz" },		        /* 2 */
92#define X_STATHZ	3
93    { "_stathz" },		/* 3 */
94#define X_AVENRUN	4
95    { "_averunnable" },		/* 4 */
96
97/* Swap */
98#define VM_SWAPLIST	5
99	{ "_swaplist" },/* list of free swap areas */
100#define VM_SWDEVT	6
101	{ "_swdevt" },	/* list of swap devices and sizes */
102#define VM_NSWAP	7
103	{ "_nswap" },	/* size of largest swap device */
104#define VM_NSWDEV	8
105	{ "_nswdev" },	/* number of swap devices */
106#define VM_DMMAX	9
107	{ "_dmmax" },	/* maximum size of a swap block */
108#define X_BUFSPACE	10
109	{ "_bufspace" },	/* K in buffer cache */
110#define X_CNT           11
111    { "_cnt" },		        /* struct vmmeter cnt */
112
113/* Last pid */
114#define X_LASTPID	12
115    { "_nextpid" },
116    { 0 }
117};
118
/*
 *  Layout of the per-process area.  There is one header/format pair for
 *  SMP kernels and one for uniprocessor kernels; format_header() and
 *  format_next_process() pick the pair according to smpmode.  In both
 *  headers the user name column title is written in place starting at
 *  offset 6 (the "X").
 */

/* SMP layout */
#define SMP_UNAME_START 6
static char smp_header[] =
  "  PID X                PRI NICE SIZE   RES STATE C   TIME   WCPU    CPU COMMAND";
/* 0123456   -- field to fill in starts at header+6 */

#define smp_Proc_format \
	"%5d %-16.16s%3d%3d%7s %6s %-6.6s%1x%7s %5.2f%% %5.2f%% %.6s"

/* uniprocessor layout */
#define UP_UNAME_START 6
static char up_header[] =
  "  PID X                PRI NICE SIZE    RES STATE    TIME   WCPU    CPU COMMAND";
/* 0123456   -- field to fill in starts at header+6 */

#define up_Proc_format \
	"%5d %-16.16s%3d %3d%7s %6s %-6.6s%.0d%7s %5.2f%% %5.2f%% %.6s"
138
139
140
/*
 * Names shown in the "STATE" column, indexed by p_stat.  The "RUN" entry
 * carries extra NUL bytes to leave room for a slash and the processor
 * number when needed.
 */

char *state_abbrev[] =
{
    "",			/* 0 */
    "START",		/* 1 */
    "RUN\0\0\0",	/* 2 */
    "SLEEP",		/* 3 */
    "STOP",		/* 4 */
    "ZOMB",		/* 5 */
};
149
150
151static kvm_t *kd;
152
153/* values that we stash away in _init and use in later routines */
154
155static double logcpu;
156
157/* these are retrieved from the kernel in _init */
158
159static          long hz;
160static load_avg  ccpu;
161
162/* these are offsets obtained via nlist and used in the get_ functions */
163
164static unsigned long cp_time_offset;
165static unsigned long avenrun_offset;
166static unsigned long lastpid_offset;
167static long lastpid;
168static unsigned long cnt_offset;
169static unsigned long bufspace_offset;
170static long cnt;
171
172/* these are for calculating cpu state percentages */
173
174static long cp_time[CPUSTATES];
175static long cp_old[CPUSTATES];
176static long cp_diff[CPUSTATES];
177
178/* these are for detailing the process states */
179
180int process_states[6];
181char *procstatenames[] = {
182    "", " starting, ", " running, ", " sleeping, ", " stopped, ",
183    " zombie, ",
184    NULL
185};
186
187/* these are for detailing the cpu states */
188
189int cpu_states[CPUSTATES];
190char *cpustatenames[] = {
191    "user", "nice", "system", "interrupt", "idle", NULL
192};
193
194/* these are for detailing the memory statistics */
195
196int memory_stats[7];
197char *memorynames[] = {
198    "K Active, ", "K Inact, ", "K Wired, ", "K Cache, ", "K Buf, ", "K Free",
199    NULL
200};
201
202int swap_stats[7];
203char *swapnames[] = {
204/*   0           1            2           3            4       5 */
205    "K Total, ", "K Used, ", "K Free, ", "% Inuse, ", "K In, ", "K Out",
206    NULL
207};
208
209
/*
 * Bookkeeping for the process array:
 *   pbase    - array returned by kvm_getprocs()
 *   nproc    - number of entries in pbase
 *   pref     - pointers to the entries selected for display
 *   onproc   - number of slots currently allocated for pref
 *   pref_len - number of pointers stored in pref by get_process_info()
 */

static struct kinfo_proc *pbase;
static int nproc;
static struct kinfo_proc **pref;
static int onproc = -1;
static int pref_len;

/* memory statistics: shift that turns a page count into kilobytes
   (log base 2 of the pagesize, minus LOG1024) */

static int pageshift;

/* define pagetok in terms of pageshift */

#define pagetok(size) ((size) << pageshift)

/* useful externals */
long percentages();
228
229int
230machine_init(statics)
231
232struct statics *statics;
233
234{
235    register int i = 0;
236    register int pagesize;
237    int modelen;
238
239    modelen = sizeof(smpmode);
240    if (sysctlbyname("kern.smp_active", &smpmode, &modelen, NULL, 0) < 0 ||
241	modelen != sizeof(smpmode))
242	    smpmode = 0;
243
244    if ((kd = kvm_open(NULL, NULL, NULL, O_RDONLY, "kvm_open")) == NULL)
245	return -1;
246
247
248    /* get the list of symbols we want to access in the kernel */
249    (void) kvm_nlist(kd, nlst);
250    if (nlst[0].n_type == 0)
251    {
252	fprintf(stderr, "top: nlist failed\n");
253	return(-1);
254    }
255
256    /* make sure they were all found */
257    if (i > 0 && check_nlist(nlst) > 0)
258    {
259	return(-1);
260    }
261
262    /* get the symbol values out of kmem */
263    (void) getkval(nlst[X_STATHZ].n_value, (int *)(&hz), sizeof(hz), "!");
264    if (!hz) {
265	(void) getkval(nlst[X_HZ].n_value, (int *)(&hz), sizeof(hz),
266		       nlst[X_HZ].n_name);
267    }
268
269    (void) getkval(nlst[X_CCPU].n_value,   (int *)(&ccpu),	sizeof(ccpu),
270	    nlst[X_CCPU].n_name);
271
272    /* stash away certain offsets for later use */
273    cp_time_offset = nlst[X_CP_TIME].n_value;
274    avenrun_offset = nlst[X_AVENRUN].n_value;
275    lastpid_offset =  nlst[X_LASTPID].n_value;
276    cnt_offset = nlst[X_CNT].n_value;
277    bufspace_offset = nlst[X_BUFSPACE].n_value;
278
279    /* this is used in calculating WCPU -- calculate it ahead of time */
280    logcpu = log(loaddouble(ccpu));
281
282    pbase = NULL;
283    pref = NULL;
284    nproc = 0;
285    onproc = -1;
286    /* get the page size with "getpagesize" and calculate pageshift from it */
287    pagesize = getpagesize();
288    pageshift = 0;
289    while (pagesize > 1)
290    {
291	pageshift++;
292	pagesize >>= 1;
293    }
294
295    /* we only need the amount of log(2)1024 for our conversion */
296    pageshift -= LOG1024;
297
298    /* fill in the statics information */
299    statics->procstate_names = procstatenames;
300    statics->cpustate_names = cpustatenames;
301    statics->memory_names = memorynames;
302    statics->swap_names = swapnames;
303
304    /* all done! */
305    return(0);
306}
307
308char *format_header(uname_field)
309
310register char *uname_field;
311
312{
313    register char *ptr;
314
315    if (smpmode)
316	ptr = smp_header + SMP_UNAME_START;
317    else
318	ptr = up_header + UP_UNAME_START;
319
320    while (*uname_field != '\0')
321    {
322	*ptr++ = *uname_field++;
323    }
324
325    return(smpmode ? smp_header : up_header);
326}
327
328static int swappgsin = -1;
329static int swappgsout = -1;
330extern struct timeval timeout;
331
332void
333get_system_info(si)
334
335struct system_info *si;
336
337{
338    long total;
339    load_avg avenrun[3];
340
341    /* get the cp_time array */
342    (void) getkval(cp_time_offset, (int *)cp_time, sizeof(cp_time),
343		   nlst[X_CP_TIME].n_name);
344    (void) getkval(avenrun_offset, (int *)avenrun, sizeof(avenrun),
345		   nlst[X_AVENRUN].n_name);
346
347    (void) getkval(lastpid_offset, (int *)(&lastpid), sizeof(lastpid),
348		   "!");
349
350    /* convert load averages to doubles */
351    {
352	register int i;
353	register double *infoloadp;
354	load_avg *avenrunp;
355
356#ifdef notyet
357	struct loadavg sysload;
358	int size;
359	getkerninfo(KINFO_LOADAVG, &sysload, &size, 0);
360#endif
361
362	infoloadp = si->load_avg;
363	avenrunp = avenrun;
364	for (i = 0; i < 3; i++)
365	{
366#ifdef notyet
367	    *infoloadp++ = ((double) sysload.ldavg[i]) / sysload.fscale;
368#endif
369	    *infoloadp++ = loaddouble(*avenrunp++);
370	}
371    }
372
373    /* convert cp_time counts to percentages */
374    total = percentages(CPUSTATES, cpu_states, cp_time, cp_old, cp_diff);
375
376    /* sum memory & swap statistics */
377    {
378	struct vmmeter sum;
379	static unsigned int swap_delay = 0;
380	static int swapavail = 0;
381	static int swapfree = 0;
382	static int bufspace = 0;
383
384        (void) getkval(cnt_offset, (int *)(&sum), sizeof(sum),
385		   "_cnt");
386        (void) getkval(bufspace_offset, (int *)(&bufspace), sizeof(bufspace),
387		   "_bufspace");
388
389	/* convert memory stats to Kbytes */
390	memory_stats[0] = pagetok(sum.v_active_count);
391	memory_stats[1] = pagetok(sum.v_inactive_count);
392	memory_stats[2] = pagetok(sum.v_wire_count);
393	memory_stats[3] = pagetok(sum.v_cache_count);
394	memory_stats[4] = bufspace / 1024;
395	memory_stats[5] = pagetok(sum.v_free_count);
396	memory_stats[6] = -1;
397
398	/* first interval */
399        if (swappgsin < 0) {
400	    swap_stats[4] = 0;
401	    swap_stats[5] = 0;
402	}
403
404	/* compute differences between old and new swap statistic */
405	else {
406	    swap_stats[4] = pagetok(((sum.v_swappgsin - swappgsin)));
407	    swap_stats[5] = pagetok(((sum.v_swappgsout - swappgsout)));
408	}
409
410        swappgsin = sum.v_swappgsin;
411	swappgsout = sum.v_swappgsout;
412
413	/* call CPU heavy swapmode() only for changes */
414        if (swap_stats[4] > 0 || swap_stats[5] > 0 || swap_delay == 0) {
415	    swap_stats[3] = swapmode(&swapavail, &swapfree);
416	    swap_stats[0] = swapavail;
417	    swap_stats[1] = swapavail - swapfree;
418	    swap_stats[2] = swapfree;
419	}
420        swap_delay = 1;
421	swap_stats[6] = -1;
422    }
423
424    /* set arrays and strings */
425    si->cpustates = cpu_states;
426    si->memory = memory_stats;
427    si->swap = swap_stats;
428
429
430    if(lastpid > 0) {
431	si->last_pid = lastpid;
432    } else {
433	si->last_pid = -1;
434    }
435}
436
437static struct handle handle;
438
439caddr_t get_process_info(si, sel, compare)
440
441struct system_info *si;
442struct process_select *sel;
443int (*compare)();
444
445{
446    register int i;
447    register int total_procs;
448    register int active_procs;
449    register struct kinfo_proc **prefp;
450    register struct kinfo_proc *pp;
451
452    /* these are copied out of sel for speed */
453    int show_idle;
454    int show_system;
455    int show_uid;
456    int show_command;
457
458
459    pbase = kvm_getprocs(kd, KERN_PROC_ALL, 0, &nproc);
460    if (nproc > onproc)
461	pref = (struct kinfo_proc **) realloc(pref, sizeof(struct kinfo_proc *)
462		* (onproc = nproc));
463    if (pref == NULL || pbase == NULL) {
464	(void) fprintf(stderr, "top: Out of memory.\n");
465	quit(23);
466    }
467    /* get a pointer to the states summary array */
468    si->procstates = process_states;
469
470    /* set up flags which define what we are going to select */
471    show_idle = sel->idle;
472    show_system = sel->system;
473    show_uid = sel->uid != -1;
474    show_command = sel->command != NULL;
475
476    /* count up process states and get pointers to interesting procs */
477    total_procs = 0;
478    active_procs = 0;
479    memset((char *)process_states, 0, sizeof(process_states));
480    prefp = pref;
481    for (pp = pbase, i = 0; i < nproc; pp++, i++)
482    {
483	/*
484	 *  Place pointers to each valid proc structure in pref[].
485	 *  Process slots that are actually in use have a non-zero
486	 *  status field.  Processes with P_SYSTEM set are system
487	 *  processes---these get ignored unless show_sysprocs is set.
488	 */
489	if (PP(pp, p_stat) != 0 &&
490	    (show_system || ((PP(pp, p_flag) & P_SYSTEM) == 0)))
491	{
492	    total_procs++;
493	    process_states[(unsigned char) PP(pp, p_stat)]++;
494	    if ((PP(pp, p_stat) != SZOMB) &&
495		(show_idle || (PP(pp, p_pctcpu) != 0) ||
496		 (PP(pp, p_stat) == SRUN)) &&
497		(!show_uid || EP(pp, e_pcred.p_ruid) == (uid_t)sel->uid))
498	    {
499		*prefp++ = pp;
500		active_procs++;
501	    }
502	}
503    }
504
505    /* if requested, sort the "interesting" processes */
506    if (compare != NULL)
507    {
508	qsort((char *)pref, active_procs, sizeof(struct kinfo_proc *), compare);
509    }
510
511    /* remember active and total counts */
512    si->p_total = total_procs;
513    si->p_active = pref_len = active_procs;
514
515    /* pass back a handle */
516    handle.next_proc = pref;
517    handle.remaining = active_procs;
518    return((caddr_t)&handle);
519}
520
521char fmt[128];		/* static area where result is built */
522
523char *format_next_process(handle, get_userid)
524
525caddr_t handle;
526char *(*get_userid)();
527
528{
529    register struct kinfo_proc *pp;
530    register long cputime;
531    register double pct;
532    struct handle *hp;
533    char status[16];
534
535    /* find and remember the next proc structure */
536    hp = (struct handle *)handle;
537    pp = *(hp->next_proc++);
538    hp->remaining--;
539
540
541    /* get the process's user struct and set cputime */
542    if ((PP(pp, p_flag) & P_INMEM) == 0) {
543	/*
544	 * Print swapped processes as <pname>
545	 */
546	char *comm = PP(pp, p_comm);
547#define COMSIZ sizeof(PP(pp, p_comm))
548	char buf[COMSIZ];
549	(void) strncpy(buf, comm, COMSIZ);
550	comm[0] = '<';
551	(void) strncpy(&comm[1], buf, COMSIZ - 2);
552	comm[COMSIZ - 2] = '\0';
553	(void) strncat(comm, ">", COMSIZ - 1);
554	comm[COMSIZ - 1] = '\0';
555    }
556
557#if 0
558    /* This does not produce the correct results */
559    cputime = PP(pp, p_uticks) + PP(pp, p_sticks) + PP(pp, p_iticks);
560#endif
561    cputime = PP(pp, p_rtime).tv_sec;	/* This does not count interrupts */
562
563    /* calculate the base for cpu percentages */
564    pct = pctdouble(PP(pp, p_pctcpu));
565
566    /* generate "STATE" field */
567    switch (PP(pp, p_stat)) {
568	case SRUN:
569	    if (smpmode && PP(pp, p_oncpu) >= 0)
570		sprintf(status, "CPU%d", PP(pp, p_oncpu));
571	    else
572		strcpy(status, "RUN");
573	    break;
574	case SSLEEP:
575	    if (PP(pp, p_wmesg) != NULL) {
576		sprintf(status, "%.6s", EP(pp, e_wmesg));
577		break;
578	    }
579	    /* fall through */
580	default:
581	    sprintf(status, "%.6s", state_abbrev[(unsigned char) PP(pp, p_stat)]);
582	    break;
583    }
584
585    /* format this entry */
586    sprintf(fmt,
587	    smpmode ? smp_Proc_format : up_Proc_format,
588	    PP(pp, p_pid),
589	    (*get_userid)(EP(pp, e_pcred.p_ruid)),
590	    PP(pp, p_priority) - PZERO,
591	    PP(pp, p_nice) - NZERO,
592	    format_k2(pagetok(PROCSIZE(pp))),
593	    format_k2(pagetok(VP(pp, vm_rssize))),
594	    status,
595	    smpmode ? PP(pp, p_lastcpu) : 0,
596	    format_time(cputime),
597	    10000.0 * weighted_cpu(pct, pp) / hz,
598	    10000.0 * pct / hz,
599	    printable(PP(pp, p_comm)));
600
601    /* return the result */
602    return(fmt);
603}
604
605
606/*
607 * check_nlist(nlst) - checks the nlist to see if any symbols were not
608 *		found.  For every symbol that was not found, a one-line
609 *		message is printed to stderr.  The routine returns the
610 *		number of symbols NOT found.
611 */
612
613static int check_nlist(nlst)
614
615register struct nlist *nlst;
616
617{
618    register int i;
619
620    /* check to see if we got ALL the symbols we requested */
621    /* this will write one line to stderr for every symbol not found */
622
623    i = 0;
624    while (nlst->n_name != NULL)
625    {
626	if (nlst->n_type == 0)
627	{
628	    /* this one wasn't found */
629	    (void) fprintf(stderr, "kernel: no symbol named `%s'\n",
630			   nlst->n_name);
631	    i = 1;
632	}
633	nlst++;
634    }
635
636    return(i);
637}
638
639
640/*
641 *  getkval(offset, ptr, size, refstr) - get a value out of the kernel.
642 *	"offset" is the byte offset into the kernel for the desired value,
643 *  	"ptr" points to a buffer into which the value is retrieved,
644 *  	"size" is the size of the buffer (and the object to retrieve),
645 *  	"refstr" is a reference string used when printing error meessages,
646 *	    if "refstr" starts with a '!', then a failure on read will not
647 *  	    be fatal (this may seem like a silly way to do things, but I
648 *  	    really didn't want the overhead of another argument).
649 *
650 */
651
652static int getkval(offset, ptr, size, refstr)
653
654unsigned long offset;
655int *ptr;
656int size;
657char *refstr;
658
659{
660    if (kvm_read(kd, offset, (char *) ptr, size) != size)
661    {
662	if (*refstr == '!')
663	{
664	    return(0);
665	}
666	else
667	{
668	    fprintf(stderr, "top: kvm_read for %s: %s\n",
669		refstr, strerror(errno));
670	    quit(23);
671	}
672    }
673    return(1);
674}
675
676/* comparison routine for qsort */
677
678/*
679 *  proc_compare - comparison function for "qsort"
680 *	Compares the resource consumption of two processes using five
681 *  	distinct keys.  The keys (in descending order of importance) are:
682 *  	percent cpu, cpu ticks, state, resident set size, total virtual
683 *  	memory usage.  The process states are ordered as follows (from least
684 *  	to most important):  WAIT, zombie, sleep, stop, start, run.  The
685 *  	array declaration below maps a process state index into a number
686 *  	that reflects this ordering.
687 */
688
689static unsigned char sorted_state[] =
690{
691    0,	/* not used		*/
692    3,	/* sleep		*/
693    1,	/* ABANDONED (WAIT)	*/
694    6,	/* run			*/
695    5,	/* start		*/
696    2,	/* zombie		*/
697    4	/* stop			*/
698};
699
700int
701proc_compare(pp1, pp2)
702
703struct proc **pp1;
704struct proc **pp2;
705
706{
707    register struct kinfo_proc *p1;
708    register struct kinfo_proc *p2;
709    register int result;
710    register pctcpu lresult;
711
712    /* remove one level of indirection */
713    p1 = *(struct kinfo_proc **) pp1;
714    p2 = *(struct kinfo_proc **) pp2;
715
716    /* compare percent cpu (pctcpu) */
717    if ((lresult = PP(p2, p_pctcpu) - PP(p1, p_pctcpu)) == 0)
718    {
719	/* use cpticks to break the tie */
720	if ((result = PP(p2, p_cpticks) - PP(p1, p_cpticks)) == 0)
721	{
722	    /* use process state to break the tie */
723	    if ((result = sorted_state[(unsigned char) PP(p2, p_stat)] -
724			  sorted_state[(unsigned char) PP(p1, p_stat)])  == 0)
725	    {
726		/* use priority to break the tie */
727		if ((result = PP(p2, p_priority) - PP(p1, p_priority)) == 0)
728		{
729		    /* use resident set size (rssize) to break the tie */
730		    if ((result = VP(p2, vm_rssize) - VP(p1, vm_rssize)) == 0)
731		    {
732			/* use total memory to break the tie */
733			result = PROCSIZE(p2) - PROCSIZE(p1);
734		    }
735		}
736	    }
737	}
738    }
739    else
740    {
741	result = lresult < 0 ? -1 : 1;
742    }
743
744    return(result);
745}
746
747
748/*
749 * proc_owner(pid) - returns the uid that owns process "pid", or -1 if
750 *		the process does not exist.
751 *		It is EXTREMLY IMPORTANT that this function work correctly.
752 *		If top runs setuid root (as in SVR4), then this function
753 *		is the only thing that stands in the way of a serious
754 *		security problem.  It validates requests for the "kill"
755 *		and "renice" commands.
756 */
757
758int proc_owner(pid)
759
760int pid;
761
762{
763    register int cnt;
764    register struct kinfo_proc **prefp;
765    register struct kinfo_proc *pp;
766
767    prefp = pref;
768    cnt = pref_len;
769    while (--cnt >= 0)
770    {
771	pp = *prefp++;
772	if (PP(pp, p_pid) == (pid_t)pid)
773	{
774	    return((int)EP(pp, e_pcred.p_ruid));
775	}
776    }
777    return(-1);
778}
779
780
781/*
782 * swapmode is based on a program called swapinfo written
783 * by Kevin Lahey <kml@rokkaku.atl.ga.us>.
784 */
785
/*
 * Kernel read helpers.  Each one expands to a single statement that
 * reads "s" bytes at a kernel address into "p"; when the read comes up
 * short it prints a warning naming "msg" and makes the ENCLOSING
 * function return 0.  They may therefore only be used in functions
 * returning int, and the invocation must be followed by a semicolon.
 *
 *   KGET(idx, var)        - read variable "var" from symbol nlst[idx]
 *   KGET1(idx, p, s, msg) - read from symbol nlst[idx]
 *   KGET2(addr, p, s, msg) - read from kernel address "addr"
 *   KGETRET               - same as KGET2
 */
#define	SVAR(var) __STRING(var)	/* to force expansion */
#define	KGET(idx, var)							\
	KGET1(idx, &(var), sizeof(var), SVAR(var))
#define	KGET1(idx, p, s, msg)						\
	KGET2(nlst[idx].n_value, p, s, msg)
#define	KGET2(addr, p, s, msg)						\
	do {								\
		if (kvm_read(kd, (u_long)(addr), (p), (s)) != (s)) {	\
			warnx("cannot read %s: %s", (msg),		\
			    kvm_geterr(kd));				\
			return (0);					\
		}							\
	} while (0)
#define	KGETRET(addr, p, s, msg)					\
	KGET2(addr, p, s, msg)
801
802
803int
804swapmode(retavail, retfree)
805	int *retavail;
806	int *retfree;
807{
808	char *header;
809	int hlen, nswap, nswdev, dmmax;
810	int i, div, avail, nfree, npfree, used;
811	struct swdevt *sw;
812	long blocksize, *perdev;
813	u_long ptr;
814	struct rlist head;
815#if __FreeBSD_version >= 220000
816	struct rlisthdr swaplist;
817#else
818	struct rlist *swaplist;
819#endif
820	struct rlist *swapptr;
821
822	/*
823	 * Counter for error messages. If we reach the limit,
824	 * stop reading information from swap devices and
825	 * return zero. This prevent endless 'bad address'
826	 * messages.
827	 */
828	static warning = 10;
829
830	if (warning <= 0) {
831	    /* a single warning */
832	    if (!warning) {
833		warning--;
834		fprintf(stderr,
835			"Too much errors, stop reading swap devices ...\n");
836		(void)sleep(3);
837	    }
838	    return(0);
839	}
840	warning--; /* decrease counter, see end of function */
841
842	KGET(VM_NSWAP, nswap);
843	if (!nswap) {
844		fprintf(stderr, "No swap space available\n");
845		return(0);
846	}
847
848	KGET(VM_NSWDEV, nswdev);
849	KGET(VM_DMMAX, dmmax);
850	KGET1(VM_SWAPLIST, &swaplist, sizeof(swaplist), "swaplist");
851	if ((sw = (struct swdevt *)malloc(nswdev * sizeof(*sw))) == NULL ||
852	    (perdev = (long *)malloc(nswdev * sizeof(*perdev))) == NULL)
853		err(1, "malloc");
854	KGET1(VM_SWDEVT, &ptr, sizeof ptr, "swdevt");
855	KGET2(ptr, sw, nswdev * sizeof(*sw), "*swdevt");
856
857	/* Count up swap space. */
858	nfree = 0;
859	memset(perdev, 0, nswdev * sizeof(*perdev));
860#if  __FreeBSD_version >= 220000
861	swapptr = swaplist.rlh_list;
862	while (swapptr) {
863#else
864	while (swaplist) {
865#endif
866		int	top, bottom, next_block;
867#if  __FreeBSD_version >= 220000
868		KGET2(swapptr, &head, sizeof(struct rlist), "swapptr");
869#else
870		KGET2(swaplist, &head, sizeof(struct rlist), "swaplist");
871#endif
872
873		top = head.rl_end;
874		bottom = head.rl_start;
875
876		nfree += top - bottom + 1;
877
878		/*
879		 * Swap space is split up among the configured disks.
880		 *
881		 * For interleaved swap devices, the first dmmax blocks
882		 * of swap space some from the first disk, the next dmmax
883		 * blocks from the next, and so on up to nswap blocks.
884		 *
885		 * The list of free space joins adjacent free blocks,
886		 * ignoring device boundries.  If we want to keep track
887		 * of this information per device, we'll just have to
888		 * extract it ourselves.
889		 */
890		while (top / dmmax != bottom / dmmax) {
891			next_block = ((bottom + dmmax) / dmmax);
892			perdev[(bottom / dmmax) % nswdev] +=
893				next_block * dmmax - bottom;
894			bottom = next_block * dmmax;
895		}
896		perdev[(bottom / dmmax) % nswdev] +=
897			top - bottom + 1;
898
899#if  __FreeBSD_version >= 220000
900		swapptr = head.rl_next;
901#else
902		swaplist = head.rl_next;
903#endif
904	}
905
906	header = getbsize(&hlen, &blocksize);
907	div = blocksize / 512;
908	avail = npfree = 0;
909	for (i = 0; i < nswdev; i++) {
910		int xsize, xfree;
911
912		/*
913		 * Don't report statistics for partitions which have not
914		 * yet been activated via swapon(8).
915		 */
916
917		xsize = sw[i].sw_nblks;
918		xfree = perdev[i];
919		used = xsize - xfree;
920		npfree++;
921		avail += xsize;
922	}
923
924	/*
925	 * If only one partition has been set up via swapon(8), we don't
926	 * need to bother with totals.
927	 */
928	*retavail = avail / 2;
929	*retfree = nfree / 2;
930	used = avail - nfree;
931	free(sw); free(perdev);
932
933	/* increase counter, no errors occurs */
934	warning++;
935
936	return  (int)(((double)used / (double)avail * 100.0) + 0.5);
937}
938