hostres_processor_tbl.c revision 160341
1/*-
2 * Copyright (c) 2005-2006 The FreeBSD Project
3 * All rights reserved.
4 *
5 * Author: Victor Cruceru <soc-victor@freebsd.org>
6 *
7 * Redistribution of this software and documentation and use in source and
8 * binary forms, with or without modification, are permitted provided that
9 * the following conditions are met:
10 *
11 * 1. Redistributions of source code or documentation must retain the above
12 *    copyright notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * $FreeBSD: head/usr.sbin/bsnmpd/modules/snmp_hostres/hostres_processor_tbl.c 160341 2006-07-14 09:07:56Z harti $
30 */
31
32/*
33 * Host Resources MIB for SNMPd. Implementation for hrProcessorTable
34 */
35
36#include <sys/param.h>
37#include <sys/sysctl.h>
38#include <sys/user.h>
39
40#include <assert.h>
41#include <math.h>
42#include <stdlib.h>
43#include <string.h>
44#include <syslog.h>
45
46#include "hostres_snmp.h"
47#include "hostres_oid.h"
48#include "hostres_tree.h"
49
50/*
51 * This structure is used to hold a SNMP table entry
52 * for HOST-RESOURCES-MIB's hrProcessorTable.
53 * Note that index is external being allocated & maintained
54 * by the hrDeviceTable code..
55 */
56struct processor_entry {
57	int32_t		index;
58	const struct asn_oid *frwId;
59	int32_t		load;
60	TAILQ_ENTRY(processor_entry) link;
61	u_char		cpu_no;		/* which cpu, counted from 0 */
62	pid_t		idle_pid;	/* PID of idle process for this CPU */
63
64	/* the samples from the last minute, as required by MIB */
65	double		samples[MAX_CPU_SAMPLES];
66
67	/* current sample to fill in next time, must be < MAX_CPU_SAMPLES */
68	uint32_t	cur_sample_idx;
69
70	/* number of useful samples */
71	uint32_t	sample_cnt;
72};
73TAILQ_HEAD(processor_tbl, processor_entry);
74
75/* the head of the list with hrDeviceTable's entries */
76static struct processor_tbl processor_tbl =
77    TAILQ_HEAD_INITIALIZER(processor_tbl);
78
79/* number of processors in dev tbl */
80static int32_t detected_processor_count;
81
82/* sysctlbyname(hw.ncpu) */
83static int hw_ncpu;
84
85/* sysctlbyname(kern.{ccpu,fscale}) */
86static fixpt_t ccpu;
87static int fscale;
88
89/* tick of PDU where we have refreshed the processor table last */
90static uint64_t proctbl_tick;
91
92/* periodic timer used to get cpu load stats */
93static void *cpus_load_timer;
94
95/*
96 * Average the samples. The entire algorithm seems to be wrong XXX.
97 */
98static int
99get_avg_load(struct processor_entry *e)
100{
101	u_int i;
102	double sum = 0.0;
103
104	assert(e != NULL);
105
106	if (e->sample_cnt == 0)
107		return (0);
108
109	for (i = 0; i < e->sample_cnt; i++)
110		sum += e->samples[i];
111
112	return ((int)floor((double)sum/(double)e->sample_cnt));
113}
114
115/*
116 * Stolen from /usr/src/bin/ps/print.c. The idle process should never
117 * be swapped out :-)
118 */
119static double
120processor_getpcpu(struct kinfo_proc *ki_p)
121{
122
123	if (ccpu == 0 || fscale == 0)
124		return (0.0);
125
126#define	fxtofl(fixpt) ((double)(fixpt) / fscale)
127	return (100.0 * fxtofl(ki_p->ki_pctcpu) /
128	    (1.0 - exp(ki_p->ki_swtime * log(fxtofl(ccpu)))));
129}
130
131/**
132 * Save a new sample
133 */
134static void
135save_sample(struct processor_entry *e, struct kinfo_proc *kp)
136{
137
138	e->samples[e->cur_sample_idx] = 100.0 - processor_getpcpu(kp);
139	e->load = get_avg_load(e);
140	e->cur_sample_idx = (e->cur_sample_idx + 1) % MAX_CPU_SAMPLES;
141
142	if (++e->sample_cnt > MAX_CPU_SAMPLES)
143		e->sample_cnt = MAX_CPU_SAMPLES;
144}
145
146/**
147 * Create a new entry into the processor table.
148 */
149static struct processor_entry *
150proc_create_entry(u_int cpu_no, struct device_map_entry *map)
151{
152	struct device_entry *dev;
153	struct processor_entry *entry;
154	char name[128];
155
156	/*
157	 * If there is no map entry create one by creating a device table
158	 * entry.
159	 */
160	if (map == NULL) {
161		snprintf(name, sizeof(name), "cpu%u", cpu_no);
162		if ((dev = device_entry_create(name, "", "")) == NULL)
163			return (NULL);
164		dev->flags |= HR_DEVICE_IMMUTABLE;
165		STAILQ_FOREACH(map, &device_map, link)
166			if (strcmp(map->name_key, name) == 0)
167				break;
168		if (map == NULL)
169			abort();
170	}
171
172	if ((entry = malloc(sizeof(*entry))) == NULL) {
173		syslog(LOG_ERR, "hrProcessorTable: %s malloc "
174		    "failed: %m", __func__);
175		return (NULL);
176	}
177	memset(entry, 0, sizeof(*entry));
178
179	entry->index = map->hrIndex;
180	entry->load = 0;
181	entry->cpu_no = (u_char)cpu_no;
182	entry->idle_pid = 0;
183	entry->frwId = &oid_zeroDotZero; /* unknown id FIXME */
184
185	INSERT_OBJECT_INT(entry, &processor_tbl);
186
187	HRDBG("CPU %d added with SNMP index=%d",
188	    entry->cpu_no, entry->index);
189
190	return (entry);
191}
192
193/**
194 * Get the PIDs for the idle processes of the CPUs.
195 */
196static void
197processor_get_pids(void)
198{
199	struct kinfo_proc *plist, *kp;
200	int i;
201	int nproc;
202	int cpu;
203	int nchars;
204	struct processor_entry *entry;
205
206	plist = kvm_getprocs(hr_kd, KERN_PROC_ALL, 0, &nproc);
207	if (plist == NULL || nproc < 0) {
208		syslog(LOG_ERR, "hrProcessor: kvm_getprocs() failed: %m");
209		return;
210	}
211
212	for (i = 0, kp = plist; i < nproc; i++, kp++) {
213		if (!IS_KERNPROC(kp))
214			continue;
215
216		if (strcmp(kp->ki_comm, "idle") == 0) {
217			/* single processor system */
218			cpu = 0;
219		} else if (sscanf(kp->ki_comm, "idle: cpu%d%n", &cpu, &nchars)
220		    == 1 && (u_int)nchars == strlen(kp->ki_comm)) {
221			/* MP system */
222		} else
223			/* not an idle process */
224			continue;
225
226		HRDBG("'%s' proc with pid %d is on CPU #%d (last on #%d)",
227		    kp->ki_comm, kp->ki_pid, kp->ki_oncpu, kp->ki_lastcpu);
228
229		TAILQ_FOREACH(entry, &processor_tbl, link)
230			if (entry->cpu_no == kp->ki_lastcpu)
231				break;
232
233		if (entry == NULL) {
234			/* create entry on non-ACPI systems */
235			if ((entry = proc_create_entry(cpu, NULL)) == NULL)
236				continue;
237
238			detected_processor_count++;
239		}
240
241		entry->idle_pid = kp->ki_pid;
242		HRDBG("CPU no. %d with SNMP index=%d has idle PID %d",
243		    entry->cpu_no, entry->index, entry->idle_pid);
244
245		save_sample(entry, kp);
246	}
247}
248
249/**
250 * Scan the device map table for CPUs and create an entry into the
251 * processor table for each CPU. Then fetch the idle PIDs for all CPUs.
252 */
253static void
254create_proc_table(void)
255{
256	struct device_map_entry *map;
257	struct processor_entry *entry;
258	int cpu_no;
259
260	detected_processor_count = 0;
261
262	/*
263	 * Because hrProcessorTable depends on hrDeviceTable,
264	 * the device detection must be performed at this point.
265	 * If not, no entries will be present in the hrProcessor Table.
266	 *
267	 * For non-ACPI system the processors are not in the device table,
268	 * therefor insert them when getting the idle pids. XXX
269	 */
270	STAILQ_FOREACH(map, &device_map, link)
271		if (strncmp(map->name_key, "cpu", strlen("cpu")) == 0 &&
272		    strstr(map->location_key, ".CPU") != NULL) {
273			if (sscanf(map->name_key,"cpu%d", &cpu_no) != 1) {
274				syslog(LOG_ERR, "hrProcessorTable: Failed to "
275				    "get cpu no. from device named '%s'",
276				    map->name_key);
277				continue;
278			}
279
280			if ((entry = proc_create_entry(cpu_no, map)) == NULL)
281				continue;
282
283			detected_processor_count++;
284		}
285
286	HRDBG("%d CPUs detected", detected_processor_count);
287
288	processor_get_pids();
289}
290
291/**
292 * Free the processor table
293 */
294static void
295free_proc_table(void)
296{
297	struct processor_entry *n1;
298
299	while ((n1 = TAILQ_FIRST(&processor_tbl)) != NULL) {
300		TAILQ_REMOVE(&processor_tbl, n1, link);
301		free(n1);
302		detected_processor_count--;
303	}
304
305	assert(detected_processor_count == 0);
306	detected_processor_count = 0;
307}
308
309/**
310 * Init the things for hrProcessorTable.
311 * Scan the device table for processor entries.
312 */
313void
314init_processor_tbl(void)
315{
316	size_t len;
317
318	/* get various parameters from the kernel */
319	len = sizeof(ccpu);
320	if (sysctlbyname("kern.ccpu", &ccpu, &len, NULL, 0) == -1) {
321		syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.ccpu) failed");
322		ccpu = 0;
323	}
324
325	len = sizeof(fscale);
326	if (sysctlbyname("kern.fscale", &fscale, &len, NULL, 0) == -1) {
327		syslog(LOG_ERR, "hrProcessorTable: sysctl(kern.fscale) failed");
328		fscale = 0;
329	}
330
331	/* create the initial processor table */
332	create_proc_table();
333}
334
335/**
336 * Finalization routine for hrProcessorTable.
337 * It destroys the lists and frees any allocated heap memory.
338 */
339void
340fini_processor_tbl(void)
341{
342
343	if (cpus_load_timer != NULL) {
344		timer_stop(cpus_load_timer);
345		cpus_load_timer = NULL;
346	}
347
348	free_proc_table();
349}
350
351/**
352 * Make sure that the number of processors announced by the kernel hw.ncpu
353 * is equal to the number of processors we have found in the device table.
354 * If they differ rescan the device table.
355 */
356static void
357processor_refill_tbl(void)
358{
359
360	HRDBG("hw_ncpu=%d detected_processor_count=%d", hw_ncpu,
361	    detected_processor_count);
362
363	if (hw_ncpu <= 0) {
364		size_t size = sizeof(hw_ncpu);
365
366		if (sysctlbyname("hw.ncpu", &hw_ncpu, &size, NULL, 0) == -1 ||
367		    size != sizeof(hw_ncpu)) {
368			syslog(LOG_ERR, "hrProcessorTable: "
369			    "sysctl(hw.ncpu) failed: %m");
370			hw_ncpu = 0;
371			return;
372		}
373	}
374
375	if (hw_ncpu != detected_processor_count) {
376		free_proc_table();
377		create_proc_table();
378	}
379}
380
381/**
382 * Refresh all values in the processor table. We call this once for
383 * every PDU that accesses the table.
384 */
385static void
386refresh_processor_tbl(void)
387{
388	struct processor_entry *entry;
389	int need_pids;
390	struct kinfo_proc *plist;
391	int nproc;
392
393	processor_refill_tbl();
394
395	need_pids = 0;
396	TAILQ_FOREACH(entry, &processor_tbl, link) {
397		if (entry->idle_pid <= 0) {
398			need_pids = 1;
399			continue;
400		}
401
402		assert(hr_kd != NULL);
403
404		plist = kvm_getprocs(hr_kd, KERN_PROC_PID,
405		    entry->idle_pid, &nproc);
406		if (plist == NULL || nproc != 1) {
407			syslog(LOG_ERR, "%s: missing item with "
408			    "PID = %d for CPU #%d\n ", __func__,
409			    entry->idle_pid, entry->cpu_no);
410			need_pids = 1;
411			continue;
412		}
413		save_sample(entry, plist);
414	}
415
416	if (need_pids == 1)
417		processor_get_pids();
418
419	proctbl_tick = this_tick;
420}
421
422/**
423 * This function is called MAX_CPU_SAMPLES times per minute to collect the
424 * CPU load.
425 */
426static void
427get_cpus_samples(void *arg __unused)
428{
429
430	HRDBG("[%llu] ENTER", (unsigned long long)get_ticks());
431	refresh_processor_tbl();
432	HRDBG("[%llu] EXIT", (unsigned long long)get_ticks());
433}
434
435/**
436 * Called to start this table. We need to start the periodic idle
437 * time collection.
438 */
439void
440start_processor_tbl(struct lmodule *mod)
441{
442
443	/*
444	 * Start the cpu stats collector
445	 * The semantics of timer_start parameters is in "SNMP ticks";
446	 * we have 100 "SNMP ticks" per second, thus we are trying below
447	 * to get MAX_CPU_SAMPLES per minute
448	 */
449	cpus_load_timer = timer_start_repeat(100, 100 * 60 / MAX_CPU_SAMPLES,
450	    get_cpus_samples, NULL, mod);
451}
452
453/**
454 * Access routine for the processor table.
455 */
456int
457op_hrProcessorTable(struct snmp_context *ctx __unused,
458    struct snmp_value *value, u_int sub, u_int iidx __unused,
459    enum snmp_op curr_op)
460{
461	struct processor_entry *entry;
462
463	if (this_tick != proctbl_tick)
464		refresh_processor_tbl();
465
466	switch (curr_op) {
467
468	case SNMP_OP_GETNEXT:
469		if ((entry = NEXT_OBJECT_INT(&processor_tbl,
470		    &value->var, sub)) == NULL)
471			return (SNMP_ERR_NOSUCHNAME);
472		value->var.len = sub + 1;
473		value->var.subs[sub] = entry->index;
474		goto get;
475
476	case SNMP_OP_GET:
477		if ((entry = FIND_OBJECT_INT(&processor_tbl,
478		    &value->var, sub)) == NULL)
479			return (SNMP_ERR_NOSUCHNAME);
480		goto get;
481
482	case SNMP_OP_SET:
483		if ((entry = FIND_OBJECT_INT(&processor_tbl,
484		    &value->var, sub)) == NULL)
485			return (SNMP_ERR_NO_CREATION);
486		return (SNMP_ERR_NOT_WRITEABLE);
487
488	case SNMP_OP_ROLLBACK:
489	case SNMP_OP_COMMIT:
490		abort();
491	}
492	abort();
493
494  get:
495	switch (value->var.subs[sub - 1]) {
496
497	case LEAF_hrProcessorFrwID:
498		assert(entry->frwId != NULL);
499		value->v.oid = *entry->frwId;
500		return (SNMP_ERR_NOERROR);
501
502	case LEAF_hrProcessorLoad:
503		value->v.integer = entry->load;
504		return (SNMP_ERR_NOERROR);
505	}
506	abort();
507}
508