1/*
2 * arch/s390/appldata/appldata_os.c
3 *
4 * Data gathering module for Linux-VM Monitor Stream, Stage 1.
5 * Collects misc. OS related data (CPU utilization, running processes).
6 *
7 * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
8 *
9 * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
10 */
11
12#include <linux/module.h>
13#include <linux/init.h>
14#include <linux/slab.h>
15#include <linux/errno.h>
16#include <linux/kernel_stat.h>
17#include <linux/netdevice.h>
18#include <linux/sched.h>
19#include <asm/appldata.h>
20#include <asm/smp.h>
21
22#include "appldata.h"
23
24
/* Module name for debug messages, etc. */
#define MY_PRINT_NAME	"appldata_os"

/*
 * Split a fixed-point load value into the two parts printed as "%d.%02d"
 * by appldata_print_debug(): LOAD_INT() yields the bits above FSHIFT,
 * LOAD_FRAC() scales the bits below FIXED_1 by 100 and shifts them down
 * the same way. FSHIFT and FIXED_1 are not defined in this file.
 */
#define LOAD_INT(x)	((x) >> FSHIFT)
#define LOAD_FRAC(x)	((((x) & (FIXED_1 - 1)) * 100) >> FSHIFT)
28
29/*
30 * OS data
31 *
32 * This is accessed as binary data by z/VM. If changes to it can't be avoided,
33 * the structure version (product ID, see appldata_base.c) needs to be changed
34 * as well and all documentation and z/VM applications using it must be
35 * updated.
36 *
37 * The record layout is documented in the Linux for zSeries Device Drivers
38 * book:
39 * http://oss.software.ibm.com/developerworks/opensource/linux390/index.shtml
40 */
/*
 * Per-CPU part of the OS record. appldata_get_os_data() fills one entry for
 * every online CPU, converting each cpustat value with cputime_to_jiffies().
 * The struct is packed; it is part of the binary record read by z/VM.
 */
struct appldata_os_per_cpu {
	u32 per_cpu_user;	/* timer ticks spent in user mode   */
	u32 per_cpu_nice;	/* ... spent with modified priority */
	u32 per_cpu_system;	/* ... spent in kernel mode         */
	u32 per_cpu_idle;	/* ... spent in idle mode           */

	/* New in 2.6 */
	u32 per_cpu_irq;	/* ... spent in interrupts          */
	u32 per_cpu_softirq;	/* ... spent in softirqs            */
	u32 per_cpu_iowait;	/* ... spent while waiting for I/O  */

	/* New in modification level 01 */
	u32 per_cpu_steal;	/* ... stolen by hypervisor         */
	u32 cpu_id;		/* number of this CPU (the index used
				   with for_each_online_cpu())      */
} __attribute__((packed));
56
57struct appldata_os_data {
58	u64 timestamp;
59	u32 sync_count_1;	/* after VM collected the record data, */
60	u32 sync_count_2;	/* sync_count_1 and sync_count_2 should be the
61				   same. If not, the record has been updated on
62				   the Linux side while VM was collecting the
63				   (possibly corrupt) data */
64
65	u32 nr_cpus;		/* number of (virtual) CPUs        */
66	u32 per_cpu_size;	/* size of the per-cpu data struct */
67	u32 cpu_offset;		/* offset of the first per-cpu data struct */
68
69	u32 nr_running;		/* number of runnable threads      */
70	u32 nr_threads;		/* number of threads               */
71	u32 avenrun[3];		/* average nr. of running processes during */
72				/* the last 1, 5 and 15 minutes */
73
74	/* New in 2.6 */
75	u32 nr_iowait;		/* number of blocked threads
76				   (waiting for I/O)               */
77
78	/* per cpu data */
79	struct appldata_os_per_cpu os_cpu[0];
80} __attribute__((packed));
81
82static struct appldata_os_data *appldata_os_data;
83
84static struct appldata_ops ops = {
85	.ctl_nr    = CTL_APPLDATA_OS,
86	.name	   = "os",
87	.record_nr = APPLDATA_RECORD_OS_ID,
88	.owner	   = THIS_MODULE,
89	.mod_lvl   = {0xF0, 0xF1},		/* EBCDIC "01" */
90};
91
92
93static inline void appldata_print_debug(struct appldata_os_data *os_data)
94{
95	int a0, a1, a2, i;
96
97	P_DEBUG("--- OS - RECORD ---\n");
98	P_DEBUG("nr_threads   = %u\n", os_data->nr_threads);
99	P_DEBUG("nr_running   = %u\n", os_data->nr_running);
100	P_DEBUG("nr_iowait    = %u\n", os_data->nr_iowait);
101	P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->avenrun[0],
102		os_data->avenrun[1], os_data->avenrun[2]);
103	a0 = os_data->avenrun[0];
104	a1 = os_data->avenrun[1];
105	a2 = os_data->avenrun[2];
106	P_DEBUG("avenrun(float) = %d.%02d / %d.%02d / %d.%02d\n",
107		LOAD_INT(a0), LOAD_FRAC(a0), LOAD_INT(a1), LOAD_FRAC(a1),
108		LOAD_INT(a2), LOAD_FRAC(a2));
109
110	P_DEBUG("nr_cpus = %u\n", os_data->nr_cpus);
111	for (i = 0; i < os_data->nr_cpus; i++) {
112		P_DEBUG("cpu%u : user = %u, nice = %u, system = %u, "
113			"idle = %u, irq = %u, softirq = %u, iowait = %u, "
114			"steal = %u\n",
115				os_data->os_cpu[i].cpu_id,
116				os_data->os_cpu[i].per_cpu_user,
117				os_data->os_cpu[i].per_cpu_nice,
118				os_data->os_cpu[i].per_cpu_system,
119				os_data->os_cpu[i].per_cpu_idle,
120				os_data->os_cpu[i].per_cpu_irq,
121				os_data->os_cpu[i].per_cpu_softirq,
122				os_data->os_cpu[i].per_cpu_iowait,
123				os_data->os_cpu[i].per_cpu_steal);
124	}
125
126	P_DEBUG("sync_count_1 = %u\n", os_data->sync_count_1);
127	P_DEBUG("sync_count_2 = %u\n", os_data->sync_count_2);
128	P_DEBUG("timestamp    = %lX\n", os_data->timestamp);
129}
130
131/*
132 * appldata_get_os_data()
133 *
134 * gather OS data
135 */
136static void appldata_get_os_data(void *data)
137{
138	int i, j, rc;
139	struct appldata_os_data *os_data;
140	unsigned int new_size;
141
142	os_data = data;
143	os_data->sync_count_1++;
144
145	os_data->nr_threads = nr_threads;
146	os_data->nr_running = nr_running();
147	os_data->nr_iowait  = nr_iowait();
148	os_data->avenrun[0] = avenrun[0] + (FIXED_1/200);
149	os_data->avenrun[1] = avenrun[1] + (FIXED_1/200);
150	os_data->avenrun[2] = avenrun[2] + (FIXED_1/200);
151
152	j = 0;
153	for_each_online_cpu(i) {
154		os_data->os_cpu[j].per_cpu_user =
155			cputime_to_jiffies(kstat_cpu(i).cpustat.user);
156		os_data->os_cpu[j].per_cpu_nice =
157			cputime_to_jiffies(kstat_cpu(i).cpustat.nice);
158		os_data->os_cpu[j].per_cpu_system =
159			cputime_to_jiffies(kstat_cpu(i).cpustat.system);
160		os_data->os_cpu[j].per_cpu_idle =
161			cputime_to_jiffies(kstat_cpu(i).cpustat.idle);
162		os_data->os_cpu[j].per_cpu_irq =
163			cputime_to_jiffies(kstat_cpu(i).cpustat.irq);
164		os_data->os_cpu[j].per_cpu_softirq =
165			cputime_to_jiffies(kstat_cpu(i).cpustat.softirq);
166		os_data->os_cpu[j].per_cpu_iowait =
167			cputime_to_jiffies(kstat_cpu(i).cpustat.iowait);
168		os_data->os_cpu[j].per_cpu_steal =
169			cputime_to_jiffies(kstat_cpu(i).cpustat.steal);
170		os_data->os_cpu[j].cpu_id = i;
171		j++;
172	}
173
174	os_data->nr_cpus = j;
175
176	new_size = sizeof(struct appldata_os_data) +
177		   (os_data->nr_cpus * sizeof(struct appldata_os_per_cpu));
178	if (ops.size != new_size) {
179		if (ops.active) {
180			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
181					   APPLDATA_START_INTERVAL_REC,
182					   (unsigned long) ops.data, new_size,
183					   ops.mod_lvl);
184			if (rc != 0) {
185				P_ERROR("os: START NEW DIAG 0xDC failed, "
186					"return code: %d, new size = %i\n", rc,
187					new_size);
188				P_INFO("os: stopping old record now\n");
189			} else
190				P_INFO("os: new record size = %i\n", new_size);
191
192			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
193					   APPLDATA_STOP_REC,
194					   (unsigned long) ops.data, ops.size,
195					   ops.mod_lvl);
196			if (rc != 0)
197				P_ERROR("os: STOP OLD DIAG 0xDC failed, "
198					"return code: %d, old size = %i\n", rc,
199					ops.size);
200			else
201				P_INFO("os: old record size = %i stopped\n",
202					ops.size);
203		}
204		ops.size = new_size;
205	}
206	os_data->timestamp = get_clock();
207	os_data->sync_count_2++;
208#ifdef APPLDATA_DEBUG
209	appldata_print_debug(os_data);
210#endif
211}
212
213
214/*
215 * appldata_os_init()
216 *
217 * init data, register ops
218 */
219static int __init appldata_os_init(void)
220{
221	int rc, max_size;
222
223	max_size = sizeof(struct appldata_os_data) +
224		   (NR_CPUS * sizeof(struct appldata_os_per_cpu));
225	if (max_size > APPLDATA_MAX_REC_SIZE) {
226		P_ERROR("Max. size of OS record = %i, bigger than maximum "
227			"record size (%i)\n", max_size, APPLDATA_MAX_REC_SIZE);
228		rc = -ENOMEM;
229		goto out;
230	}
231	P_DEBUG("max. sizeof(os) = %i, sizeof(os_cpu) = %lu\n", max_size,
232		sizeof(struct appldata_os_per_cpu));
233
234	appldata_os_data = kzalloc(max_size, GFP_DMA);
235	if (appldata_os_data == NULL) {
236		P_ERROR("No memory for %s!\n", ops.name);
237		rc = -ENOMEM;
238		goto out;
239	}
240
241	appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
242	appldata_os_data->cpu_offset   = offsetof(struct appldata_os_data,
243							os_cpu);
244	P_DEBUG("cpu offset = %u\n", appldata_os_data->cpu_offset);
245
246	ops.data = appldata_os_data;
247	ops.callback  = &appldata_get_os_data;
248	rc = appldata_register_ops(&ops);
249	if (rc != 0) {
250		P_ERROR("Error registering ops, rc = %i\n", rc);
251		kfree(appldata_os_data);
252	} else {
253		P_DEBUG("%s-ops registered!\n", ops.name);
254	}
255out:
256	return rc;
257}
258
/*
 * appldata_os_exit()
 *
 * unregister ops
 *
 * Module exit handler: unregisters the ops first, then frees the record
 * buffer that appldata_os_init() allocated.
 */
static void __exit appldata_os_exit(void)
{
	/* unregister before kfree(): ops.data points at the same buffer */
	appldata_unregister_ops(&ops);
	kfree(appldata_os_data);
	P_DEBUG("%s-ops unregistered!\n", ops.name);
}
270
271
/* module entry and exit points */
module_init(appldata_os_init);
module_exit(appldata_os_exit);

/* module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gerald Schaefer");
MODULE_DESCRIPTION("Linux-VM Monitor Stream, OS statistics");
278