1/*
2 * kernel/time/timer_stats.c
3 *
4 * Collect timer usage statistics.
5 *
6 * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
7 * Copyright(C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
8 *
9 * timer_stats is based on timer_top, a similar functionality which was part of
10 * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the
11 * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based
12 * on dynamic allocation of the statistics entries and linear search based
13 * lookup combined with a global lock, rather than the static array, hash
14 * and per-CPU locking which is used by timer_stats. It was written for the
15 * pre hrtimer kernel code and therefore did not take hrtimers into account.
16 * Nevertheless it provided the base for the timer_stats implementation and
17 * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks
18 * for this effort.
19 *
20 * timer_top.c is
21 *	Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus
22 *	Written by Daniel Petrini <d.pensator@gmail.com>
23 *	timer_top.c was released under the GNU General Public License version 2
24 *
25 * We export the addresses and counting of timer functions being called,
26 * the pid and cmdline from the owner process if applicable.
27 *
28 * Start/stop data collection:
29 * # echo 1[0] >/proc/timer_stats
30 *
31 * Display the information collected so far:
32 * # cat /proc/timer_stats
33 *
34 * This program is free software; you can redistribute it and/or modify
35 * it under the terms of the GNU General Public License version 2 as
36 * published by the Free Software Foundation.
37 */
38
39#include <linux/proc_fs.h>
40#include <linux/module.h>
41#include <linux/spinlock.h>
42#include <linux/sched.h>
43#include <linux/seq_file.h>
44#include <linux/kallsyms.h>
45
46#include <asm/uaccess.h>
47
48/*
49 * This is our basic unit of interest: a timer expiry event identified
50 * by the timer, its start/expire functions and the PID of the task that
51 * started the timer. We count the number of times an event happens:
52 */
53struct entry {
54	/*
55	 * Hash list:
56	 */
57	struct entry		*next;
58
59	/*
60	 * Hash keys:
61	 */
62	void			*timer;
63	void			*start_func;
64	void			*expire_func;
65	pid_t			pid;
66
67	/*
68	 * Number of timeout events:
69	 */
70	unsigned long		count;
71
72	/*
73	 * We save the command-line string to preserve
74	 * this information past task exit:
75	 */
76	char			comm[TASK_COMM_LEN + 1];
77
78} ____cacheline_aligned_in_smp;
79
80/*
81 * Spinlock protecting the tables - not taken during lookup:
82 */
83static DEFINE_SPINLOCK(table_lock);
84
85/*
86 * Per-CPU lookup locks for fast hash lookup:
87 */
88static DEFINE_PER_CPU(spinlock_t, lookup_lock);
89
90/*
91 * Mutex to serialize state changes with show-stats activities:
92 */
93static DEFINE_MUTEX(show_mutex);
94
95/*
96 * Collection status, active/inactive:
97 */
98static int __read_mostly active;
99
100/*
101 * Beginning/end timestamps of measurement:
102 */
103static ktime_t time_start, time_stop;
104
105/*
106 * tstat entry structs only get allocated while collection is
107 * active and never freed during that time - this simplifies
108 * things quite a bit.
109 *
110 * They get freed when a new collection period is started.
111 */
112#define MAX_ENTRIES_BITS	10
113#define MAX_ENTRIES		(1UL << MAX_ENTRIES_BITS)
114
115static unsigned long nr_entries;
116static struct entry entries[MAX_ENTRIES];
117
118static atomic_t overflow_count;
119
120/*
121 * The entries are in a hash-table, for fast lookup:
122 */
123#define TSTAT_HASH_BITS		(MAX_ENTRIES_BITS - 1)
124#define TSTAT_HASH_SIZE		(1UL << TSTAT_HASH_BITS)
125#define TSTAT_HASH_MASK		(TSTAT_HASH_SIZE - 1)
126
127#define __tstat_hashfn(entry)						\
128	(((unsigned long)(entry)->timer       ^				\
129	  (unsigned long)(entry)->start_func  ^				\
130	  (unsigned long)(entry)->expire_func ^				\
131	  (unsigned long)(entry)->pid		) & TSTAT_HASH_MASK)
132
133#define tstat_hashentry(entry)	(tstat_hash_table + __tstat_hashfn(entry))
134
135static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
136
137static void reset_entries(void)
138{
139	nr_entries = 0;
140	memset(entries, 0, sizeof(entries));
141	memset(tstat_hash_table, 0, sizeof(tstat_hash_table));
142	atomic_set(&overflow_count, 0);
143}
144
145static struct entry *alloc_entry(void)
146{
147	if (nr_entries >= MAX_ENTRIES)
148		return NULL;
149
150	return entries + nr_entries++;
151}
152
153static int match_entries(struct entry *entry1, struct entry *entry2)
154{
155	return entry1->timer       == entry2->timer	  &&
156	       entry1->start_func  == entry2->start_func  &&
157	       entry1->expire_func == entry2->expire_func &&
158	       entry1->pid	   == entry2->pid;
159}
160
161/*
162 * Look up whether an entry matching this item is present
163 * in the hash already. Must be called with irqs off and the
164 * lookup lock held:
165 */
166static struct entry *tstat_lookup(struct entry *entry, char *comm)
167{
168	struct entry **head, *curr, *prev;
169
170	head = tstat_hashentry(entry);
171	curr = *head;
172
173	/*
174	 * The fastpath is when the entry is already hashed,
175	 * we do this with the lookup lock held, but with the
176	 * table lock not held:
177	 */
178	while (curr) {
179		if (match_entries(curr, entry))
180			return curr;
181
182		curr = curr->next;
183	}
184	/*
185	 * Slowpath: allocate, set up and link a new hash entry:
186	 */
187	prev = NULL;
188	curr = *head;
189
190	spin_lock(&table_lock);
191	/*
192	 * Make sure we have not raced with another CPU:
193	 */
194	while (curr) {
195		if (match_entries(curr, entry))
196			goto out_unlock;
197
198		prev = curr;
199		curr = curr->next;
200	}
201
202	curr = alloc_entry();
203	if (curr) {
204		*curr = *entry;
205		curr->count = 0;
206		curr->next = NULL;
207		memcpy(curr->comm, comm, TASK_COMM_LEN);
208
209		smp_mb(); /* Ensure that curr is initialized before insert */
210
211		if (prev)
212			prev->next = curr;
213		else
214			*head = curr;
215	}
216 out_unlock:
217	spin_unlock(&table_lock);
218
219	return curr;
220}
221
222/**
223 * timer_stats_update_stats - Update the statistics for a timer.
224 * @timer:	pointer to either a timer_list or a hrtimer
225 * @pid:	the pid of the task which set up the timer
226 * @startf:	pointer to the function which did the timer setup
227 * @timerf:	pointer to the timer callback function of the timer
228 * @comm:	name of the process which set up the timer
229 *
230 * When the timer is already registered, then the event counter is
231 * incremented. Otherwise the timer is registered in a free slot.
232 */
233void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
234			      void *timerf, char * comm)
235{
236	/*
237	 * It doesnt matter which lock we take:
238	 */
239	spinlock_t *lock;
240	struct entry *entry, input;
241	unsigned long flags;
242
243	if (likely(!active))
244		return;
245
246	lock = &per_cpu(lookup_lock, raw_smp_processor_id());
247
248	input.timer = timer;
249	input.start_func = startf;
250	input.expire_func = timerf;
251	input.pid = pid;
252
253	spin_lock_irqsave(lock, flags);
254	if (!active)
255		goto out_unlock;
256
257	entry = tstat_lookup(&input, comm);
258	if (likely(entry))
259		entry->count++;
260	else
261		atomic_inc(&overflow_count);
262
263 out_unlock:
264	spin_unlock_irqrestore(lock, flags);
265}
266
267static void print_name_offset(struct seq_file *m, unsigned long addr)
268{
269	char symname[KSYM_NAME_LEN+1];
270
271	if (lookup_symbol_name(addr, symname) < 0)
272		seq_printf(m, "<%p>", (void *)addr);
273	else
274		seq_printf(m, "%s", symname);
275}
276
277static int tstats_show(struct seq_file *m, void *v)
278{
279	struct timespec period;
280	struct entry *entry;
281	unsigned long ms;
282	long events = 0;
283	ktime_t time;
284	int i;
285
286	mutex_lock(&show_mutex);
287	/*
288	 * If still active then calculate up to now:
289	 */
290	if (active)
291		time_stop = ktime_get();
292
293	time = ktime_sub(time_stop, time_start);
294
295	period = ktime_to_timespec(time);
296	ms = period.tv_nsec / 1000000;
297
298	seq_puts(m, "Timer Stats Version: v0.1\n");
299	seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
300	if (atomic_read(&overflow_count))
301		seq_printf(m, "Overflow: %d entries\n",
302			atomic_read(&overflow_count));
303
304	for (i = 0; i < nr_entries; i++) {
305		entry = entries + i;
306		seq_printf(m, "%4lu, %5d %-16s ",
307				entry->count, entry->pid, entry->comm);
308
309		print_name_offset(m, (unsigned long)entry->start_func);
310		seq_puts(m, " (");
311		print_name_offset(m, (unsigned long)entry->expire_func);
312		seq_puts(m, ")\n");
313
314		events += entry->count;
315	}
316
317	ms += period.tv_sec * 1000;
318	if (!ms)
319		ms = 1;
320
321	if (events && period.tv_sec)
322		seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
323			   events / period.tv_sec, events * 1000 / ms);
324	else
325		seq_printf(m, "%ld total events\n", events);
326
327	mutex_unlock(&show_mutex);
328
329	return 0;
330}
331
332/*
333 * After a state change, make sure all concurrent lookup/update
334 * activities have stopped:
335 */
336static void sync_access(void)
337{
338	unsigned long flags;
339	int cpu;
340
341	for_each_online_cpu(cpu) {
342		spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
343		/* nothing */
344		spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
345	}
346}
347
348static ssize_t tstats_write(struct file *file, const char __user *buf,
349			    size_t count, loff_t *offs)
350{
351	char ctl[2];
352
353	if (count != 2 || *offs)
354		return -EINVAL;
355
356	if (copy_from_user(ctl, buf, count))
357		return -EFAULT;
358
359	mutex_lock(&show_mutex);
360	switch (ctl[0]) {
361	case '0':
362		if (active) {
363			active = 0;
364			time_stop = ktime_get();
365			sync_access();
366		}
367		break;
368	case '1':
369		if (!active) {
370			reset_entries();
371			time_start = ktime_get();
372			smp_mb();
373			active = 1;
374		}
375		break;
376	default:
377		count = -EINVAL;
378	}
379	mutex_unlock(&show_mutex);
380
381	return count;
382}
383
384static int tstats_open(struct inode *inode, struct file *filp)
385{
386	return single_open(filp, tstats_show, NULL);
387}
388
389static struct file_operations tstats_fops = {
390	.open		= tstats_open,
391	.read		= seq_read,
392	.write		= tstats_write,
393	.llseek		= seq_lseek,
394	.release	= seq_release,
395};
396
397void __init init_timer_stats(void)
398{
399	int cpu;
400
401	for_each_possible_cpu(cpu)
402		spin_lock_init(&per_cpu(lookup_lock, cpu));
403}
404
405static int __init init_tstats_procfs(void)
406{
407	struct proc_dir_entry *pe;
408
409	pe = create_proc_entry("timer_stats", 0644, NULL);
410	if (!pe)
411		return -ENOMEM;
412
413	pe->proc_fops = &tstats_fops;
414
415	return 0;
416}
417__initcall(init_tstats_procfs);
418