1// SPDX-License-Identifier: GPL-2.0-or-later
2/* delayacct.c - per-task delay accounting
3 *
4 * Copyright (C) Shailabh Nagar, IBM Corp. 2006
5 */
6
7#include <linux/sched.h>
8#include <linux/sched/task.h>
9#include <linux/sched/cputime.h>
10#include <linux/sched/clock.h>
11#include <linux/slab.h>
12#include <linux/taskstats.h>
13#include <linux/sysctl.h>
14#include <linux/delayacct.h>
15#include <linux/module.h>
16
/*
 * Static branch key for delay accounting.  Defined false; set_delayacct()
 * enables or disables it together with delayacct_on.
 */
DEFINE_STATIC_KEY_FALSE(delayacct_key);
int delayacct_on __read_mostly;	/* Delay accounting on (1) / off (0); set by "delayacct" boot option or sysctl */
/* Slab cache for task_delay_info, created in delayacct_init() and used by __delayacct_tsk_init() */
struct kmem_cache *delayacct_cache;
20
21static void set_delayacct(bool enabled)
22{
23	if (enabled) {
24		static_branch_enable(&delayacct_key);
25		delayacct_on = 1;
26	} else {
27		delayacct_on = 0;
28		static_branch_disable(&delayacct_key);
29	}
30}
31
/*
 * Handler for the "delayacct" kernel command-line option.
 *
 * @str: remainder of the option string; not used.
 *
 * Only records the request in delayacct_on; the static key is applied
 * later, when delayacct_init() calls set_delayacct(delayacct_on).
 *
 * Always returns 1 (NOTE(review): by __setup() convention this presumably
 * marks the option as handled - confirm against linux/init.h).
 */
static int __init delayacct_setup_enable(char *str)
{
	delayacct_on = 1;
	return 1;
}
__setup("delayacct", delayacct_setup_enable);
38
/*
 * One-time initialisation of delay accounting.
 *
 * Creates the task_delay_info slab cache, initialises accounting state for
 * init_task, and then applies the current value of delayacct_on (which the
 * "delayacct" boot option may have set) through set_delayacct().
 */
void delayacct_init(void)
{
	/* SLAB_PANIC is requested, so delayacct_cache is not checked for NULL here */
	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
	/*
	 * NOTE(review): presumably this may allocate from delayacct_cache via
	 * __delayacct_tsk_init(), hence the cache is created first - confirm
	 * against linux/delayacct.h.
	 */
	delayacct_tsk_init(&init_task);
	set_delayacct(delayacct_on);
}
45
46#ifdef CONFIG_PROC_SYSCTL
47static int sysctl_delayacct(struct ctl_table *table, int write, void *buffer,
48		     size_t *lenp, loff_t *ppos)
49{
50	int state = delayacct_on;
51	struct ctl_table t;
52	int err;
53
54	if (write && !capable(CAP_SYS_ADMIN))
55		return -EPERM;
56
57	t = *table;
58	t.data = &state;
59	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
60	if (err < 0)
61		return err;
62	if (write)
63		set_delayacct(state);
64	return err;
65}
66
67static struct ctl_table kern_delayacct_table[] = {
68	{
69		.procname       = "task_delayacct",
70		.data           = NULL,
71		.maxlen         = sizeof(unsigned int),
72		.mode           = 0644,
73		.proc_handler   = sysctl_delayacct,
74		.extra1         = SYSCTL_ZERO,
75		.extra2         = SYSCTL_ONE,
76	},
77	{ }
78};
79
/*
 * Register kern_delayacct_table under the "kernel" sysctl directory.
 * Hooked up as a late_initcall; always returns 0.
 */
static __init int kernel_delayacct_sysctls_init(void)
{
	register_sysctl_init("kernel", kern_delayacct_table);
	return 0;
}
late_initcall(kernel_delayacct_sysctls_init);
86#endif
87
88void __delayacct_tsk_init(struct task_struct *tsk)
89{
90	tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL);
91	if (tsk->delays)
92		raw_spin_lock_init(&tsk->delays->lock);
93}
94
95/*
96 * Finish delay accounting for a statistic using its timestamps (@start),
97 * accumalator (@total) and @count
98 */
99static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count)
100{
101	s64 ns = local_clock() - *start;
102	unsigned long flags;
103
104	if (ns > 0) {
105		raw_spin_lock_irqsave(lock, flags);
106		*total += ns;
107		(*count)++;
108		raw_spin_unlock_irqrestore(lock, flags);
109	}
110}
111
112void __delayacct_blkio_start(void)
113{
114	current->delays->blkio_start = local_clock();
115}
116
117/*
118 * We cannot rely on the `current` macro, as we haven't yet switched back to
119 * the process being woken.
120 */
121void __delayacct_blkio_end(struct task_struct *p)
122{
123	delayacct_end(&p->delays->lock,
124		      &p->delays->blkio_start,
125		      &p->delays->blkio_delay,
126		      &p->delays->blkio_count);
127}
128
129int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
130{
131	u64 utime, stime, stimescaled, utimescaled;
132	unsigned long long t2, t3;
133	unsigned long flags, t1;
134	s64 tmp;
135
136	task_cputime(tsk, &utime, &stime);
137	tmp = (s64)d->cpu_run_real_total;
138	tmp += utime + stime;
139	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
140
141	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
142	tmp = (s64)d->cpu_scaled_run_real_total;
143	tmp += utimescaled + stimescaled;
144	d->cpu_scaled_run_real_total =
145		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;
146
147	/*
148	 * No locking available for sched_info (and too expensive to add one)
149	 * Mitigate by taking snapshot of values
150	 */
151	t1 = tsk->sched_info.pcount;
152	t2 = tsk->sched_info.run_delay;
153	t3 = tsk->se.sum_exec_runtime;
154
155	d->cpu_count += t1;
156
157	tmp = (s64)d->cpu_delay_total + t2;
158	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
159
160	tmp = (s64)d->cpu_run_virtual_total + t3;
161	d->cpu_run_virtual_total =
162		(tmp < (s64)d->cpu_run_virtual_total) ?	0 : tmp;
163
164	if (!tsk->delays)
165		return 0;
166
167	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
168
169	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
170	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
171	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
172	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
173	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
174	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
175	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
176	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
177	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
178	tmp = d->compact_delay_total + tsk->delays->compact_delay;
179	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
180	tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
181	d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
182	tmp = d->irq_delay_total + tsk->delays->irq_delay;
183	d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
184	d->blkio_count += tsk->delays->blkio_count;
185	d->swapin_count += tsk->delays->swapin_count;
186	d->freepages_count += tsk->delays->freepages_count;
187	d->thrashing_count += tsk->delays->thrashing_count;
188	d->compact_count += tsk->delays->compact_count;
189	d->wpcopy_count += tsk->delays->wpcopy_count;
190	d->irq_count += tsk->delays->irq_count;
191	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
192
193	return 0;
194}
195
196__u64 __delayacct_blkio_ticks(struct task_struct *tsk)
197{
198	__u64 ret;
199	unsigned long flags;
200
201	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
202	ret = nsec_to_clock_t(tsk->delays->blkio_delay);
203	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
204	return ret;
205}
206
207void __delayacct_freepages_start(void)
208{
209	current->delays->freepages_start = local_clock();
210}
211
212void __delayacct_freepages_end(void)
213{
214	delayacct_end(&current->delays->lock,
215		      &current->delays->freepages_start,
216		      &current->delays->freepages_delay,
217		      &current->delays->freepages_count);
218}
219
220void __delayacct_thrashing_start(bool *in_thrashing)
221{
222	*in_thrashing = !!current->in_thrashing;
223	if (*in_thrashing)
224		return;
225
226	current->in_thrashing = 1;
227	current->delays->thrashing_start = local_clock();
228}
229
230void __delayacct_thrashing_end(bool *in_thrashing)
231{
232	if (*in_thrashing)
233		return;
234
235	current->in_thrashing = 0;
236	delayacct_end(&current->delays->lock,
237		      &current->delays->thrashing_start,
238		      &current->delays->thrashing_delay,
239		      &current->delays->thrashing_count);
240}
241
242void __delayacct_swapin_start(void)
243{
244	current->delays->swapin_start = local_clock();
245}
246
247void __delayacct_swapin_end(void)
248{
249	delayacct_end(&current->delays->lock,
250		      &current->delays->swapin_start,
251		      &current->delays->swapin_delay,
252		      &current->delays->swapin_count);
253}
254
255void __delayacct_compact_start(void)
256{
257	current->delays->compact_start = local_clock();
258}
259
260void __delayacct_compact_end(void)
261{
262	delayacct_end(&current->delays->lock,
263		      &current->delays->compact_start,
264		      &current->delays->compact_delay,
265		      &current->delays->compact_count);
266}
267
268void __delayacct_wpcopy_start(void)
269{
270	current->delays->wpcopy_start = local_clock();
271}
272
273void __delayacct_wpcopy_end(void)
274{
275	delayacct_end(&current->delays->lock,
276		      &current->delays->wpcopy_start,
277		      &current->delays->wpcopy_delay,
278		      &current->delays->wpcopy_count);
279}
280
281void __delayacct_irq(struct task_struct *task, u32 delta)
282{
283	unsigned long flags;
284
285	raw_spin_lock_irqsave(&task->delays->lock, flags);
286	task->delays->irq_delay += delta;
287	task->delays->irq_count++;
288	raw_spin_unlock_irqrestore(&task->delays->lock, flags);
289}
290
291