1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * CPU accounting code for task groups.
5 *
6 * Based on the work by Paul Menage (menage@google.com) and Balbir Singh
7 * (balbir@in.ibm.com).
8 */
9
/*
 * Buckets of CPU time reported for an accounting group, split by the
 * mode its tasks were executing in.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time spent in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent in kernel mode */

	/* Number of buckets above; keep this entry last. */
	CPUACCT_STAT_NSTATS,
};
17
18static const char * const cpuacct_stat_desc[] = {
19	[CPUACCT_STAT_USER] = "user",
20	[CPUACCT_STAT_SYSTEM] = "system",
21};
22
/*
 * Per-group CPU accounting state: tracks CPU usage of a group of tasks
 * and its child groups.
 */
struct cpuacct {
	/* embedded cgroup state; css_ca() maps it back to this struct */
	struct cgroup_subsys_state	css;
	/* cpuusage holds pointer to a u64-type object on every CPU */
	u64 __percpu	*cpuusage;
	/* per-CPU counters indexed by CPUTIME_* (user, nice, system, irq, softirq are used here) */
	struct kernel_cpustat __percpu	*cpustat;
};
30
31static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
32{
33	return css ? container_of(css, struct cpuacct, css) : NULL;
34}
35
36/* Return CPU accounting group to which this task belongs */
37static inline struct cpuacct *task_ca(struct task_struct *tsk)
38{
39	return css_ca(task_css(tsk, cpuacct_cgrp_id));
40}
41
42static inline struct cpuacct *parent_ca(struct cpuacct *ca)
43{
44	return css_ca(ca->css.parent);
45}
46
47static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
48static struct cpuacct root_cpuacct = {
49	.cpustat	= &kernel_cpustat,
50	.cpuusage	= &root_cpuacct_cpuusage,
51};
52
53/* Create a new CPU accounting group */
54static struct cgroup_subsys_state *
55cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
56{
57	struct cpuacct *ca;
58
59	if (!parent_css)
60		return &root_cpuacct.css;
61
62	ca = kzalloc(sizeof(*ca), GFP_KERNEL);
63	if (!ca)
64		goto out;
65
66	ca->cpuusage = alloc_percpu(u64);
67	if (!ca->cpuusage)
68		goto out_free_ca;
69
70	ca->cpustat = alloc_percpu(struct kernel_cpustat);
71	if (!ca->cpustat)
72		goto out_free_cpuusage;
73
74	return &ca->css;
75
76out_free_cpuusage:
77	free_percpu(ca->cpuusage);
78out_free_ca:
79	kfree(ca);
80out:
81	return ERR_PTR(-ENOMEM);
82}
83
84/* Destroy an existing CPU accounting group */
85static void cpuacct_css_free(struct cgroup_subsys_state *css)
86{
87	struct cpuacct *ca = css_ca(css);
88
89	free_percpu(ca->cpustat);
90	free_percpu(ca->cpuusage);
91	kfree(ca);
92}
93
94static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
95				 enum cpuacct_stat_index index)
96{
97	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
98	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
99	u64 data;
100
101	/*
102	 * We allow index == CPUACCT_STAT_NSTATS here to read
103	 * the sum of usages.
104	 */
105	if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS))
106		return 0;
107
108#ifndef CONFIG_64BIT
109	/*
110	 * Take rq->lock to make 64-bit read safe on 32-bit platforms.
111	 */
112	raw_spin_rq_lock_irq(cpu_rq(cpu));
113#endif
114
115	switch (index) {
116	case CPUACCT_STAT_USER:
117		data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE];
118		break;
119	case CPUACCT_STAT_SYSTEM:
120		data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] +
121			cpustat[CPUTIME_SOFTIRQ];
122		break;
123	case CPUACCT_STAT_NSTATS:
124		data = *cpuusage;
125		break;
126	}
127
128#ifndef CONFIG_64BIT
129	raw_spin_rq_unlock_irq(cpu_rq(cpu));
130#endif
131
132	return data;
133}
134
135static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu)
136{
137	u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
138	u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
139
140	/* Don't allow to reset global kernel_cpustat */
141	if (ca == &root_cpuacct)
142		return;
143
144#ifndef CONFIG_64BIT
145	/*
146	 * Take rq->lock to make 64-bit write safe on 32-bit platforms.
147	 */
148	raw_spin_rq_lock_irq(cpu_rq(cpu));
149#endif
150	*cpuusage = 0;
151	cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0;
152	cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0;
153	cpustat[CPUTIME_SOFTIRQ] = 0;
154
155#ifndef CONFIG_64BIT
156	raw_spin_rq_unlock_irq(cpu_rq(cpu));
157#endif
158}
159
160/* Return total CPU usage (in nanoseconds) of a group */
161static u64 __cpuusage_read(struct cgroup_subsys_state *css,
162			   enum cpuacct_stat_index index)
163{
164	struct cpuacct *ca = css_ca(css);
165	u64 totalcpuusage = 0;
166	int i;
167
168	for_each_possible_cpu(i)
169		totalcpuusage += cpuacct_cpuusage_read(ca, i, index);
170
171	return totalcpuusage;
172}
173
/*
 * read_u64 handler of the "usage_user" file: the group's user-mode
 * statistic summed over all possible CPUs. @cft is unused.
 */
static u64 cpuusage_user_read(struct cgroup_subsys_state *css,
			      struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_USER);
}
179
/*
 * read_u64 handler of the "usage_sys" file: the group's kernel-mode
 * statistic summed over all possible CPUs. @cft is unused.
 */
static u64 cpuusage_sys_read(struct cgroup_subsys_state *css,
			     struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_SYSTEM);
}
185
/*
 * read_u64 handler of the "usage" file: the group's total usage summed
 * over all possible CPUs (CPUACCT_STAT_NSTATS selects the usage counter).
 * @cft is unused.
 */
static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	return __cpuusage_read(css, CPUACCT_STAT_NSTATS);
}
190
191static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft,
192			  u64 val)
193{
194	struct cpuacct *ca = css_ca(css);
195	int cpu;
196
197	/*
198	 * Only allow '0' here to do a reset.
199	 */
200	if (val)
201		return -EINVAL;
202
203	for_each_possible_cpu(cpu)
204		cpuacct_cpuusage_write(ca, cpu);
205
206	return 0;
207}
208
209static int __cpuacct_percpu_seq_show(struct seq_file *m,
210				     enum cpuacct_stat_index index)
211{
212	struct cpuacct *ca = css_ca(seq_css(m));
213	u64 percpu;
214	int i;
215
216	for_each_possible_cpu(i) {
217		percpu = cpuacct_cpuusage_read(ca, i, index);
218		seq_printf(m, "%llu ", (unsigned long long) percpu);
219	}
220	seq_printf(m, "\n");
221	return 0;
222}
223
/* seq_show handler of "usage_percpu_user": per-CPU user-mode statistic. @V is unused. */
static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER);
}
228
/* seq_show handler of "usage_percpu_sys": per-CPU kernel-mode statistic. @V is unused. */
static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM);
}
233
/*
 * seq_show handler of "usage_percpu": per-CPU usage counter
 * (CPUACCT_STAT_NSTATS selects the usage counter). @V is unused.
 */
static int cpuacct_percpu_seq_show(struct seq_file *m, void *V)
{
	return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS);
}
238
239static int cpuacct_all_seq_show(struct seq_file *m, void *V)
240{
241	struct cpuacct *ca = css_ca(seq_css(m));
242	int index;
243	int cpu;
244
245	seq_puts(m, "cpu");
246	for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
247		seq_printf(m, " %s", cpuacct_stat_desc[index]);
248	seq_puts(m, "\n");
249
250	for_each_possible_cpu(cpu) {
251		seq_printf(m, "%d", cpu);
252		for (index = 0; index < CPUACCT_STAT_NSTATS; index++)
253			seq_printf(m, " %llu",
254				   cpuacct_cpuusage_read(ca, cpu, index));
255		seq_puts(m, "\n");
256	}
257	return 0;
258}
259
260static int cpuacct_stats_show(struct seq_file *sf, void *v)
261{
262	struct cpuacct *ca = css_ca(seq_css(sf));
263	struct task_cputime cputime;
264	u64 val[CPUACCT_STAT_NSTATS];
265	int cpu;
266	int stat;
267
268	memset(&cputime, 0, sizeof(cputime));
269	for_each_possible_cpu(cpu) {
270		u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat;
271
272		cputime.utime += cpustat[CPUTIME_USER];
273		cputime.utime += cpustat[CPUTIME_NICE];
274		cputime.stime += cpustat[CPUTIME_SYSTEM];
275		cputime.stime += cpustat[CPUTIME_IRQ];
276		cputime.stime += cpustat[CPUTIME_SOFTIRQ];
277
278		cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu);
279	}
280
281	cputime_adjust(&cputime, &seq_css(sf)->cgroup->prev_cputime,
282		&val[CPUACCT_STAT_USER], &val[CPUACCT_STAT_SYSTEM]);
283
284	for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
285		seq_printf(sf, "%s %llu\n", cpuacct_stat_desc[stat],
286			nsec_to_clock_t(val[stat]));
287	}
288
289	return 0;
290}
291
/*
 * Control files of the cpuacct controller, registered through
 * cpuacct_cgrp_subsys.legacy_cftypes. The empty entry ends the table.
 */
static struct cftype files[] = {
	/* total usage; writing 0 resets the group's counters */
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	/* user-mode share of the usage */
	{
		.name = "usage_user",
		.read_u64 = cpuusage_user_read,
	},
	/* kernel-mode share of the usage */
	{
		.name = "usage_sys",
		.read_u64 = cpuusage_sys_read,
	},
	/* usage counter, one value per possible CPU */
	{
		.name = "usage_percpu",
		.seq_show = cpuacct_percpu_seq_show,
	},
	/* user-mode statistic, one value per possible CPU */
	{
		.name = "usage_percpu_user",
		.seq_show = cpuacct_percpu_user_seq_show,
	},
	/* kernel-mode statistic, one value per possible CPU */
	{
		.name = "usage_percpu_sys",
		.seq_show = cpuacct_percpu_sys_seq_show,
	},
	/* table of all statistics, one row per possible CPU */
	{
		.name = "usage_all",
		.seq_show = cpuacct_all_seq_show,
	},
	/* adjusted user/system totals, converted with nsec_to_clock_t() */
	{
		.name = "stat",
		.seq_show = cpuacct_stats_show,
	},
	{ }	/* terminate */
};
328
329/*
330 * charge this task's execution time to its accounting group.
331 *
332 * called with rq->lock held.
333 */
334void cpuacct_charge(struct task_struct *tsk, u64 cputime)
335{
336	unsigned int cpu = task_cpu(tsk);
337	struct cpuacct *ca;
338
339	lockdep_assert_rq_held(cpu_rq(cpu));
340
341	for (ca = task_ca(tsk); ca; ca = parent_ca(ca))
342		*per_cpu_ptr(ca->cpuusage, cpu) += cputime;
343}
344
345/*
346 * Add user/system time to cpuacct.
347 *
348 * Note: it's the caller that updates the account of the root cgroup.
349 */
350void cpuacct_account_field(struct task_struct *tsk, int index, u64 val)
351{
352	struct cpuacct *ca;
353
354	for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca))
355		__this_cpu_add(ca->cpustat->cpustat[index], val);
356}
357
/*
 * cgroup subsystem descriptor for cpuacct: hooks up group creation and
 * destruction and the control-file table defined in this file.
 */
struct cgroup_subsys cpuacct_cgrp_subsys = {
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	/* NOTE(review): "legacy" presumably means the cgroup v1 hierarchy - confirm */
	.legacy_cftypes	= files,
	/* NOTE(review): presumably requests initialization early in boot - confirm against cgroup core */
	.early_init	= true,
};
364