// SPDX-License-Identifier: GPL-2.0

#define _GNU_SOURCE
#include <linux/limits.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#include "../kselftest.h"
#include "cgroup_util.h"

enum hog_clock_type {
	// Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
	CPU_HOG_CLOCK_PROCESS,
	// Count elapsed time using system wallclock time.
	CPU_HOG_CLOCK_WALL,
};

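/*
 * Bookkeeping for one child cgroup and the process hogging CPU inside it:
 * the cgroup path, the pid of the hogging process, and the usage_usec it
 * accrued in cpu.stat once it has exited.
 */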
struct cpu_hogger {
	char *cgroup;
	pid_t pid;
	long usage;
};

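/*
 * Arguments passed to hog_cpus_timed(): how many hogging threads to spawn,
 * how long to run, and which clock to measure the elapsed time against.
 */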
struct cpu_hog_func_param {
	int nprocs;
	struct timespec ts;
	enum hog_clock_type clock_type;
};

/*
 * This test creates two parent/child cgroup pairs: in the first, the cpu
 * controller is enabled via the parent's cgroup.subtree_control; in the
 * second it is not. It then verifies that the cpu controller shows up in
 * the child's cgroup.controllers only for the first pair.
 */
static int test_cpucg_subtree_control(const char *root)
{
	char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
	int ret = KSFT_FAIL;

	// Create two nested cgroups with the cpu controller enabled.
	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	child = cg_name(parent, "cpucg_test_child");
	if (!child)
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
		goto cleanup;

	// Create two nested cgroups without enabling the cpu controller.
	parent2 = cg_name(root, "cpucg_test_1");
	if (!parent2)
		goto cleanup;

	if (cg_create(parent2))
		goto cleanup;

	child2 = cg_name(parent2, "cpucg_test_child");
	if (!child2)
		goto cleanup;

	if (cg_create(child2))
		goto cleanup;

	if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(child2);
	free(child2);
	cg_destroy(parent);
	free(parent);
	cg_destroy(parent2);
	free(parent2);

	return ret;
}

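/* Thread body that spins forever; it dies when the hogging process exits. */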
static void *hog_cpu_thread_func(void *arg)
{
	while (1)
		;

	return NULL;
}

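/*
 * Returns lhs - rhs as a timespec, clamped to zero if rhs is later than lhs,
 * so the result never goes negative.
 */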
static struct timespec
timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
{
	struct timespec zero = {
		.tv_sec = 0,
		.tv_nsec = 0,
	};
	struct timespec ret;

	if (lhs->tv_sec < rhs->tv_sec)
		return zero;

	ret.tv_sec = lhs->tv_sec - rhs->tv_sec;

	if (lhs->tv_nsec < rhs->tv_nsec) {
		if (ret.tv_sec == 0)
			return zero;

		ret.tv_sec--;
		ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
	} else
		ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;

	return ret;
}

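/*
 * Spawns param->nprocs spinning threads and then sleeps in the calling thread
 * until param->ts has elapsed, measured either against this process' CPU time
 * (CPU_HOG_CLOCK_PROCESS) or against the monotonic wall clock
 * (CPU_HOG_CLOCK_WALL). Meant to be invoked through cg_run()/cg_run_nowait()
 * so that everything runs inside the target cgroup.
 */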
static int hog_cpus_timed(const char *cgroup, void *arg)
{
	const struct cpu_hog_func_param *param =
		(struct cpu_hog_func_param *)arg;
	struct timespec ts_run = param->ts;
	struct timespec ts_remaining = ts_run;
	struct timespec ts_start;
	int i, ret;

	ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
	if (ret != 0)
		return ret;

	for (i = 0; i < param->nprocs; i++) {
		pthread_t tid;

		ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
		if (ret != 0)
			return ret;
	}

	while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
		struct timespec ts_total;

		ret = nanosleep(&ts_remaining, NULL);
		if (ret && errno != EINTR)
			return ret;

		if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
			ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
			if (ret != 0)
				return ret;
		} else {
			struct timespec ts_current;

			ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
			if (ret != 0)
				return ret;

			ts_total = timespec_sub(&ts_current, &ts_start);
		}

		ts_remaining = timespec_sub(&ts_run, &ts_total);
	}

	return 0;
}

/*
 * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
 * cpu.stat shows the expected output.
 */
static int test_cpucg_stats(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec, system_usec;
	long usage_seconds = 2;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
	if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_PROCESS,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (!values_close(usage_usec, expected_usage_usec, 1))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

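/*
 * Common helper for the cpu.weight tests: creates a parent cgroup with the
 * cpu controller enabled and three children with cpu.weight of 50, 100 and
 * 150, starts a CPU hog in each child via spawn_child(), waits for them all
 * to exit, then hands each child's usage_usec to validate().
 */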
static int
run_cpucg_weight_test(
		const char *root,
		pid_t (*spawn_child)(const struct cpu_hogger *child),
		int (*validate)(const struct cpu_hogger *children, int num_children))
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL;
	struct cpu_hogger children[3] = {NULL};

	parent = cg_name(root, "cpucg_test_0");
	if (!parent)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
		if (!children[i].cgroup)
			goto cleanup;

		if (cg_create(children[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(children[i].cgroup, "cpu.weight",
					50 * (i + 1)))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		pid_t pid = spawn_child(&children[i]);
		if (pid <= 0)
			goto cleanup;
		children[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++) {
		int retcode;

		waitpid(children[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(children); i++)
		children[i].usage = cg_read_key_long(children[i].cgroup,
				"cpu.stat", "usage_usec");

	if (validate(children, ARRAY_SIZE(children)))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(children); i++) {
		cg_destroy(children[i].cgroup);
		free(children[i].cgroup);
	}
	cg_destroy(parent);
	free(parent);

	return ret;
}

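/*
 * Starts, without waiting for it, a process in the child's cgroup that hogs
 * ncpus CPUs for 10 seconds of wall-clock time.
 */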
static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
{
	long usage_seconds = 10;
	struct cpu_hog_func_param param = {
		.nprocs = ncpus,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
}

static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, get_nprocs());
}

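/*
 * Validates the overprovisioned case: the children are ordered by ascending
 * cpu.weight, so usage must strictly increase from one child to the next,
 * and each step should be roughly one weight unit (the usage of the
 * weight-50 child), within a 35% margin.
 */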
static int
overprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		long delta;

		if (children[i + 1].usage <= children[i].usage)
			goto cleanup;

		delta = children[i + 1].usage - children[i].usage;
		if (!values_close(delta, children[0].usage, 35))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns as
 * many threads as there are cores, and hogs each CPU as much as possible
 * for some time interval.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * was given proportional runtime as informed by their cpu.weight.
 */
static int test_cpucg_weight_overprovisioned(const char *root)
{
	return run_cpucg_weight_test(root, weight_hog_all_cpus,
			overprovision_validate);
}

static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
{
	return weight_hog_ncpus(child, 1);
}

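/*
 * Validates the underprovisioned case: with spare CPUs available, cpu.weight
 * should not matter, so every child's usage must be within 15% of the first
 * child's usage.
 */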
static int
underprovision_validate(const struct cpu_hogger *children, int num_children)
{
	int ret = KSFT_FAIL, i;

	for (i = 0; i < num_children - 1; i++) {
		if (!values_close(children[i + 1].usage, children[0].usage, 15))
			goto cleanup;
	}

	ret = KSFT_PASS;
cleanup:
	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 50
 * A/C     cpu.weight = 100
 * A/D     cpu.weight = 150
 *
 * A separate process is then created for each child cgroup which spawns a
 * single thread that hogs a CPU. The testcase is only run on systems that
 * have at least one core per hogging thread in the child processes.
 *
 * Once all of the children have exited, we verify that each child cgroup
 * had roughly the same runtime despite having different cpu.weight.
 */
static int test_cpucg_weight_underprovisioned(const char *root)
{
	// Only run the test if there are enough cores to avoid overprovisioning
	// the system.
	if (get_nprocs() < 4)
		return KSFT_SKIP;

	return run_cpucg_weight_test(root, weight_hog_one_cpu,
			underprovision_validate);
}

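/*
 * Common helper for the nested cpu.weight tests: builds the A, A/B, A/C,
 * A/C/D, A/C/E hierarchy described below, runs a wall-clock CPU hog in each
 * leaf (B, D and E), and checks how the usage_usec in cpu.stat was split
 * between the top-level leaf and the nested leaves. If overprovisioned is
 * false, each hog only requests a quarter of the available CPUs and the test
 * is skipped on machines with fewer than 4 CPUs.
 */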
static int
run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
{
	int ret = KSFT_FAIL, i;
	char *parent = NULL, *child = NULL;
	struct cpu_hogger leaf[3] = {NULL};
	long nested_leaf_usage, child_usage;
	int nprocs = get_nprocs();

	if (!overprovisioned) {
		if (nprocs < 4)
			/*
			 * Only run the test if there are enough cores to avoid overprovisioning
			 * the system.
			 */
			return KSFT_SKIP;
		nprocs /= 4;
	}

	parent = cg_name(root, "cpucg_test");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;
	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;
	if (cg_write(child, "cgroup.subtree_control", "+cpu"))
		goto cleanup;
	if (cg_write(child, "cpu.weight", "1000"))
		goto cleanup;

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		const char *ancestor;
		long weight;

		if (i == 0) {
			ancestor = parent;
			weight = 1000;
		} else {
			ancestor = child;
			weight = 5000;
		}
		leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
		if (!leaf[i].cgroup)
			goto cleanup;

		if (cg_create(leaf[i].cgroup))
			goto cleanup;

		if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		pid_t pid;
		struct cpu_hog_func_param param = {
			.nprocs = nprocs,
			.ts = {
				.tv_sec = 10,
				.tv_nsec = 0,
			},
			.clock_type = CPU_HOG_CLOCK_WALL,
		};

		pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
				(void *)&param);
		if (pid <= 0)
			goto cleanup;
		leaf[i].pid = pid;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		int retcode;

		waitpid(leaf[i].pid, &retcode, 0);
		if (!WIFEXITED(retcode))
			goto cleanup;
		if (WEXITSTATUS(retcode))
			goto cleanup;
	}

	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
				"cpu.stat", "usage_usec");
		if (leaf[i].usage <= 0)
			goto cleanup;
	}

	nested_leaf_usage = leaf[1].usage + leaf[2].usage;
	if (overprovisioned) {
		if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
			goto cleanup;
	} else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
		goto cleanup;

	child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
	if (child_usage <= 0)
		goto cleanup;
	if (!values_close(child_usage, nested_leaf_usage, 1))
		goto cleanup;

	ret = KSFT_PASS;
cleanup:
	for (i = 0; i < ARRAY_SIZE(leaf); i++) {
		cg_destroy(leaf[i].cgroup);
		free(leaf[i].cgroup);
	}
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc
 * threads that each burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that the CPU time was
 * split according to the weights: A/B received roughly as much usage_usec in
 * cpu.stat as A/C/D and A/C/E combined.
 */
static int
test_cpucg_nested_weight_overprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, true);
}

/*
 * First, this test creates the following hierarchy:
 * A
 * A/B     cpu.weight = 1000
 * A/C     cpu.weight = 1000
 * A/C/D   cpu.weight = 5000
 * A/C/E   cpu.weight = 5000
 *
 * A separate process is then created for each leaf, which spawns nproc / 4
 * threads that each burn a CPU for a few seconds.
 *
 * Once all of those processes have exited, we verify that each of the leaf
 * cgroups saw roughly the same usage in cpu.stat, since the machine is not
 * overcommitted and cpu.weight should therefore not matter.
 */
static int
test_cpucg_nested_weight_underprovisioned(const char *root)
{
	return run_cpucg_nested_weight_test(root, false);
}

/*
 * This test creates a cgroup with some maximum value within a period, and
 * verifies that a process in the cgroup is not overscheduled.
 */
static int test_cpucg_max(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec;
	long usage_seconds = 1;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *cpucg;

	cpucg = cg_name(root, "cpucg_test");
	if (!cpucg)
		goto cleanup;

	if (cg_create(cpucg))
		goto cleanup;

	if (cg_write(cpucg, "cpu.max", "1000"))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (user_usec >= expected_usage_usec)
		goto cleanup;

	if (values_close(usage_usec, expected_usage_usec, 95))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(cpucg);
	free(cpucg);

	return ret;
}

/*
 * This test verifies that a process inside a nested cgroup is properly
 * throttled when its parent cgroup has cpu.max set.
 */
static int test_cpucg_max_nested(const char *root)
{
	int ret = KSFT_FAIL;
	long usage_usec, user_usec;
	long usage_seconds = 1;
	long expected_usage_usec = usage_seconds * USEC_PER_SEC;
	char *parent, *child;

	parent = cg_name(root, "cpucg_parent");
	child = cg_name(parent, "cpucg_child");
	if (!parent || !child)
		goto cleanup;

	if (cg_create(parent))
		goto cleanup;

	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
		goto cleanup;

	if (cg_create(child))
		goto cleanup;

	if (cg_write(parent, "cpu.max", "1000"))
		goto cleanup;

	struct cpu_hog_func_param param = {
		.nprocs = 1,
		.ts = {
			.tv_sec = usage_seconds,
			.tv_nsec = 0,
		},
		.clock_type = CPU_HOG_CLOCK_WALL,
	};
	if (cg_run(child, hog_cpus_timed, (void *)&param))
		goto cleanup;

	usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
	user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
	if (user_usec <= 0)
		goto cleanup;

	if (user_usec >= expected_usage_usec)
		goto cleanup;

	if (values_close(usage_usec, expected_usage_usec, 95))
		goto cleanup;

	ret = KSFT_PASS;

cleanup:
	cg_destroy(child);
	free(child);
	cg_destroy(parent);
	free(parent);

	return ret;
}

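/* Table of test cases; T() pairs each test function with its printable name. */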
#define T(x) { x, #x }
struct cpucg_test {
	int (*fn)(const char *root);
	const char *name;
} tests[] = {
	T(test_cpucg_subtree_control),
	T(test_cpucg_stats),
	T(test_cpucg_weight_overprovisioned),
	T(test_cpucg_weight_underprovisioned),
	T(test_cpucg_nested_weight_overprovisioned),
	T(test_cpucg_nested_weight_underprovisioned),
	T(test_cpucg_max),
	T(test_cpucg_max_nested),
};
#undef T

int main(int argc, char *argv[])
{
	char root[PATH_MAX];
	int i, ret = EXIT_SUCCESS;

	if (cg_find_unified_root(root, sizeof(root)))
		ksft_exit_skip("cgroup v2 isn't mounted\n");

	if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
		if (cg_write(root, "cgroup.subtree_control", "+cpu"))
			ksft_exit_skip("Failed to set cpu controller\n");

	for (i = 0; i < ARRAY_SIZE(tests); i++) {
		switch (tests[i].fn(root)) {
		case KSFT_PASS:
			ksft_test_result_pass("%s\n", tests[i].name);
			break;
		case KSFT_SKIP:
			ksft_test_result_skip("%s\n", tests[i].name);
			break;
		default:
			ret = EXIT_FAILURE;
			ksft_test_result_fail("%s\n", tests[i].name);
			break;
		}
	}

	return ret;
}