1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <unistd.h>
6#include <stdio.h>
7#include <signal.h>
8#include <sys/sysinfo.h>
9#include <string.h>
10#include <sys/wait.h>
11#include <sys/mman.h>
12
13#include "../kselftest.h"
14#include "cgroup_util.h"
15
/*
 * Parse one unsigned decimal integer from the start of the file at @path.
 *
 * @path:  file to read.
 * @value: receives the parsed number on success.
 *
 * Returns 0 on success, -1 if the file cannot be opened or its content does
 * not begin with a number.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t destination. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
29
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns the (positive) number of characters formatted on success, or a
 * negative value if the file cannot be opened, formatting fails, or the
 * buffered data cannot be flushed when the stream is closed.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t argument. */
	ret = fprintf(file, "%zu\n", value);
	/*
	 * stdio buffers the output, so a write error may only surface when
	 * fclose() flushes the stream; report it instead of dropping it.
	 */
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
42
/*
 * Load the number stored in /proc/sys/vm/min_free_kbytes into @value.
 * Returns whatever read_int() returns for that file.
 */
static int read_min_free_kb(size_t *value)
{
	static const char sysctl_path[] = "/proc/sys/vm/min_free_kbytes";

	return read_int(sysctl_path, value);
}
47
/*
 * Load the number stored in the zswap debugfs "stored_pages" file into
 * @value. Returns whatever read_int() returns for that file.
 */
static int get_zswap_stored_pages(size_t *value)
{
	static const char debugfs_path[] = "/sys/kernel/debug/zswap/stored_pages";

	return read_int(debugfs_path, value);
}
52
/*
 * Look up the "zswpwb" entry in @cg's memory.stat.
 * The long produced by cg_read_key_long() is narrowed to int on return.
 */
static int get_cg_wb_count(const char *cg)
{
	long wb_count = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return wb_count;
}
57
/*
 * Look up the "zswpout " entry (note the trailing space in the key) in
 * @cgroup's memory.stat and hand back cg_read_key_long()'s result unchanged.
 */
static long get_zswpout(const char *cgroup)
{
	long zswpout = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return zswpout;
}
62
/*
 * cg_run() callback: allocate a buffer, write a marker byte every 4095 bytes,
 * then read every marker back.
 *
 * @cgroup: unused.
 * @arg:    the allocation size in bytes, smuggled through the pointer value.
 *
 * Returns 0 when every marker reads back as written, -1 if the allocation
 * fails or any marker differs.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';

	/* Go through the allocated memory to (z)swap in and out pages */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}

	free(mem);
	return ret;
}
83
/*
 * cg_run() callback: allocate a buffer and write a marker byte every 4095
 * bytes so the memory is actually touched, then release it.
 *
 * @cgroup: unused.
 * @arg:    the allocation size in bytes, smuggled through the pointer value.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* size_t index: an int would overflow for sizes above INT_MAX. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
96
/*
 * Build the cgroup path for @name under @root, create that cgroup and write
 * "1M" to its memory.max.
 *
 * Returns the allocated path on success; the caller destroys the cgroup and
 * frees the string. Returns NULL on any failure, after undoing whatever was
 * already set up.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *path = cg_name(root, name);

	if (!path)
		return NULL;

	if (cg_create(path)) {
		free(path);
		return NULL;
	}

	if (cg_write(path, "memory.max", "1M")) {
		cg_destroy(path);
		free(path);
		return NULL;
	}

	return path;
}
114
115/*
116 * Sanity test to check that pages are written into zswap.
117 */
118static int test_zswap_usage(const char *root)
119{
120	long zswpout_before, zswpout_after;
121	int ret = KSFT_FAIL;
122	char *test_group;
123
124	test_group = cg_name(root, "no_shrink_test");
125	if (!test_group)
126		goto out;
127	if (cg_create(test_group))
128		goto out;
129	if (cg_write(test_group, "memory.max", "1M"))
130		goto out;
131
132	zswpout_before = get_zswpout(test_group);
133	if (zswpout_before < 0) {
134		ksft_print_msg("Failed to get zswpout\n");
135		goto out;
136	}
137
138	/* Allocate more than memory.max to push memory into zswap */
139	if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
140		goto out;
141
142	/* Verify that pages come into zswap */
143	zswpout_after = get_zswpout(test_group);
144	if (zswpout_after <= zswpout_before) {
145		ksft_print_msg("zswpout does not increase after test program\n");
146		goto out;
147	}
148	ret = KSFT_PASS;
149
150out:
151	cg_destroy(test_group);
152	free(test_group);
153	return ret;
154}
155
156/*
157 * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
158 * the cgroup.
159 */
160static int test_swapin_nozswap(const char *root)
161{
162	int ret = KSFT_FAIL;
163	char *test_group;
164	long swap_peak, zswpout;
165
166	test_group = cg_name(root, "no_zswap_test");
167	if (!test_group)
168		goto out;
169	if (cg_create(test_group))
170		goto out;
171	if (cg_write(test_group, "memory.max", "8M"))
172		goto out;
173	if (cg_write(test_group, "memory.zswap.max", "0"))
174		goto out;
175
176	/* Allocate and read more than memory.max to trigger swapin */
177	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
178		goto out;
179
180	/* Verify that pages are swapped out, but no zswap happened */
181	swap_peak = cg_read_long(test_group, "memory.swap.peak");
182	if (swap_peak < 0) {
183		ksft_print_msg("failed to get cgroup's swap_peak\n");
184		goto out;
185	}
186
187	if (swap_peak < MB(24)) {
188		ksft_print_msg("at least 24MB of memory should be swapped out\n");
189		goto out;
190	}
191
192	zswpout = get_zswpout(test_group);
193	if (zswpout < 0) {
194		ksft_print_msg("failed to get zswpout\n");
195		goto out;
196	}
197
198	if (zswpout > 0) {
199		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
200		goto out;
201	}
202
203	ret = KSFT_PASS;
204
205out:
206	cg_destroy(test_group);
207	free(test_group);
208	return ret;
209}
210
211/* Simple test to verify the (z)swapin code paths */
212static int test_zswapin(const char *root)
213{
214	int ret = KSFT_FAIL;
215	char *test_group;
216	long zswpin;
217
218	test_group = cg_name(root, "zswapin_test");
219	if (!test_group)
220		goto out;
221	if (cg_create(test_group))
222		goto out;
223	if (cg_write(test_group, "memory.max", "8M"))
224		goto out;
225	if (cg_write(test_group, "memory.zswap.max", "max"))
226		goto out;
227
228	/* Allocate and read more than memory.max to trigger (z)swap in */
229	if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
230		goto out;
231
232	zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
233	if (zswpin < 0) {
234		ksft_print_msg("failed to get zswpin\n");
235		goto out;
236	}
237
238	if (zswpin < MB(24) / PAGE_SIZE) {
239		ksft_print_msg("at least 24MB should be brought back from zswap\n");
240		goto out;
241	}
242
243	ret = KSFT_PASS;
244
245out:
246	cg_destroy(test_group);
247	free(test_group);
248	return ret;
249}
250
251/*
252 * When trying to store a memcg page in zswap, if the memcg hits its memory
253 * limit in zswap, writeback should affect only the zswapped pages of that
254 * memcg.
255 */
256static int test_no_invasive_cgroup_shrink(const char *root)
257{
258	int ret = KSFT_FAIL;
259	size_t control_allocation_size = MB(10);
260	char *control_allocation, *wb_group = NULL, *control_group = NULL;
261
262	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
263	if (!wb_group)
264		return KSFT_FAIL;
265	if (cg_write(wb_group, "memory.zswap.max", "10K"))
266		goto out;
267	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
268	if (!control_group)
269		goto out;
270
271	/* Push some test_group2 memory into zswap */
272	if (cg_enter_current(control_group))
273		goto out;
274	control_allocation = malloc(control_allocation_size);
275	for (int i = 0; i < control_allocation_size; i += 4095)
276		control_allocation[i] = 'a';
277	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
278		goto out;
279
280	/* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
281	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
282		goto out;
283
284	/* Verify that only zswapped memory from gwb_group has been written back */
285	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
286		ret = KSFT_PASS;
287out:
288	cg_enter_current(root);
289	if (control_group) {
290		cg_destroy(control_group);
291		free(control_group);
292	}
293	cg_destroy(wb_group);
294	free(wb_group);
295	if (control_allocation)
296		free(control_allocation);
297	return ret;
298}
299
/*
 * Arguments and status shared between test_no_kmem_bypass() and the child it
 * runs via no_kmem_bypass_child().
 */
struct no_kmem_bypass_child_args {
	/* Number of bytes the child must allocate and touch. */
	size_t target_alloc_bytes;
	/*
	 * Used as a flag: the child stores true here once its allocation
	 * attempt has finished (successfully or not); the parent polls it.
	 */
	size_t child_allocated;
};
304
305static int no_kmem_bypass_child(const char *cgroup, void *arg)
306{
307	struct no_kmem_bypass_child_args *values = arg;
308	void *allocation;
309
310	allocation = malloc(values->target_alloc_bytes);
311	if (!allocation) {
312		values->child_allocated = true;
313		return -1;
314	}
315	for (long i = 0; i < values->target_alloc_bytes; i += 4095)
316		((char *)allocation)[i] = 'a';
317	values->child_allocated = true;
318	pause();
319	free(allocation);
320	return 0;
321}
322
323/*
324 * When pages owned by a memcg are pushed to zswap by kswapd, they should be
325 * charged to that cgroup. This wasn't the case before commit
326 * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
327 *
328 * The test first allocates memory in a memcg, then raises min_free_kbytes to
329 * a very high value so that the allocation falls below low wm, then makes
330 * another allocation to trigger kswapd that should push the memcg-owned pages
331 * to zswap and verifies that the zswap pages are correctly charged.
332 *
333 * To be run on a VM with at most 4G of memory.
334 */
335static int test_no_kmem_bypass(const char *root)
336{
337	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
338	struct no_kmem_bypass_child_args *values;
339	size_t trigger_allocation_size;
340	int wait_child_iteration = 0;
341	long stored_pages_threshold;
342	struct sysinfo sys_info;
343	int ret = KSFT_FAIL;
344	int child_status;
345	char *test_group;
346	pid_t child_pid;
347
348	/* Read sys info and compute test values accordingly */
349	if (sysinfo(&sys_info) != 0)
350		return KSFT_FAIL;
351	if (sys_info.totalram > 5000000000)
352		return KSFT_SKIP;
353	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
354			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
355	if (values == MAP_FAILED)
356		return KSFT_FAIL;
357	if (read_min_free_kb(&min_free_kb_original))
358		return KSFT_FAIL;
359	min_free_kb_high = sys_info.totalram / 2000;
360	min_free_kb_low = sys_info.totalram / 500000;
361	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
362		sys_info.totalram * 5 / 100;
363	stored_pages_threshold = sys_info.totalram / 5 / 4096;
364	trigger_allocation_size = sys_info.totalram / 20;
365
366	/* Set up test memcg */
367	if (cg_write(root, "cgroup.subtree_control", "+memory"))
368		goto out;
369	test_group = cg_name(root, "kmem_bypass_test");
370	if (!test_group)
371		goto out;
372
373	/* Spawn memcg child and wait for it to allocate */
374	set_min_free_kb(min_free_kb_low);
375	if (cg_create(test_group))
376		goto out;
377	values->child_allocated = false;
378	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
379	if (child_pid < 0)
380		goto out;
381	while (!values->child_allocated && wait_child_iteration++ < 10000)
382		usleep(1000);
383
384	/* Try to wakeup kswapd and let it push child memory to zswap */
385	set_min_free_kb(min_free_kb_high);
386	for (int i = 0; i < 20; i++) {
387		size_t stored_pages;
388		char *trigger_allocation = malloc(trigger_allocation_size);
389
390		if (!trigger_allocation)
391			break;
392		for (int i = 0; i < trigger_allocation_size; i += 4095)
393			trigger_allocation[i] = 'b';
394		usleep(100000);
395		free(trigger_allocation);
396		if (get_zswap_stored_pages(&stored_pages))
397			break;
398		if (stored_pages < 0)
399			break;
400		/* If memory was pushed to zswap, verify it belongs to memcg */
401		if (stored_pages > stored_pages_threshold) {
402			int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
403			int delta = stored_pages * 4096 - zswapped;
404			int result_ok = delta < stored_pages * 4096 / 4;
405
406			ret = result_ok ? KSFT_PASS : KSFT_FAIL;
407			break;
408		}
409	}
410
411	kill(child_pid, SIGTERM);
412	waitpid(child_pid, &child_status, 0);
413out:
414	set_min_free_kb(min_free_kb_original);
415	cg_destroy(test_group);
416	free(test_group);
417	return ret;
418}
419
420#define T(x) { x, #x }
421struct zswap_test {
422	int (*fn)(const char *root);
423	const char *name;
424} tests[] = {
425	T(test_zswap_usage),
426	T(test_swapin_nozswap),
427	T(test_zswapin),
428	T(test_no_kmem_bypass),
429	T(test_no_invasive_cgroup_shrink),
430};
431#undef T
432
/* Report whether /sys/module/zswap exists, as seen by access(F_OK). */
static bool zswap_configured(void)
{
	if (access("/sys/module/zswap", F_OK) != 0)
		return false;

	return true;
}
437
438int main(int argc, char **argv)
439{
440	char root[PATH_MAX];
441	int i, ret = EXIT_SUCCESS;
442
443	if (cg_find_unified_root(root, sizeof(root)))
444		ksft_exit_skip("cgroup v2 isn't mounted\n");
445
446	if (!zswap_configured())
447		ksft_exit_skip("zswap isn't configured\n");
448
449	/*
450	 * Check that memory controller is available:
451	 * memory is listed in cgroup.controllers
452	 */
453	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
454		ksft_exit_skip("memory controller isn't available\n");
455
456	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
457		if (cg_write(root, "cgroup.subtree_control", "+memory"))
458			ksft_exit_skip("Failed to set memory controller\n");
459
460	for (i = 0; i < ARRAY_SIZE(tests); i++) {
461		switch (tests[i].fn(root)) {
462		case KSFT_PASS:
463			ksft_test_result_pass("%s\n", tests[i].name);
464			break;
465		case KSFT_SKIP:
466			ksft_test_result_skip("%s\n", tests[i].name);
467			break;
468		default:
469			ret = EXIT_FAILURE;
470			ksft_test_result_fail("%s\n", tests[i].name);
471			break;
472		}
473	}
474
475	return ret;
476}
477