// SPDX-License-Identifier: GPL-2.0
265534Salfred#include <linux/swap_cgroup.h>
365534Salfred#include <linux/vmalloc.h>
461837Salfred#include <linux/mm.h>
561837Salfred
661837Salfred#include <linux/swapops.h> /* depends on mm.h include */
761837Salfred
861837Salfredstatic DEFINE_MUTEX(swap_cgroup_mutex);
961837Salfredstruct swap_cgroup_ctrl {
1061837Salfred	struct page **map;
1161837Salfred	unsigned long length;
1261837Salfred	spinlock_t	lock;
1361837Salfred};
1461837Salfred
1561837Salfredstatic struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
1661837Salfred
/* One record per swap slot: the mem_cgroup id recorded for that slot. */
struct swap_cgroup {
	unsigned short id;
};

/* Number of swap_cgroup records that fit in one map page. */
#define SC_PER_PAGE	(PAGE_SIZE / sizeof(struct swap_cgroup))
2161837Salfred
/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), it is accessed directly from swap.
 *
 * This means:
 *  - there is no race in "exchange" when we are accessed via SwapCache,
 *    because SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no user of this entry and
 *    therefore no race.
 * Hence, no lock is needed around "exchange".
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
3595759Stanimura
36129880Sphk/*
3795759Stanimura * allocate buffer for swap_cgroup.
3865534Salfred */
3961837Salfredstatic int swap_cgroup_prepare(int type)
4061837Salfred{
4165534Salfred	struct page *page;
4261837Salfred	struct swap_cgroup_ctrl *ctrl;
4365534Salfred	unsigned long idx, max;
4465534Salfred
4565534Salfred	ctrl = &swap_cgroup_ctrl[type];
4661837Salfred
4765534Salfred	for (idx = 0; idx < ctrl->length; idx++) {
4865534Salfred		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
4965534Salfred		if (!page)
5065534Salfred			goto not_enough_page;
5165534Salfred		ctrl->map[idx] = page;
5265534Salfred
5365534Salfred		if (!(idx % SWAP_CLUSTER_MAX))
5461837Salfred			cond_resched();
5561837Salfred	}
5661837Salfred	return 0;
5761837Salfrednot_enough_page:
5861837Salfred	max = idx;
5961837Salfred	for (idx = 0; idx < max; idx++)
6061837Salfred		__free_page(ctrl->map[idx]);
6161837Salfred
6261837Salfred	return -ENOMEM;
6361837Salfred}
6461837Salfred
6561837Salfredstatic struct swap_cgroup *__lookup_swap_cgroup(struct swap_cgroup_ctrl *ctrl,
6661837Salfred						pgoff_t offset)
6761837Salfred{
6861837Salfred	struct page *mappage;
6961837Salfred	struct swap_cgroup *sc;
7065534Salfred
7161837Salfred	mappage = ctrl->map[offset / SC_PER_PAGE];
7265534Salfred	sc = page_address(mappage);
7365534Salfred	return sc + offset % SC_PER_PAGE;
7465534Salfred}
7565534Salfred
7665534Salfredstatic struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
7765534Salfred					struct swap_cgroup_ctrl **ctrlp)
7865534Salfred{
7995865Salfred	pgoff_t offset = swp_offset(ent);
8095865Salfred	struct swap_cgroup_ctrl *ctrl;
8195865Salfred
8265534Salfred	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
8365534Salfred	if (ctrlp)
8465534Salfred		*ctrlp = ctrl;
8565534Salfred	return __lookup_swap_cgroup(ctrl, offset);
8665534Salfred}
8765534Salfred
8865534Salfred/**
8961837Salfred * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
9061837Salfred * @ent: swap entry to be cmpxchged
9195865Salfred * @old: old id
9295865Salfred * @new: new id
9395865Salfred *
9495865Salfred * Returns old id at success, 0 at failure.
9595865Salfred * (There is no mem_cgroup using 0 as its id)
9665534Salfred */
9765534Salfredunsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
9865534Salfred					unsigned short old, unsigned short new)
9965534Salfred{
10065534Salfred	struct swap_cgroup_ctrl *ctrl;
10165534Salfred	struct swap_cgroup *sc;
10265534Salfred	unsigned long flags;
10365534Salfred	unsigned short retval;
10465534Salfred
10565534Salfred	sc = lookup_swap_cgroup(ent, &ctrl);
10665534Salfred
10795865Salfred	spin_lock_irqsave(&ctrl->lock, flags);
10865534Salfred	retval = sc->id;
10965534Salfred	if (retval == old)
11065534Salfred		sc->id = new;
11165534Salfred	else
11265534Salfred		retval = 0;
11395865Salfred	spin_unlock_irqrestore(&ctrl->lock, flags);
11465534Salfred	return retval;
11595865Salfred}
11665534Salfred
11765534Salfred/**
11895867Salfred * swap_cgroup_record - record mem_cgroup for a set of swap entries
11995867Salfred * @ent: the first swap entry to be recorded into
12065534Salfred * @id: mem_cgroup to be recorded
12161837Salfred * @nr_ents: number of swap entries to be recorded
12261837Salfred *
12365534Salfred * Returns old value at success, 0 at failure.
12461837Salfred * (Of course, old value can be 0.)
12561837Salfred */
12665534Salfredunsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
12765534Salfred				  unsigned int nr_ents)
12865534Salfred{
12965534Salfred	struct swap_cgroup_ctrl *ctrl;
13065534Salfred	struct swap_cgroup *sc;
13165534Salfred	unsigned short old;
13265534Salfred	unsigned long flags;
13365534Salfred	pgoff_t offset = swp_offset(ent);
13465534Salfred	pgoff_t end = offset + nr_ents;
13565534Salfred
13695865Salfred	sc = lookup_swap_cgroup(ent, &ctrl);
13765534Salfred
13865534Salfred	spin_lock_irqsave(&ctrl->lock, flags);
13965534Salfred	old = sc->id;
14065534Salfred	for (;;) {
14165534Salfred		VM_BUG_ON(sc->id != old);
14295865Salfred		sc->id = id;
14365534Salfred		offset++;
14495865Salfred		if (offset == end)
14565534Salfred			break;
14665534Salfred		if (offset % SC_PER_PAGE)
14795867Salfred			sc++;
14895867Salfred		else
14965534Salfred			sc = __lookup_swap_cgroup(ctrl, offset);
15065534Salfred	}
15165534Salfred	spin_unlock_irqrestore(&ctrl->lock, flags);
15265534Salfred
15365534Salfred	return old;
15465534Salfred}
15595865Salfred
15695865Salfred/**
15795865Salfred * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
15895865Salfred * @ent: swap entry to be looked up.
15965534Salfred *
16065534Salfred * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
16161837Salfred */
16261837Salfredunsigned short lookup_swap_cgroup_id(swp_entry_t ent)
16361837Salfred{
16461837Salfred	return lookup_swap_cgroup(ent, NULL)->id;
165130480Srwatson}
16661837Salfred
16765534Salfredint swap_cgroup_swapon(int type, unsigned long max_pages)
16865534Salfred{
16961837Salfred	void *array;
17061837Salfred	unsigned long length;
17165534Salfred	struct swap_cgroup_ctrl *ctrl;
17265534Salfred
17361837Salfred	if (mem_cgroup_disabled())
17465534Salfred		return 0;
17565534Salfred
17665534Salfred	length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
17765534Salfred
17865534Salfred	array = vcalloc(length, sizeof(void *));
17965534Salfred	if (!array)
18065534Salfred		goto nomem;
18165534Salfred
18265534Salfred	ctrl = &swap_cgroup_ctrl[type];
18361837Salfred	mutex_lock(&swap_cgroup_mutex);
18465534Salfred	ctrl->length = length;
18565534Salfred	ctrl->map = array;
18665534Salfred	spin_lock_init(&ctrl->lock);
18765534Salfred	if (swap_cgroup_prepare(type)) {
18865534Salfred		/* memory shortage */
18965534Salfred		ctrl->map = NULL;
19065534Salfred		ctrl->length = 0;
19165534Salfred		mutex_unlock(&swap_cgroup_mutex);
19265534Salfred		vfree(array);
19365534Salfred		goto nomem;
19465534Salfred	}
19565534Salfred	mutex_unlock(&swap_cgroup_mutex);
19665534Salfred
19765534Salfred	return 0;
19865534Salfrednomem:
19965534Salfred	pr_info("couldn't allocate enough memory for swap_cgroup\n");
20065534Salfred	pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n");
20165534Salfred	return -ENOMEM;
20297658Stanimura}
20361837Salfred
20465534Salfredvoid swap_cgroup_swapoff(int type)
20565534Salfred{
20661837Salfred	struct page **map;
20761837Salfred	unsigned long i, length;
20898385Stanimura	struct swap_cgroup_ctrl *ctrl;
20961837Salfred
21061837Salfred	if (mem_cgroup_disabled())
21161837Salfred		return;
21261837Salfred
21365534Salfred	mutex_lock(&swap_cgroup_mutex);
21465534Salfred	ctrl = &swap_cgroup_ctrl[type];
21565534Salfred	map = ctrl->map;
21665534Salfred	length = ctrl->length;
21765534Salfred	ctrl->map = NULL;
218130480Srwatson	ctrl->length = 0;
21965534Salfred	mutex_unlock(&swap_cgroup_mutex);
22065534Salfred
22165534Salfred	if (map) {
22265534Salfred		for (i = 0; i < length; i++) {
22365534Salfred			struct page *page = map[i];
22465534Salfred			if (page)
22565534Salfred				__free_page(page);
22665534Salfred			if (!(i % SWAP_CLUSTER_MAX))
22765534Salfred				cond_resched();
22865534Salfred		}
22965534Salfred		vfree(map);
23095865Salfred	}
23165534Salfred}
23265534Salfred