// SPDX-License-Identifier: GPL-2.0
/*
 * Manage cache of swap slots to be used for and returned from
 * swap.
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * We allocate the swap slots from the global pool and put
 * them into local per cpu caches.  This has the advantage
 * of not needing to acquire the swap_info lock every time
 * we need a new slot.
 *
 * There is also the opportunity to simply return a slot
 * to the local caches without needing to acquire the
 * swap_info lock.  We do not reuse the returned slots directly
 * but move them back to the global pool in a batch.  This
 * allows the slots to coalesce and reduces fragmentation.
 *
 * An allocated swap entry is marked with the SWAP_HAS_CACHE
 * flag in its swap map count, which prevents it from being
 * allocated again from the global pool.
 *
 * The swap slots cache is protected by a mutex instead of
 * a spin lock because we can possibly sleep when we search
 * for slots with scan_swap_map.
 */

#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>

static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
static bool	swap_slot_cache_active;
bool	swap_slot_cache_enabled;
static bool	swap_slot_cache_initialized;
static DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
static DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(unsigned int type);

#define use_swap_slot_cache (swap_slot_cache_active && swap_slot_cache_enabled)
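/*
 * Flags telling drain_slots_cache_cpu() which per cpu cache to drain:
 * SLOTS_CACHE covers the allocation cache (cache->slots),
 * SLOTS_CACHE_RET covers the cache of freed slots (cache->slots_ret).
 */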
#define SLOTS_CACHE 0x1
#define SLOTS_CACHE_RET 0x2

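/*
 * Deactivation drains both per cpu caches back into the global pool
 * and stops the caches from being used until
 * reactivate_swap_slots_cache() flips the flag back.  Reactivation
 * does not refill anything; the caches refill lazily on the next
 * allocation.
 */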
static void deactivate_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_mutex);
	swap_slot_cache_active = false;
	__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
	mutex_unlock(&swap_slots_cache_mutex);
}

static void reactivate_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_mutex);
	swap_slot_cache_active = true;
	mutex_unlock(&swap_slots_cache_mutex);
}

/* Must not be called with cpu hot plug lock */
void disable_swap_slots_cache_lock(void)
{
	mutex_lock(&swap_slots_cache_enable_mutex);
	swap_slot_cache_enabled = false;
	if (swap_slot_cache_initialized) {
		/* serialize with cpu hotplug operations */
		cpus_read_lock();
		__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
		cpus_read_unlock();
	}
}

static void __reenable_swap_slots_cache(void)
{
	swap_slot_cache_enabled = has_usable_swap();
}

void reenable_swap_slots_cache_unlock(void)
{
	__reenable_swap_slots_cache();
	mutex_unlock(&swap_slots_cache_enable_mutex);
}

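/*
 * Decide whether the per cpu caches should be used, based on how much
 * free swap space is left.  Activation and deactivation use different
 * thresholds so the cache does not flip back and forth when the amount
 * of free swap hovers around a single cut-off.
 */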
static bool check_cache_active(void)
{
	long pages;

	if (!swap_slot_cache_enabled)
		return false;

	pages = get_nr_swap_pages();
	if (!swap_slot_cache_active) {
		if (pages > num_online_cpus() *
		    THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
			reactivate_swap_slots_cache();
		goto out;
	}

	/* if the global pool of free swap slots is too low, deactivate the cache */
	if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
		deactivate_swap_slots_cache();
out:
	return swap_slot_cache_active;
}

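/*
 * CPU hotplug "online" callback: allocate the two slot arrays for @cpu
 * and publish them in its per cpu swap_slots_cache.  The early return
 * when the arrays already exist keeps the callback idempotent.
 */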
static int alloc_swap_slot_cache(unsigned int cpu)
{
	struct swap_slots_cache *cache;
	swp_entry_t *slots, *slots_ret;

	/*
	 * Do allocation outside swap_slots_cache_mutex
	 * as kvcalloc could trigger reclaim and folio_alloc_swap,
	 * which can lock swap_slots_cache_mutex.
	 */
	slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
			 GFP_KERNEL);
	if (!slots)
		return -ENOMEM;

	slots_ret = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
			     GFP_KERNEL);
	if (!slots_ret) {
		kvfree(slots);
		return -ENOMEM;
	}

	mutex_lock(&swap_slots_cache_mutex);
	cache = &per_cpu(swp_slots, cpu);
	if (cache->slots || cache->slots_ret) {
		/* cache already allocated */
		mutex_unlock(&swap_slots_cache_mutex);

		kvfree(slots);
		kvfree(slots_ret);

		return 0;
	}

	if (!cache->lock_initialized) {
		mutex_init(&cache->alloc_lock);
		spin_lock_init(&cache->free_lock);
		cache->lock_initialized = true;
	}
	cache->nr = 0;
	cache->cur = 0;
	cache->n_ret = 0;
	/*
	 * We initialized alloc_lock and free_lock earlier.  We use
	 * !cache->slots or !cache->slots_ret to know if it is safe to acquire
	 * the corresponding lock and use the cache.  Memory barrier below
	 * ensures the assumption.
	 */
	mb();
	cache->slots = slots;
	cache->slots_ret = slots_ret;
	mutex_unlock(&swap_slots_cache_mutex);
	return 0;
}

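/*
 * Flush the caches of @cpu selected by @type back to the global pool.
 * With @free_slots set, the backing arrays are freed as well (used
 * when the cpu goes offline); otherwise only the cached entries are
 * returned and the arrays are kept for reuse.
 */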
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
				  bool free_slots)
{
	struct swap_slots_cache *cache;
	swp_entry_t *slots = NULL;

	cache = &per_cpu(swp_slots, cpu);
	if ((type & SLOTS_CACHE) && cache->slots) {
		mutex_lock(&cache->alloc_lock);
		swapcache_free_entries(cache->slots + cache->cur, cache->nr);
		cache->cur = 0;
		cache->nr = 0;
		if (free_slots && cache->slots) {
			kvfree(cache->slots);
			cache->slots = NULL;
		}
		mutex_unlock(&cache->alloc_lock);
	}
	if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
		spin_lock_irq(&cache->free_lock);
		swapcache_free_entries(cache->slots_ret, cache->n_ret);
		cache->n_ret = 0;
		if (free_slots && cache->slots_ret) {
			slots = cache->slots_ret;
			cache->slots_ret = NULL;
		}
		spin_unlock_irq(&cache->free_lock);
		kvfree(slots);
	}
}

static void __drain_swap_slots_cache(unsigned int type)
{
	unsigned int cpu;

	/*
	 * This function is called during
	 *	1) swapoff, when we have to make sure no
	 *	   left over slots are in cache when we remove
	 *	   a swap device;
	 *	2) disabling of swap slot cache, when we run low
	 *	   on swap slots when allocating memory and need
	 *	   to return swap slots to global pool.
	 *
	 * We cannot acquire cpu hot plug lock here as
	 * this function can be invoked in the cpu
	 * hot plug path:
	 * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
	 *   -> memory allocation -> direct reclaim -> folio_alloc_swap
	 *   -> drain_swap_slots_cache
	 *
	 * Hence the loop over current online cpu below could miss cpu that
	 * is being brought online but not yet marked as online.
	 * That is okay as we do not schedule and run anything on a
	 * cpu before it has been marked online. Hence, we will not
	 * fill any swap slots in slots cache of such cpu.
	 * There are no slots on such cpu that need to be drained.
	 */
	for_each_online_cpu(cpu)
		drain_slots_cache_cpu(cpu, type, false);
}

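/* CPU hotplug "offline" callback: drain and free both caches of @cpu. */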
static int free_slot_cache(unsigned int cpu)
{
	mutex_lock(&swap_slots_cache_mutex);
	drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
	mutex_unlock(&swap_slots_cache_mutex);
	return 0;
}

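/*
 * Called when swap is enabled.  The hotplug state is registered only
 * once; subsequent calls just re-evaluate whether usable swap exists
 * and re-enable the cache accordingly.
 */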
void enable_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_enable_mutex);
	if (!swap_slot_cache_initialized) {
		int ret;

		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
					alloc_swap_slot_cache, free_slot_cache);
		if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
				       "without swap slots cache.\n", __func__))
			goto out_unlock;

		swap_slot_cache_initialized = true;
	}

	__reenable_swap_slots_cache();
out_unlock:
	mutex_unlock(&swap_slots_cache_enable_mutex);
}

/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
	if (!use_swap_slot_cache)
		return 0;

	cache->cur = 0;
	if (swap_slot_cache_active)
		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
					   cache->slots, 1);

	return cache->nr;
}

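/*
 * Return a single swap entry.  The fast path stashes it in this cpu's
 * slots_ret cache and only hands a full batch back to the global pool;
 * if the cache is unavailable the entry is freed directly.
 */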
void free_swap_slot(swp_entry_t entry)
{
	struct swap_slots_cache *cache;

	/* Large folio swap slot is not covered. */
	zswap_invalidate(entry);

	cache = raw_cpu_ptr(&swp_slots);
	if (likely(use_swap_slot_cache && cache->slots_ret)) {
		spin_lock_irq(&cache->free_lock);
		/* Swap slots cache may be deactivated before acquiring lock */
		if (!use_swap_slot_cache || !cache->slots_ret) {
			spin_unlock_irq(&cache->free_lock);
			goto direct_free;
		}
		if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
			/*
			 * Return slots to global pool.
			 * The current swap_map value is SWAP_HAS_CACHE.
			 * Set it to 0 to indicate it is available for
			 * allocation in global pool
			 */
			swapcache_free_entries(cache->slots_ret, cache->n_ret);
			cache->n_ret = 0;
		}
		cache->slots_ret[cache->n_ret++] = entry;
		spin_unlock_irq(&cache->free_lock);
	} else {
direct_free:
		swapcache_free_entries(&entry, 1);
	}
}

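/*
 * Allocate a swap entry for @folio.  Large folios bypass the per cpu
 * cache and go straight to get_swap_pages(); order-0 folios take an
 * entry from this cpu's cache when it is active and fall back to the
 * global pool otherwise.  The entry is charged to the folio's memcg;
 * if the charge fails the entry is released and 0 is returned.
 */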
swp_entry_t folio_alloc_swap(struct folio *folio)
{
	swp_entry_t entry;
	struct swap_slots_cache *cache;

	entry.val = 0;

	if (folio_test_large(folio)) {
		if (IS_ENABLED(CONFIG_THP_SWAP) && arch_thp_swp_supported())
			get_swap_pages(1, &entry, folio_nr_pages(folio));
		goto out;
	}

	/*
	 * Preemption is allowed here, because we may sleep
	 * in refill_swap_slots_cache().  But it is safe, because
	 * accesses to the per-CPU data structure are protected by the
	 * mutex cache->alloc_lock.
	 *
	 * The alloc path here does not touch cache->slots_ret
	 * so cache->free_lock is not taken.
	 */
	cache = raw_cpu_ptr(&swp_slots);

	if (likely(check_cache_active() && cache->slots)) {
		mutex_lock(&cache->alloc_lock);
		if (cache->slots) {
repeat:
			if (cache->nr) {
				entry = cache->slots[cache->cur];
				cache->slots[cache->cur++].val = 0;
				cache->nr--;
			} else if (refill_swap_slots_cache(cache)) {
				goto repeat;
			}
		}
		mutex_unlock(&cache->alloc_lock);
		if (entry.val)
			goto out;
	}

	get_swap_pages(1, &entry, 1);
out:
	if (mem_cgroup_try_charge_swap(folio, entry)) {
		put_swap_folio(folio, entry);
		entry.val = 0;
	}
	return entry;
}