1// SPDX-License-Identifier: GPL-2.0-or-later
2/* Volume-level cache cookie handling.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#define FSCACHE_DEBUG_LEVEL COOKIE
9#include <linux/export.h>
10#include <linux/slab.h>
11#include "internal.h"
12
/* Number of low-order bits of a volume's key hash used to pick a bucket. */
#define fscache_volume_hash_shift 10
/* Hash table of volumes, indexed by the low bits of volume->key_hash. */
static struct hlist_bl_head fscache_volume_hash[1 << fscache_volume_hash_shift];
/* Counter from which each newly allocated volume's debug_id is taken. */
static atomic_t fscache_volume_debug_id;
/* List of all allocated volumes, linked through volume->proc_link.  It is
 * altered with fscache_addremove_sem held for writing and walked by the
 * /proc listing code with it held for reading.
 */
static LIST_HEAD(fscache_volumes);

/* Work item handler that asks the cache to set up a volume (defined below). */
static void fscache_create_volume_work(struct work_struct *work);
19
20struct fscache_volume *fscache_get_volume(struct fscache_volume *volume,
21					  enum fscache_volume_trace where)
22{
23	int ref;
24
25	__refcount_inc(&volume->ref, &ref);
26	trace_fscache_volume(volume->debug_id, ref + 1, where);
27	return volume;
28}
29
30static void fscache_see_volume(struct fscache_volume *volume,
31			       enum fscache_volume_trace where)
32{
33	int ref = refcount_read(&volume->ref);
34
35	trace_fscache_volume(volume->debug_id, ref, where);
36}
37
38/*
39 * Pin the cache behind a volume so that we can access it.
40 */
41static void __fscache_begin_volume_access(struct fscache_volume *volume,
42					  struct fscache_cookie *cookie,
43					  enum fscache_access_trace why)
44{
45	int n_accesses;
46
47	n_accesses = atomic_inc_return(&volume->n_accesses);
48	smp_mb__after_atomic();
49	trace_fscache_access_volume(volume->debug_id, cookie ? cookie->debug_id : 0,
50				    refcount_read(&volume->ref),
51				    n_accesses, why);
52}
53
54/**
55 * fscache_begin_volume_access - Pin a cache so a volume can be accessed
56 * @volume: The volume cookie
57 * @cookie: A datafile cookie for a tracing reference (or NULL)
58 * @why: An indication of the circumstances of the access for tracing
59 *
60 * Attempt to pin the cache to prevent it from going away whilst we're
61 * accessing a volume and returns true if successful.  This works as follows:
62 *
63 *  (1) If the cache tests as not live (state is not FSCACHE_CACHE_IS_ACTIVE),
64 *      then we return false to indicate access was not permitted.
65 *
66 *  (2) If the cache tests as live, then we increment the volume's n_accesses
67 *      count and then recheck the cache liveness, ending the access if it
68 *      ceased to be live.
69 *
70 *  (3) When we end the access, we decrement the volume's n_accesses and wake
71 *      up the any waiters if it reaches 0.
72 *
73 *  (4) Whilst the cache is caching, the volume's n_accesses is kept
74 *      artificially incremented to prevent wakeups from happening.
75 *
76 *  (5) When the cache is taken offline, the state is changed to prevent new
77 *      accesses, the volume's n_accesses is decremented and we wait for it to
78 *      become 0.
79 *
80 * The datafile @cookie and the @why indicator are merely provided for tracing
81 * purposes.
82 */
83bool fscache_begin_volume_access(struct fscache_volume *volume,
84				 struct fscache_cookie *cookie,
85				 enum fscache_access_trace why)
86{
87	if (!fscache_cache_is_live(volume->cache))
88		return false;
89	__fscache_begin_volume_access(volume, cookie, why);
90	if (!fscache_cache_is_live(volume->cache)) {
91		fscache_end_volume_access(volume, cookie, fscache_access_unlive);
92		return false;
93	}
94	return true;
95}
96
97/**
98 * fscache_end_volume_access - Unpin a cache at the end of an access.
99 * @volume: The volume cookie
100 * @cookie: A datafile cookie for a tracing reference (or NULL)
101 * @why: An indication of the circumstances of the access for tracing
102 *
103 * Unpin a cache volume after we've accessed it.  The datafile @cookie and the
104 * @why indicator are merely provided for tracing purposes.
105 */
106void fscache_end_volume_access(struct fscache_volume *volume,
107			       struct fscache_cookie *cookie,
108			       enum fscache_access_trace why)
109{
110	int n_accesses;
111
112	smp_mb__before_atomic();
113	n_accesses = atomic_dec_return(&volume->n_accesses);
114	trace_fscache_access_volume(volume->debug_id, cookie ? cookie->debug_id : 0,
115				    refcount_read(&volume->ref),
116				    n_accesses, why);
117	if (n_accesses == 0)
118		wake_up_var(&volume->n_accesses);
119}
120EXPORT_SYMBOL(fscache_end_volume_access);
121
122static bool fscache_volume_same(const struct fscache_volume *a,
123				const struct fscache_volume *b)
124{
125	size_t klen;
126
127	if (a->key_hash	!= b->key_hash ||
128	    a->cache	!= b->cache ||
129	    a->key[0]	!= b->key[0])
130		return false;
131
132	klen = round_up(a->key[0] + 1, sizeof(__le32));
133	return memcmp(a->key, b->key, klen) == 0;
134}
135
136static bool fscache_is_acquire_pending(struct fscache_volume *volume)
137{
138	return test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &volume->flags);
139}
140
141static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
142					     unsigned int collidee_debug_id)
143{
144	wait_on_bit_timeout(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
145			    TASK_UNINTERRUPTIBLE, 20 * HZ);
146	if (fscache_is_acquire_pending(candidate)) {
147		pr_notice("Potential volume collision new=%08x old=%08x",
148			  candidate->debug_id, collidee_debug_id);
149		fscache_stat(&fscache_n_volumes_collision);
150		wait_on_bit(&candidate->flags, FSCACHE_VOLUME_ACQUIRE_PENDING,
151			    TASK_UNINTERRUPTIBLE);
152	}
153}
154
155/*
156 * Attempt to insert the new volume into the hash.  If there's a collision, we
157 * wait for the old volume to complete if it's being relinquished and an error
158 * otherwise.
159 */
160static bool fscache_hash_volume(struct fscache_volume *candidate)
161{
162	struct fscache_volume *cursor;
163	struct hlist_bl_head *h;
164	struct hlist_bl_node *p;
165	unsigned int bucket, collidee_debug_id = 0;
166
167	bucket = candidate->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1);
168	h = &fscache_volume_hash[bucket];
169
170	hlist_bl_lock(h);
171	hlist_bl_for_each_entry(cursor, p, h, hash_link) {
172		if (fscache_volume_same(candidate, cursor)) {
173			if (!test_bit(FSCACHE_VOLUME_RELINQUISHED, &cursor->flags))
174				goto collision;
175			fscache_see_volume(cursor, fscache_volume_get_hash_collision);
176			set_bit(FSCACHE_VOLUME_COLLIDED_WITH, &cursor->flags);
177			set_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &candidate->flags);
178			collidee_debug_id = cursor->debug_id;
179			break;
180		}
181	}
182
183	hlist_bl_add_head(&candidate->hash_link, h);
184	hlist_bl_unlock(h);
185
186	if (fscache_is_acquire_pending(candidate))
187		fscache_wait_on_volume_collision(candidate, collidee_debug_id);
188	return true;
189
190collision:
191	fscache_see_volume(cursor, fscache_volume_collision);
192	hlist_bl_unlock(h);
193	return false;
194}
195
196/*
197 * Allocate and initialise a volume representation cookie.
198 */
199static struct fscache_volume *fscache_alloc_volume(const char *volume_key,
200						   const char *cache_name,
201						   const void *coherency_data,
202						   size_t coherency_len)
203{
204	struct fscache_volume *volume;
205	struct fscache_cache *cache;
206	size_t klen, hlen;
207	u8 *key;
208
209	klen = strlen(volume_key);
210	if (klen > NAME_MAX)
211		return NULL;
212
213	if (!coherency_data)
214		coherency_len = 0;
215
216	cache = fscache_lookup_cache(cache_name, false);
217	if (IS_ERR(cache))
218		return NULL;
219
220	volume = kzalloc(struct_size(volume, coherency, coherency_len),
221			 GFP_KERNEL);
222	if (!volume)
223		goto err_cache;
224
225	volume->cache = cache;
226	volume->coherency_len = coherency_len;
227	if (coherency_data)
228		memcpy(volume->coherency, coherency_data, coherency_len);
229	INIT_LIST_HEAD(&volume->proc_link);
230	INIT_WORK(&volume->work, fscache_create_volume_work);
231	refcount_set(&volume->ref, 1);
232	spin_lock_init(&volume->lock);
233
234	/* Stick the length on the front of the key and pad it out to make
235	 * hashing easier.
236	 */
237	hlen = round_up(1 + klen + 1, sizeof(__le32));
238	key = kzalloc(hlen, GFP_KERNEL);
239	if (!key)
240		goto err_vol;
241	key[0] = klen;
242	memcpy(key + 1, volume_key, klen);
243
244	volume->key = key;
245	volume->key_hash = fscache_hash(0, key, hlen);
246
247	volume->debug_id = atomic_inc_return(&fscache_volume_debug_id);
248	down_write(&fscache_addremove_sem);
249	atomic_inc(&cache->n_volumes);
250	list_add_tail(&volume->proc_link, &fscache_volumes);
251	fscache_see_volume(volume, fscache_volume_new_acquire);
252	fscache_stat(&fscache_n_volumes);
253	up_write(&fscache_addremove_sem);
254	_leave(" = v=%x", volume->debug_id);
255	return volume;
256
257err_vol:
258	kfree(volume);
259err_cache:
260	fscache_put_cache(cache, fscache_cache_put_alloc_volume);
261	fscache_stat(&fscache_n_volumes_nomem);
262	return NULL;
263}
264
265/*
266 * Create a volume's representation on disk.  Have a volume ref and a cache
267 * access we have to release.
268 */
269static void fscache_create_volume_work(struct work_struct *work)
270{
271	const struct fscache_cache_ops *ops;
272	struct fscache_volume *volume =
273		container_of(work, struct fscache_volume, work);
274
275	fscache_see_volume(volume, fscache_volume_see_create_work);
276
277	ops = volume->cache->ops;
278	if (ops->acquire_volume)
279		ops->acquire_volume(volume);
280	fscache_end_cache_access(volume->cache,
281				 fscache_access_acquire_volume_end);
282
283	clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
284	fscache_put_volume(volume, fscache_volume_put_create_work);
285}
286
287/*
288 * Dispatch a worker thread to create a volume's representation on disk.
289 */
290void fscache_create_volume(struct fscache_volume *volume, bool wait)
291{
292	if (test_and_set_bit(FSCACHE_VOLUME_CREATING, &volume->flags))
293		goto maybe_wait;
294	if (volume->cache_priv)
295		goto no_wait; /* We raced */
296	if (!fscache_begin_cache_access(volume->cache,
297					fscache_access_acquire_volume))
298		goto no_wait;
299
300	fscache_get_volume(volume, fscache_volume_get_create_work);
301	if (!schedule_work(&volume->work))
302		fscache_put_volume(volume, fscache_volume_put_create_work);
303
304maybe_wait:
305	if (wait) {
306		fscache_see_volume(volume, fscache_volume_wait_create_work);
307		wait_on_bit(&volume->flags, FSCACHE_VOLUME_CREATING,
308			    TASK_UNINTERRUPTIBLE);
309	}
310	return;
311no_wait:
312	clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
313	wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
314}
315
316/*
317 * Acquire a volume representation cookie and link it to a (proposed) cache.
318 */
319struct fscache_volume *__fscache_acquire_volume(const char *volume_key,
320						const char *cache_name,
321						const void *coherency_data,
322						size_t coherency_len)
323{
324	struct fscache_volume *volume;
325
326	volume = fscache_alloc_volume(volume_key, cache_name,
327				      coherency_data, coherency_len);
328	if (!volume)
329		return ERR_PTR(-ENOMEM);
330
331	if (!fscache_hash_volume(volume)) {
332		fscache_put_volume(volume, fscache_volume_put_hash_collision);
333		return ERR_PTR(-EBUSY);
334	}
335
336	fscache_create_volume(volume, false);
337	return volume;
338}
339EXPORT_SYMBOL(__fscache_acquire_volume);
340
341static void fscache_wake_pending_volume(struct fscache_volume *volume,
342					struct hlist_bl_head *h)
343{
344	struct fscache_volume *cursor;
345	struct hlist_bl_node *p;
346
347	hlist_bl_for_each_entry(cursor, p, h, hash_link) {
348		if (fscache_volume_same(cursor, volume)) {
349			fscache_see_volume(cursor, fscache_volume_see_hash_wake);
350			clear_and_wake_up_bit(FSCACHE_VOLUME_ACQUIRE_PENDING,
351					      &cursor->flags);
352			return;
353		}
354	}
355}
356
357/*
358 * Remove a volume cookie from the hash table.
359 */
360static void fscache_unhash_volume(struct fscache_volume *volume)
361{
362	struct hlist_bl_head *h;
363	unsigned int bucket;
364
365	bucket = volume->key_hash & (ARRAY_SIZE(fscache_volume_hash) - 1);
366	h = &fscache_volume_hash[bucket];
367
368	hlist_bl_lock(h);
369	hlist_bl_del(&volume->hash_link);
370	if (test_bit(FSCACHE_VOLUME_COLLIDED_WITH, &volume->flags))
371		fscache_wake_pending_volume(volume, h);
372	hlist_bl_unlock(h);
373}
374
/*
 * Dispose of a volume: have the cache drop its attachment to the volume (if
 * it has one), take the volume off the list of volumes and out of the hash
 * table, free its memory and drop a ref on the cache.
 */
static void fscache_free_volume(struct fscache_volume *volume)
{
	/* Saved up front as it's needed after the volume has been freed. */
	struct fscache_cache *cache = volume->cache;

	/* Only call into the cache if it attached private data.  cache_priv
	 * is checked again once the access has been counted.
	 */
	if (volume->cache_priv) {
		__fscache_begin_volume_access(volume, NULL,
					      fscache_access_relinquish_volume);
		if (volume->cache_priv)
			cache->ops->free_volume(volume);
		fscache_end_volume_access(volume, NULL,
					  fscache_access_relinquish_volume_end);
	}

	/* Take the volume off the global list and out of the cache's count. */
	down_write(&fscache_addremove_sem);
	list_del_init(&volume->proc_link);
	atomic_dec(&volume->cache->n_volumes);
	up_write(&fscache_addremove_sem);

	/* The volume may never have been added to the hash table. */
	if (!hlist_bl_unhashed(&volume->hash_link))
		fscache_unhash_volume(volume);

	trace_fscache_volume(volume->debug_id, 0, fscache_volume_free);
	kfree(volume->key);
	kfree(volume);
	/* The volume must not be dereferenced from here on. */
	fscache_stat_d(&fscache_n_volumes);
	fscache_put_cache(cache, fscache_cache_put_volume);
}
405
406/*
407 * Drop a reference to a volume cookie.
408 */
409void fscache_put_volume(struct fscache_volume *volume,
410			enum fscache_volume_trace where)
411{
412	if (volume) {
413		unsigned int debug_id = volume->debug_id;
414		bool zero;
415		int ref;
416
417		zero = __refcount_dec_and_test(&volume->ref, &ref);
418		trace_fscache_volume(debug_id, ref - 1, where);
419		if (zero)
420			fscache_free_volume(volume);
421	}
422}
423
424/*
425 * Relinquish a volume representation cookie.
426 */
427void __fscache_relinquish_volume(struct fscache_volume *volume,
428				 const void *coherency_data,
429				 bool invalidate)
430{
431	if (WARN_ON(test_and_set_bit(FSCACHE_VOLUME_RELINQUISHED, &volume->flags)))
432		return;
433
434	if (invalidate) {
435		set_bit(FSCACHE_VOLUME_INVALIDATE, &volume->flags);
436	} else if (coherency_data) {
437		memcpy(volume->coherency, coherency_data, volume->coherency_len);
438	}
439
440	fscache_put_volume(volume, fscache_volume_put_relinquish);
441}
442EXPORT_SYMBOL(__fscache_relinquish_volume);
443
444/**
445 * fscache_withdraw_volume - Withdraw a volume from being cached
446 * @volume: Volume cookie
447 *
448 * Withdraw a cache volume from service, waiting for all accesses to complete
449 * before returning.
450 */
451void fscache_withdraw_volume(struct fscache_volume *volume)
452{
453	int n_accesses;
454
455	_debug("withdraw V=%x", volume->debug_id);
456
457	/* Allow wakeups on dec-to-0 */
458	n_accesses = atomic_dec_return(&volume->n_accesses);
459	trace_fscache_access_volume(volume->debug_id, 0,
460				    refcount_read(&volume->ref),
461				    n_accesses, fscache_access_cache_unpin);
462
463	wait_var_event(&volume->n_accesses,
464		       atomic_read(&volume->n_accesses) == 0);
465}
466EXPORT_SYMBOL(fscache_withdraw_volume);
467
468#ifdef CONFIG_PROC_FS
469/*
470 * Generate a list of volumes in /proc/fs/fscache/volumes
471 */
472static int fscache_volumes_seq_show(struct seq_file *m, void *v)
473{
474	struct fscache_volume *volume;
475
476	if (v == &fscache_volumes) {
477		seq_puts(m,
478			 "VOLUME   REF   nCOOK ACC FL CACHE           KEY\n"
479			 "======== ===== ===== === == =============== ================\n");
480		return 0;
481	}
482
483	volume = list_entry(v, struct fscache_volume, proc_link);
484	seq_printf(m,
485		   "%08x %5d %5d %3d %02lx %-15.15s %s\n",
486		   volume->debug_id,
487		   refcount_read(&volume->ref),
488		   atomic_read(&volume->n_cookies),
489		   atomic_read(&volume->n_accesses),
490		   volume->flags,
491		   volume->cache->name ?: "-",
492		   volume->key + 1);
493	return 0;
494}
495
496static void *fscache_volumes_seq_start(struct seq_file *m, loff_t *_pos)
497	__acquires(&fscache_addremove_sem)
498{
499	down_read(&fscache_addremove_sem);
500	return seq_list_start_head(&fscache_volumes, *_pos);
501}
502
503static void *fscache_volumes_seq_next(struct seq_file *m, void *v, loff_t *_pos)
504{
505	return seq_list_next(v, &fscache_volumes, _pos);
506}
507
/*
 * Finish a pass over the list of volumes, dropping the read hold on
 * fscache_addremove_sem taken by the start routine.
 */
static void fscache_volumes_seq_stop(struct seq_file *m, void *v)
	__releases(&fscache_addremove_sem)
{
	up_read(&fscache_addremove_sem);
}
513
514const struct seq_operations fscache_volumes_seq_ops = {
515	.start  = fscache_volumes_seq_start,
516	.next   = fscache_volumes_seq_next,
517	.stop   = fscache_volumes_seq_stop,
518	.show   = fscache_volumes_seq_show,
519};
520#endif /* CONFIG_PROC_FS */
521