1// SPDX-License-Identifier: GPL-2.0-or-later
2/* AFS volume management
3 *
4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 */
7
8#include <linux/kernel.h>
9#include <linux/slab.h>
10#include "internal.h"
11
/* Number of seconds added to the current real time when computing a volume
 * record's next update time (volume->update_at).
 */
static unsigned __read_mostly afs_volume_record_life = 60 * 60;

/* Forward declaration: installed as the work function of each volume's
 * destructor work item by afs_alloc_volume().
 */
static void afs_destroy_volume(struct work_struct *work);
15
16/*
17 * Insert a volume into a cell.  If there's an existing volume record, that is
18 * returned instead with a ref held.
19 */
20static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
21						      struct afs_volume *volume)
22{
23	struct afs_volume *p;
24	struct rb_node *parent = NULL, **pp;
25
26	write_seqlock(&cell->volume_lock);
27
28	pp = &cell->volumes.rb_node;
29	while (*pp) {
30		parent = *pp;
31		p = rb_entry(parent, struct afs_volume, cell_node);
32		if (p->vid < volume->vid) {
33			pp = &(*pp)->rb_left;
34		} else if (p->vid > volume->vid) {
35			pp = &(*pp)->rb_right;
36		} else {
37			if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
38				volume = p;
39				goto found;
40			}
41
42			set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
43			rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
44		}
45	}
46
47	rb_link_node_rcu(&volume->cell_node, parent, pp);
48	rb_insert_color(&volume->cell_node, &cell->volumes);
49	hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes);
50
51found:
52	write_sequnlock(&cell->volume_lock);
53	return volume;
54
55}
56
57static void afs_remove_volume_from_cell(struct afs_volume *volume)
58{
59	struct afs_cell *cell = volume->cell;
60
61	if (!hlist_unhashed(&volume->proc_link)) {
62		trace_afs_volume(volume->vid, refcount_read(&cell->ref),
63				 afs_volume_trace_remove);
64		write_seqlock(&cell->volume_lock);
65		hlist_del_rcu(&volume->proc_link);
66		if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
67			rb_erase(&volume->cell_node, &cell->volumes);
68		write_sequnlock(&cell->volume_lock);
69	}
70}
71
72/*
73 * Allocate a volume record and load it up from a vldb record.
74 */
75static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
76					   struct afs_vldb_entry *vldb,
77					   struct afs_server_list **_slist)
78{
79	struct afs_server_list *slist;
80	struct afs_volume *volume;
81	int ret = -ENOMEM, i;
82
83	volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
84	if (!volume)
85		goto error_0;
86
87	volume->vid		= vldb->vid[params->type];
88	volume->update_at	= ktime_get_real_seconds() + afs_volume_record_life;
89	volume->cell		= afs_get_cell(params->cell, afs_cell_trace_get_vol);
90	volume->type		= params->type;
91	volume->type_force	= params->force;
92	volume->name_len	= vldb->name_len;
93	volume->creation_time	= TIME64_MIN;
94	volume->update_time	= TIME64_MIN;
95
96	refcount_set(&volume->ref, 1);
97	INIT_HLIST_NODE(&volume->proc_link);
98	INIT_WORK(&volume->destructor, afs_destroy_volume);
99	rwlock_init(&volume->servers_lock);
100	mutex_init(&volume->volsync_lock);
101	mutex_init(&volume->cb_check_lock);
102	rwlock_init(&volume->cb_v_break_lock);
103	INIT_LIST_HEAD(&volume->open_mmaps);
104	init_rwsem(&volume->open_mmaps_lock);
105	memcpy(volume->name, vldb->name, vldb->name_len + 1);
106
107	for (i = 0; i < AFS_MAXTYPES; i++)
108		volume->vids[i] = vldb->vid[i];
109
110	slist = afs_alloc_server_list(volume, params->key, vldb);
111	if (IS_ERR(slist)) {
112		ret = PTR_ERR(slist);
113		goto error_1;
114	}
115
116	*_slist = slist;
117	rcu_assign_pointer(volume->servers, slist);
118	trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
119	return volume;
120
121error_1:
122	afs_put_cell(volume->cell, afs_cell_trace_put_vol);
123	kfree(volume);
124error_0:
125	return ERR_PTR(ret);
126}
127
128/*
129 * Look up or allocate a volume record.
130 */
131static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
132					    struct afs_vldb_entry *vldb)
133{
134	struct afs_server_list *slist;
135	struct afs_volume *candidate, *volume;
136
137	candidate = afs_alloc_volume(params, vldb, &slist);
138	if (IS_ERR(candidate))
139		return candidate;
140
141	volume = afs_insert_volume_into_cell(params->cell, candidate);
142	if (volume == candidate)
143		afs_attach_volume_to_servers(volume, slist);
144	else
145		afs_put_volume(candidate, afs_volume_trace_put_cell_dup);
146	return volume;
147}
148
149/*
150 * Look up a VLDB record for a volume.
151 */
152static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
153						 struct key *key,
154						 const char *volname,
155						 size_t volnamesz)
156{
157	struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
158	struct afs_vl_cursor vc;
159	int ret;
160
161	if (!afs_begin_vlserver_operation(&vc, cell, key))
162		return ERR_PTR(-ERESTARTSYS);
163
164	while (afs_select_vlserver(&vc)) {
165		vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
166	}
167
168	ret = afs_end_vlserver_operation(&vc);
169	return ret < 0 ? ERR_PTR(ret) : vldb;
170}
171
172/*
173 * Look up a volume in the VL server and create a candidate volume record for
174 * it.
175 *
176 * The volume name can be one of the following:
177 *	"%[cell:]volume[.]"		R/W volume
178 *	"#[cell:]volume[.]"		R/O or R/W volume (rwparent=0),
179 *					 or R/W (rwparent=1) volume
180 *	"%[cell:]volume.readonly"	R/O volume
181 *	"#[cell:]volume.readonly"	R/O volume
182 *	"%[cell:]volume.backup"		Backup volume
183 *	"#[cell:]volume.backup"		Backup volume
184 *
185 * The cell name is optional, and defaults to the current cell.
186 *
187 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
188 * Guide
189 * - Rule 1: Explicit type suffix forces access of that type or nothing
190 *           (no suffix, then use Rule 2 & 3)
191 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
192 *           if not available
193 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
194 *           explicitly told otherwise
195 */
196struct afs_volume *afs_create_volume(struct afs_fs_context *params)
197{
198	struct afs_vldb_entry *vldb;
199	struct afs_volume *volume;
200	unsigned long type_mask = 1UL << params->type;
201
202	vldb = afs_vl_lookup_vldb(params->cell, params->key,
203				  params->volname, params->volnamesz);
204	if (IS_ERR(vldb))
205		return ERR_CAST(vldb);
206
207	if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
208		volume = ERR_PTR(vldb->error);
209		goto error;
210	}
211
212	/* Make the final decision on the type we want */
213	volume = ERR_PTR(-ENOMEDIUM);
214	if (params->force) {
215		if (!(vldb->flags & type_mask))
216			goto error;
217	} else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
218		params->type = AFSVL_ROVOL;
219	} else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
220		params->type = AFSVL_RWVOL;
221	} else {
222		goto error;
223	}
224
225	volume = afs_lookup_volume(params, vldb);
226
227error:
228	kfree(vldb);
229	return volume;
230}
231
232/*
233 * Destroy a volume record
234 */
235static void afs_destroy_volume(struct work_struct *work)
236{
237	struct afs_volume *volume = container_of(work, struct afs_volume, destructor);
238	struct afs_server_list *slist = rcu_access_pointer(volume->servers);
239
240	_enter("%p", volume);
241
242#ifdef CONFIG_AFS_FSCACHE
243	ASSERTCMP(volume->cache, ==, NULL);
244#endif
245
246	afs_detach_volume_from_servers(volume, slist);
247	afs_remove_volume_from_cell(volume);
248	afs_put_serverlist(volume->cell->net, slist);
249	afs_put_cell(volume->cell, afs_cell_trace_put_vol);
250	trace_afs_volume(volume->vid, refcount_read(&volume->ref),
251			 afs_volume_trace_free);
252	kfree_rcu(volume, rcu);
253
254	_leave(" [destroyed]");
255}
256
257/*
258 * Try to get a reference on a volume record.
259 */
260bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
261{
262	int r;
263
264	if (__refcount_inc_not_zero(&volume->ref, &r)) {
265		trace_afs_volume(volume->vid, r + 1, reason);
266		return true;
267	}
268	return false;
269}
270
271/*
272 * Get a reference on a volume record.
273 */
274struct afs_volume *afs_get_volume(struct afs_volume *volume,
275				  enum afs_volume_trace reason)
276{
277	if (volume) {
278		int r;
279
280		__refcount_inc(&volume->ref, &r);
281		trace_afs_volume(volume->vid, r + 1, reason);
282	}
283	return volume;
284}
285
286
287/*
288 * Drop a reference on a volume record.
289 */
290void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason)
291{
292	if (volume) {
293		afs_volid_t vid = volume->vid;
294		bool zero;
295		int r;
296
297		zero = __refcount_dec_and_test(&volume->ref, &r);
298		trace_afs_volume(vid, r - 1, reason);
299		if (zero)
300			schedule_work(&volume->destructor);
301	}
302}
303
/*
 * Activate a volume.
 *
 * With CONFIG_AFS_FSCACHE, acquire an fscache volume cookie keyed on
 * "afs,<cell name>,<volume ID in hex>" and store it in volume->cache.  If
 * fscache reports -EBUSY for the key, an error is logged and the volume
 * carries on with a NULL cookie.  Without CONFIG_AFS_FSCACHE this does
 * nothing.
 *
 * Returns 0 on success, -ENOMEM if the key can't be allocated, or any other
 * error from fscache_acquire_volume().
 */
int afs_activate_volume(struct afs_volume *volume)
{
	int ret = 0;
#ifdef CONFIG_AFS_FSCACHE
	struct fscache_volume *cookie;
	char *key;

	key = kasprintf(GFP_KERNEL, "afs,%s,%llx",
			volume->cell->name, volume->vid);
	if (!key)
		return -ENOMEM;

	cookie = fscache_acquire_volume(key, NULL, NULL, 0);
	if (!IS_ERR(cookie)) {
		volume->cache = cookie;
	} else if (cookie == ERR_PTR(-EBUSY)) {
		/* Not fatal: carry on with no cache cookie. */
		pr_err("AFS: Cache volume key already in use (%s)\n", key);
		volume->cache = NULL;
	} else {
		/* volume->cache is left untouched on a hard failure. */
		ret = PTR_ERR(cookie);
	}

	kfree(key);
#endif
	return ret;
}
332
333/*
334 * Deactivate a volume.
335 */
336void afs_deactivate_volume(struct afs_volume *volume)
337{
338	_enter("%s", volume->name);
339
340#ifdef CONFIG_AFS_FSCACHE
341	fscache_relinquish_volume(volume->cache, NULL,
342				  test_bit(AFS_VOLUME_DELETED, &volume->flags));
343	volume->cache = NULL;
344#endif
345
346	_leave("");
347}
348
/*
 * Query the VL service to update the volume status.
 *
 * The volume's VLDB entry is re-fetched by volume ID, the cached name is
 * refreshed if it changed, and a new server list is built from the entry.
 * Whether the new list replaces the current one is decided by
 * afs_annotate_server_list().  volume->update_at is then pushed forward.
 *
 * Returns 0 on success or a negative error code from the VLDB lookup or from
 * afs_alloc_server_list().
 */
static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
	struct afs_server_list *new, *old, *discard;
	struct afs_vldb_entry *vldb;
	char idbuf[24];
	int ret, idsz;

	_enter("");

	/* We look up an ID by passing it as a decimal string in the
	 * operation's name parameter.
	 */
	idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);

	vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
	if (IS_ERR(vldb)) {
		ret = PTR_ERR(vldb);
		goto error;
	}

	/* See if the volume got renamed. */
	if (vldb->name_len != volume->name_len ||
	    memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
		/* TODO: Use RCU'd string. */
		/* NOTE(review): this copies a fixed AFS_MAXVOLNAME bytes,
		 * whereas afs_alloc_volume() copies name_len + 1 - confirm
		 * both name buffers are sized for that.
		 */
		memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
		volume->name_len = vldb->name_len;
	}

	/* See if the volume's server list got updated. */
	new = afs_alloc_server_list(volume, key, vldb);
	if (IS_ERR(new)) {
		ret = PTR_ERR(new);
		goto error_vldb;
	}

	write_lock(&volume->servers_lock);

	/* Assume the new list is the one to throw away until
	 * afs_annotate_server_list() says it should be installed.
	 */
	discard = new;
	old = rcu_dereference_protected(volume->servers,
					lockdep_is_held(&volume->servers_lock));
	if (afs_annotate_server_list(new, old)) {
		new->seq = volume->servers_seq + 1;
		rcu_assign_pointer(volume->servers, new);
		/* Write barrier between publishing the new list pointer and
		 * bumping the sequence counter.
		 */
		smp_wmb();
		volume->servers_seq++;
		discard = old;
	}

	/* Check more often if replication is ongoing. */
	if (new->ro_replicating)
		volume->update_at = ktime_get_real_seconds() + 10 * 60;
	else
		volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
	write_unlock(&volume->servers_lock);

	/* The new list was installed: move the volume's server attachments
	 * from the old list to the new one before the old list is put.
	 */
	if (discard == old)
		afs_reattach_volume_to_servers(volume, new, old);
	afs_put_serverlist(volume->cell->net, discard);
	ret = 0;
error_vldb:
	kfree(vldb);
error:
	_leave(" = %d", ret);
	return ret;
}
417
/*
 * Make sure the volume record is up to date.
 *
 * If the record's update time has passed or AFS_VOLUME_NEEDS_UPDATE is set,
 * one caller at a time (the holder of the AFS_VOLUME_UPDATING bit lock)
 * refreshes the record through afs_update_volume_status().  Other callers
 * wait only if AFS_VOLUME_WAIT is set; that bit is tested and cleared here
 * but is not set anywhere in this function.
 *
 * Returns 0 if no update was needed or no wait was required, the result of
 * the update if this caller performed it, -ERESTARTSYS if the wait was
 * interrupted, or -ESTALE after four waits without reaching a conclusion.
 */
int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op)
{
	int ret, retries = 0;

	_enter("");

retry:
	if (test_bit(AFS_VOLUME_WAIT, &volume->flags))
		goto wait;
	if (volume->update_at <= ktime_get_real_seconds() ||
	    test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags))
		goto update;
	_leave(" = 0");
	return 0;

update:
	/* Only the caller that wins the UPDATING bit lock does the update. */
	if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
		clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
		ret = afs_update_volume_status(volume, op->key);
		/* On failure, re-flag the volume so a later call tries again. */
		if (ret < 0)
			set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
		clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
		clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
		/* Release anyone sleeping in wait_on_bit() below. */
		wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
		_leave(" = %d", ret);
		return ret;
	}

wait:
	/* Someone else is updating; only block if WAIT is actually set. */
	if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
		_leave(" = 0 [no wait]");
		return 0;
	}

	/* The sleep is interruptible unless the operation is flagged
	 * AFS_OPERATION_UNINTR.
	 */
	ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
			  (op->flags & AFS_OPERATION_UNINTR) ?
			  TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
	if (ret == -ERESTARTSYS) {
		_leave(" = %d", ret);
		return ret;
	}

	/* Re-evaluate from the top, but give up after the fourth wait. */
	retries++;
	if (retries == 4) {
		_leave(" = -ESTALE");
		return -ESTALE;
	}
	goto retry;
}
470