/* AFS volume location management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/sched.h>
#include "internal.h"

static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
static unsigned afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

static LIST_HEAD(afs_vlocation_updates);
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
static struct workqueue_struct *afs_vlocation_update_worker;
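
/* Unused records are moved to the graveyard when their last reference is
 * dropped and are destroyed by the reaper once afs_vlocation_timeout seconds
 * have elapsed; live records are refreshed from the VL servers by the updater
 * roughly every afs_vlocation_update_timeout seconds.
 */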

/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
					       &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
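			/* -ENOMEM and -ENONET abort the whole lookup; the
			 * other network errors just mean this server couldn't
			 * be reached, so rotate to the next one */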
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -ENOMEDIUM:
		case -EKEYREJECTED:
		case -EKEYEXPIRED:
			goto out;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

out:
	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}

/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 */
static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
					 struct key *key,
					 afs_volid_t volid,
					 afs_voltype_t voltype,
					 struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%x,%d,", cell->name, volid, voltype);

	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
					     &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -EBUSY:
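			/* the server is congested; retry it up to three times
			 * before rotating to the next one, sleeping briefly
			 * between the later attempts */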
			vl->upd_busy_cnt++;
			if (vl->upd_busy_cnt <= 3) {
				if (vl->upd_busy_cnt > 1) {
					/* second+ BUSY - sleep a little bit */
					set_current_state(TASK_UNINTERRUPTIBLE);
					schedule_timeout(1);
					__set_current_state(TASK_RUNNING);
				}
				continue;
			}
			break;
		case -ENOMEDIUM:
			vl->upd_rej_cnt++;
			goto rotate;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
		vl->upd_busy_cnt = 0;
	}

out:
	if (ret < 0 && vl->upd_rej_cnt > 0) {
		printk(KERN_NOTICE "kAFS:"
		       " Active volume no longer valid '%s'\n",
		       vl->vldb.name);
		vl->valid = 0;
		ret = -ENOMEDIUM;
	}

	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}

/*
 * allocate a volume location record
 */
static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
						 const char *name,
						 size_t namesz)
{
	struct afs_vlocation *vl;

	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
	if (vl) {
		vl->cell = cell;
		vl->state = AFS_VL_NEW;
		atomic_set(&vl->usage, 1);
		INIT_LIST_HEAD(&vl->link);
		INIT_LIST_HEAD(&vl->grave);
		INIT_LIST_HEAD(&vl->update);
		init_waitqueue_head(&vl->waitq);
		spin_lock_init(&vl->lock);
		memcpy(vl->vldb.name, name, namesz);
	}

	_leave(" = %p", vl);
	return vl;
}

/*
 * update record if we found it in the cache
 */
static int afs_vlocation_update_record(struct afs_vlocation *vl,
				       struct key *key,
				       struct afs_cache_vlocation *vldb)
{
	afs_voltype_t voltype;
	afs_volid_t vid;
	int ret;

	/* try to look up a cached volume in the cell VL databases by ID */
	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vl->vldb.name,
	       vl->vldb.vidmask,
	       ntohl(vl->vldb.servers[0].s_addr),
	       vl->vldb.srvtmask[0],
	       ntohl(vl->vldb.servers[1].s_addr),
	       vl->vldb.srvtmask[1],
	       ntohl(vl->vldb.servers[2].s_addr),
	       vl->vldb.srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vl->vldb.vid[0],
	       vl->vldb.vid[1],
	       vl->vldb.vid[2]);

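	/* prefer to check the availability of the RW volume, falling back to
	 * the RO and then the backup volume if that's all the VLDB entry
	 * records */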
	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
		vid = vl->vldb.vid[0];
		voltype = AFSVL_RWVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
		vid = vl->vldb.vid[1];
		voltype = AFSVL_ROVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
		vid = vl->vldb.vid[2];
		voltype = AFSVL_BACKVOL;
	} else {
		BUG();
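		/* not reached; the assignments below just keep the compiler
		 * from warning about uninitialised variables */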
		vid = 0;
		voltype = 0;
	}

	/* contact the server to make sure the volume is still available
	 * - TODO: need to handle disconnected operation here
	 */
	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
	switch (ret) {
		/* net error */
	default:
		printk(KERN_WARNING "kAFS:"
		       " failed to update volume '%s' (%x) in '%s': %d\n",
		       vl->vldb.name, vid, vl->cell->name, ret);
		_leave(" = %d", ret);
		return ret;

		/* pulled from local cache into memory */
	case 0:
		_leave(" = 0");
		return 0;

		/* uh oh... looks like the volume got deleted */
	case -ENOMEDIUM:
		printk(KERN_ERR "kAFS:"
		       " volume '%s' (%x) does not exist in cell '%s'\n",
		       vl->vldb.name, vid, vl->cell->name);

		/* TODO: make existing record unavailable */
		_leave(" = %d", ret);
		return ret;
	}
}

/*
 * apply the update to a VL record
 */
static void afs_vlocation_apply_update(struct afs_vlocation *vl,
				       struct afs_cache_vlocation *vldb)
{
	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vldb->name, vldb->vidmask,
	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);

	if (strcmp(vldb->name, vl->vldb.name) != 0)
		printk(KERN_NOTICE "kAFS:"
		       " name of volume '%s' changed to '%s' on server\n",
		       vl->vldb.name, vldb->name);

	vl->vldb = *vldb;

#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif
}

/*
 * fill in a volume location record, consulting the cache and the VL server
 * both
 */
static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
					struct key *key)
{
	struct afs_cache_vlocation vldb;
	int ret;

	_enter("");

	ASSERTCMP(vl->valid, ==, 0);

	memset(&vldb, 0, sizeof(vldb));

	/* see if we have an in-cache copy (will set vl->valid if there is) */
#ifdef CONFIG_AFS_FSCACHE
	vl->cache = fscache_acquire_cookie(vl->cell->cache,
					   &afs_vlocation_cache_index_def, vl);
#endif

	if (vl->valid) {
		/* try to update a known volume in the cell VL databases by
		 * ID as the name may have changed */
		_debug("found in cache");
		ret = afs_vlocation_update_record(vl, key, &vldb);
	} else {
		/* try to look up an unknown volume in the cell VL databases by
		 * name */
		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
		if (ret < 0) {
			printk("kAFS: failed to locate '%s' in cell '%s'\n",
			       vl->vldb.name, vl->cell->name);
			return ret;
		}
	}

	afs_vlocation_apply_update(vl, &vldb);
	_leave(" = 0");
	return 0;
}

/*
 * queue a vlocation record for updates
 */
static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
{
	struct afs_vlocation *xvl;

	/* wait at least 10 minutes before updating... */
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* ... but wait at least 1 second more than the newest record
		 * already queued so that we don't spam the VL server suddenly
		 * with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
	} else {
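		/* the updater reschedules itself whilst there are records on
		 * the list, so it only needs kicking off here when the list
		 * was previously empty */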
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update,
				   afs_vlocation_update_timeout * HZ);
	}

	list_add_tail(&vl->update, &afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
}

/*
 * lookup volume location
 * - iterate through the VL servers in a cell until one of them admits knowing
 *   about the volume in question
 * - lookup in the local cache if not able to find on the VL server
 * - insert/update in the local cache if did get a VL response
 */
struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
					   struct key *key,
					   const char *name,
					   size_t namesz)
{
	struct afs_vlocation *vl;
	int ret;

	_enter("{%s},{%x},%*.*s,%zu",
	       cell->name, key_serial(key),
	       (int) namesz, (int) namesz, name, namesz);

	if (namesz >= sizeof(vl->vldb.name)) {
		_leave(" = -ENAMETOOLONG");
		return ERR_PTR(-ENAMETOOLONG);
	}

	/* see if we have an in-memory copy first */
	down_write(&cell->vl_sem);
	spin_lock(&cell->vl_lock);
	list_for_each_entry(vl, &cell->vl_list, link) {
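		/* record names are NUL-terminated, so a terminator at
		 * [namesz] means the lengths match */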
		if (vl->vldb.name[namesz] != '\0')
			continue;
		if (memcmp(vl->vldb.name, name, namesz) == 0)
			goto found_in_memory;
	}
	spin_unlock(&cell->vl_lock);

	/* not in the cell's in-memory lists - create a new record */
	vl = afs_vlocation_alloc(cell, name, namesz);
	if (!vl) {
		up_write(&cell->vl_sem);
		return ERR_PTR(-ENOMEM);
	}

	afs_get_cell(cell);

	list_add_tail(&vl->link, &cell->vl_list);
	vl->state = AFS_VL_CREATING;
	up_write(&cell->vl_sem);

fill_in_record:
	ret = afs_vlocation_fill_in_record(vl, key);
	if (ret < 0)
		goto error_abandon;
	spin_lock(&vl->lock);
	vl->state = AFS_VL_VALID;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);

	/* update volume entry in local cache */
#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif

	/* schedule for regular updates */
	afs_vlocation_queue_for_updates(vl);
	goto success;

found_in_memory:
	/* found in memory */
	_debug("found in memory");
	atomic_inc(&vl->usage);
	spin_unlock(&cell->vl_lock);
	if (!list_empty(&vl->grave)) {
		spin_lock(&afs_vlocation_graveyard_lock);
		list_del_init(&vl->grave);
		spin_unlock(&afs_vlocation_graveyard_lock);
	}
	up_write(&cell->vl_sem);

	/* see if it was an abandoned record that we might try filling in */
	spin_lock(&vl->lock);
	while (vl->state != AFS_VL_VALID) {
		afs_vlocation_state_t state = vl->state;

		_debug("invalid [state %d]", state);

		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
			vl->state = AFS_VL_CREATING;
			spin_unlock(&vl->lock);
			goto fill_in_record;
		}

		/* must now wait for creation or update by someone else to
		 * complete */
		_debug("wait");

		spin_unlock(&vl->lock);
		ret = wait_event_interruptible(vl->waitq,
					       vl->state == AFS_VL_NEW ||
					       vl->state == AFS_VL_VALID ||
					       vl->state == AFS_VL_NO_VOLUME);
		if (ret < 0)
			goto error;
		spin_lock(&vl->lock);
	}
	spin_unlock(&vl->lock);

success:
	_leave(" = %p", vl);
	return vl;

error_abandon:
	spin_lock(&vl->lock);
	vl->state = AFS_VL_NEW;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);
error:
	ASSERT(vl != NULL);
	afs_put_vlocation(vl);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}

/*
 * finish using a volume location record
 */
void afs_put_vlocation(struct afs_vlocation *vl)
{
	if (!vl)
		return;

	_enter("%s", vl->vldb.name);

	ASSERTCMP(atomic_read(&vl->usage), >, 0);

	if (likely(!atomic_dec_and_test(&vl->usage))) {
		_leave("");
		return;
	}

	spin_lock(&afs_vlocation_graveyard_lock);
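	/* recheck the usage count under the lock in case the record has been
	 * resurrected by a concurrent afs_vlocation_lookup() */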
	if (atomic_read(&vl->usage) == 0) {
		_debug("buried");
		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
		vl->time_of_death = get_seconds();
		schedule_delayed_work(&afs_vlocation_reap,
				      afs_vlocation_timeout * HZ);

		/* suspend updates on this record */
		if (!list_empty(&vl->update)) {
			spin_lock(&afs_vlocation_updates_lock);
			list_del_init(&vl->update);
			spin_unlock(&afs_vlocation_updates_lock);
		}
	}
	spin_unlock(&afs_vlocation_graveyard_lock);
	_leave(" [killed?]");
}

/*
 * destroy a dead volume location record
 */
static void afs_vlocation_destroy(struct afs_vlocation *vl)
{
	_enter("%p", vl);

#ifdef CONFIG_AFS_FSCACHE
	fscache_relinquish_cookie(vl->cache, 0);
#endif
	afs_put_cell(vl->cell);
	kfree(vl);
}

/*
 * reap dead volume location records
 */
static void afs_vlocation_reaper(struct work_struct *work)
{
	LIST_HEAD(corpses);
	struct afs_vlocation *vl;
	unsigned long delay, expiry;
	time_t now;

	_enter("");

	now = get_seconds();
	spin_lock(&afs_vlocation_graveyard_lock);

	while (!list_empty(&afs_vlocation_graveyard)) {
		vl = list_entry(afs_vlocation_graveyard.next,
				struct afs_vlocation, grave);

		_debug("check %p", vl);

		/* the queue is ordered most dead first */
		expiry = vl->time_of_death + afs_vlocation_timeout;
		if (expiry > now) {
			delay = (expiry - now) * HZ;
			_debug("delay %lu", delay);
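			/* make sure the reaper runs again when this record
			 * expires, replacing any reap already scheduled for a
			 * different time */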
			if (!schedule_delayed_work(&afs_vlocation_reap,
						   delay)) {
				cancel_delayed_work(&afs_vlocation_reap);
				schedule_delayed_work(&afs_vlocation_reap,
						      delay);
			}
			break;
		}

		spin_lock(&vl->cell->vl_lock);
		if (atomic_read(&vl->usage) > 0) {
			_debug("no reap");
			list_del_init(&vl->grave);
		} else {
			_debug("reap");
			list_move_tail(&vl->grave, &corpses);
			list_del_init(&vl->link);
		}
		spin_unlock(&vl->cell->vl_lock);
	}

	spin_unlock(&afs_vlocation_graveyard_lock);

	/* now reap the corpses we've extracted */
	while (!list_empty(&corpses)) {
		vl = list_entry(corpses.next, struct afs_vlocation, grave);
		list_del(&vl->grave);
		afs_vlocation_destroy(vl);
	}

	_leave("");
}

/*
 * initialise the VL update process
 */
int __init afs_vlocation_update_init(void)
{
	afs_vlocation_update_worker =
		create_singlethread_workqueue("kafs_vlupdated");
	return afs_vlocation_update_worker ? 0 : -ENOMEM;
}

/*
 * discard all the volume location records for rmmod
 */
void afs_vlocation_purge(void)
{
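	/* a timeout of zero makes the reaper treat every record in the
	 * graveyard as already expired */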
	afs_vlocation_timeout = 0;

	spin_lock(&afs_vlocation_updates_lock);
	list_del_init(&afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
	cancel_delayed_work(&afs_vlocation_update);
	queue_delayed_work(afs_vlocation_update_worker,
			   &afs_vlocation_update, 0);
	destroy_workqueue(afs_vlocation_update_worker);

	cancel_delayed_work(&afs_vlocation_reap);
	schedule_delayed_work(&afs_vlocation_reap, 0);
}

/*
 * update a volume location
 */
static void afs_vlocation_updater(struct work_struct *work)
{
	struct afs_cache_vlocation vldb;
	struct afs_vlocation *vl, *xvl;
	time_t now;
	long timeout;
	int ret;

	_enter("");

	now = get_seconds();

	/* find a record to update */
	spin_lock(&afs_vlocation_updates_lock);
	for (;;) {
		if (list_empty(&afs_vlocation_updates)) {
			spin_unlock(&afs_vlocation_updates_lock);
			_leave(" [nothing]");
			return;
		}

		vl = list_entry(afs_vlocation_updates.next,
				struct afs_vlocation, update);
		if (atomic_read(&vl->usage) > 0)
			break;
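
		/* drop records that no longer have any users from the update
		 * queue */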
		list_del_init(&vl->update);
	}

	timeout = vl->update_at - now;
	if (timeout > 0) {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update, timeout * HZ);
		spin_unlock(&afs_vlocation_updates_lock);
		_leave(" [nothing]");
		return;
	}

	list_del_init(&vl->update);
	atomic_inc(&vl->usage);
	spin_unlock(&afs_vlocation_updates_lock);

	/* we can now perform the update */
	_debug("update %s", vl->vldb.name);
	vl->state = AFS_VL_UPDATING;
	vl->upd_rej_cnt = 0;
	vl->upd_busy_cnt = 0;

	ret = afs_vlocation_update_record(vl, NULL, &vldb);
	spin_lock(&vl->lock);
	switch (ret) {
	case 0:
		afs_vlocation_apply_update(vl, &vldb);
		vl->state = AFS_VL_VALID;
		break;
	case -ENOMEDIUM:
		vl->state = AFS_VL_VOLUME_DELETED;
		break;
	default:
		vl->state = AFS_VL_UNCERTAIN;
		break;
	}
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);

	/* and then reschedule */
	_debug("reschedule");
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* next update in 10 minutes, but wait at least 1 second more
		 * than the newest record already queued so that we don't spam
		 * the VL server suddenly with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
		xvl = list_entry(afs_vlocation_updates.next,
				 struct afs_vlocation, update);
		timeout = xvl->update_at - now;
		if (timeout < 0)
			timeout = 0;
	} else {
		timeout = afs_vlocation_update_timeout;
	}

	ASSERT(list_empty(&vl->update));

	list_add_tail(&vl->update, &afs_vlocation_updates);

	_debug("timeout %ld", timeout);
	queue_delayed_work(afs_vlocation_update_worker,
			   &afs_vlocation_update, timeout * HZ);
	spin_unlock(&afs_vlocation_updates_lock);
	afs_put_vlocation(vl);
}