1/*
2 * Unix SMB/CIFS implementation.
3 *
4 * OneFS shadow copy implementation that utilizes the file system's native
5 * snapshot support. This file does all of the heavy lifting.
6 *
7 * Copyright (C) Dave Richards, 2007
8 * Copyright (C) Tim Prouty, 2009
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 3 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, see <http://www.gnu.org/licenses/>.
22 */
23
24#include <ifs/ifs_syscalls.h>
25#include <sys/types.h>
26#include <sys/isi_enc.h>
27#include <sys/module.h>
28#include <sys/stat.h>
29#include <sys/syscall.h>
30#include <sys/time.h>
31#include <dirent.h>
32#include <errno.h>
33#include <fcntl.h>
34#include <limits.h>
35#include <search.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <unistd.h>
40
41#include "onefs_shadow_copy.h"
42
/*
 * Copied from ../include/proto.h.  The prototypes are repeated here by hand
 * rather than pulled in through that header.  In this file the pair brackets
 * the pctl2_lin_open() call in osc_get_names_directory_fd().
 */
void become_root(void);
void unbecome_root(void);

/*
 * Name of the snapshot directory.  It is handed to opendir() as a relative
 * path and is the leading component of every canonicalized path.
 */
#define	SNAPSHOT_DIRECTORY	".snapshot"

/*
 * Cap on the number of version strings stored in one directory context;
 * osc_directory_ctx_append_version() silently ignores anything beyond it.
 */
#define	MAX_VERSIONS		64
50
/**
 * A snapshot object.
 *
 * During snapshot enumeration each snapshot is represented by one snapshot
 * object, and the objects are stored in a snapshot set.  The set is keyed on
 * the tv_sec component of is_time only, so it contains exactly one node per
 * second.  Snapshots that were created within the same second are not
 * dropped: osc_snapshot_insert() chains them off the node for that second
 * through the is_next member.
 */
struct osc_snapshot {
	char			*is_name;	/* heap copy of the name */
	struct timespec		 is_time;	/* creation time; tv_sec is the key */
	struct osc_snapshot	*is_next;	/* next snapshot in the same second */
};
68
/**
 * A snapshot context object.
 *
 * Snapshot contexts carry state through the snapshot enumeration routines:
 * the snapshot set, plus the modification time of the names directory that
 * was recorded when the set was built.  One context with static storage is
 * handed out by osc_get_snapshot_ctx() and is reused across API calls for as
 * long as osc_snapshot_ctx_is_valid() accepts it.
 */
struct osc_snapshot_ctx {
	void		*osc_set;	/* tsearch(3) root; keys are osc_snapshots */
	struct timespec	 osc_mtime;	/* names directory mtime; zero == unset */
};
80
/**
 * A directory context.
 *
 * Directory contexts are the underlying data structure used to enumerate
 * snapshot versions.  An opendir()-, readdir()- and closedir()-like
 * interface is built on top of them.  At the API level a directory context
 * is passed around as a void pointer.  Directory contexts are allocated on
 * the heap and their lifetime is dictated by the calling routine.
 */
struct osc_directory_ctx {
	size_t		  idc_pos;	/* index of the next version to return */
	size_t		  idc_len;	/* number of versions stored */
	size_t		  idc_size;	/* slots allocated in idc_version */
	char		**idc_version;	/* heap array of heap version strings */
};
97
98/**
99 * Return a file descriptor to the STF names directory.
100 *
101 * Opens the STF names directory and returns a file descriptor to it.
102 * Subsequent calls return the same value (avoiding the need to re-open the
103 * directory repeatedly).  Caveat caller: don't close the file descriptor or
104 * you will be shot!
105 */
106static int
107osc_get_names_directory_fd(void)
108{
109	static int fd = -1;
110
111	if (fd == -1) {
112		become_root();
113		fd = pctl2_lin_open(STF_NAMES_LIN, HEAD_SNAPID, O_RDONLY);
114		unbecome_root();
115	}
116
117	return fd;
118}
119
/**
 * Compare two time values at one-second resolution.
 *
 * Only the tv_sec components of the two struct timespecs are examined.
 * Returns -1 if the first value precedes the second, 0 if they fall within
 * the same second, and +1 if the first value follows the second.
 */
static int
osc_time_compare(const struct timespec *tsp1, const struct timespec *tsp2)
{
	if (tsp1->tv_sec < tsp2->tv_sec)
		return -1;

	if (tsp1->tv_sec > tsp2->tv_sec)
		return +1;

	return 0;
}
133
/**
 * Compare two timespec values.
 *
 * Seconds are compared first; nanoseconds only break a tie.  Returns -1 if
 * the first value precedes the second, 0 if they are equal, and +1 if the
 * first value follows the second.
 */
static int
osc_timespec_compare(const struct timespec *tsp1, const struct timespec *tsp2)
{
	if (tsp1->tv_sec != tsp2->tv_sec)
		return (tsp1->tv_sec < tsp2->tv_sec) ? -1 : +1;

	if (tsp1->tv_nsec != tsp2->tv_nsec)
		return (tsp1->tv_nsec < tsp2->tv_nsec) ? -1 : +1;

	return 0;
}
149
/**
 * Determine whether a timespec value is zero.
 *
 * Returns 1 when both tv_sec and tv_nsec are zero, and 0 otherwise.
 */
static int
osc_timespec_is_zero(const struct timespec *tsp)
{
	if (tsp->tv_sec != 0)
		return 0;

	return tsp->tv_nsec == 0;
}
161
162/**
163 * Create a snapshot object.
164 *
165 * Allocates and initializes a new snapshot object.  In addition to allocating
166 * space for the snapshot object itself, space is allocated for the snapshot
167 * name.  Both the name and time are then copied to the new object.
168 */
169static struct osc_snapshot *
170osc_snapshot_create(const char *name, const struct timespec *tsp)
171{
172	struct osc_snapshot *isp;
173
174	isp = malloc(sizeof *isp);
175	if (isp == NULL)
176		goto out;
177
178	isp->is_name = malloc(strlen(name) + 1);
179	if (isp->is_name == NULL) {
180		free(isp);
181		isp = NULL;
182		goto out;
183	}
184
185	strcpy(isp->is_name, name);
186	isp->is_time = *tsp;
187	isp->is_next = NULL;
188
189 out:
190	return isp;
191}
192
193/**
194 * Destroy a snapshot object.
195 *
196 * Frees both the name and the snapshot object itself.  Appropriate NULL
197 * checking is performed because counting on free to do so is immoral.
198 */
199static void
200osc_snapshot_destroy(struct osc_snapshot *isp)
201{
202	if (isp != NULL) {
203		if (isp->is_name != NULL)
204			free(isp->is_name);
205		free(isp);
206	}
207}
208
209/**
210 * Destroy all snapshots in the snapshot list.
211 *
212 * Calls osc_snapshot_destroy() on each snapshot in the list.
213 */
214static void
215osc_snapshot_destroy_list(struct osc_snapshot *isp)
216{
217	struct osc_snapshot *tmp;
218
219	while (isp != NULL) {
220		tmp = isp;
221		isp = isp->is_next;
222		osc_snapshot_destroy(tmp);
223	}
224}
225
226/**
227 * Compare two snapshot objects.
228 *
229 * Compare two snapshot objects.  It is really just a wrapper for
230 * osc_time_compare(), which compare the time value of the two snapshots.
231 * N.B. time value in this context refers only to the tv_sec component.
232 */
233static int
234osc_snapshot_compare(const void *vp1, const void *vp2)
235{
236	const struct osc_snapshot *isp1 = vp1;
237	const struct osc_snapshot *isp2 = vp2;
238
239	return -osc_time_compare(&isp1->is_time, &isp2->is_time);
240}
241
242/**
243 * Insert a snapshot into the snapshot set.
244 *
245 * Inserts a new snapshot into the snapshot set.  The key for snapshots is
246 * their creation time (it's actually the seconds portion of the creation
247 * time).  If a duplicate snapshot is found in the set, the new snapshot is
248 * added to a linked list of snapshots for that second.
249 */
250static void
251osc_snapshot_insert(struct osc_snapshot_ctx *oscp, const char *name,
252    const struct timespec *tsp, int *errorp)
253{
254	struct osc_snapshot *isp1;
255	struct osc_snapshot **ispp;
256
257	isp1 = osc_snapshot_create(name, tsp);
258	if (isp1 == NULL) {
259		*errorp = 1;
260		return;
261	}
262
263	ispp = tsearch(isp1, &oscp->osc_set, osc_snapshot_compare);
264	if (ispp != NULL) {
265		struct osc_snapshot *isp2 = *ispp;
266
267		/* If this is the only snapshot for this second, we're done. */
268		if (isp2 == isp1)
269			return;
270
271		/* Collision: add the new snapshot to the list. */
272		isp1->is_next = isp2->is_next;
273		isp2->is_next = isp1;
274
275	} else
276		*errorp = 1;
277
278}
279
280/**
281 * Process the next snapshot.
282 *
283 * Called for (almost) every entry in a .snapshot directory, ("." and ".." are
284 * ignored in osc_process_snapshot_directory()).  All other entries are passed
285 * to osc_process_snapshot(), however.  These entries can fall into one of two
286 * categories: snapshot names and snapshot aliases.  We only care about
287 * snapshot names (as aliases are just redundant entries).  Once it verifies
288 * that name represents a valid snapshot name, it calls fstat() to get the
289 * creation time of the snapshot and then calls osc_snapshot_insert() to add
290 * this entry to the snapshot set.
291 */
292static void
293osc_process_snapshot(struct osc_snapshot_ctx *oscp, const char *name,
294    int *errorp)
295{
296	int fd;
297	struct stf_stat stf_stat;
298	struct stat stbuf;
299
300	fd = osc_get_names_directory_fd();
301	if (fd == -1)
302		goto out;
303
304	fd = enc_openat(fd, name, ENC_DEFAULT, O_RDONLY);
305	if (fd == -1)
306		goto out;
307
308	memset(&stf_stat, 0, sizeof stf_stat);
309	if (ifs_snap_stat(fd, &stf_stat) == -1)
310		goto out;
311
312	if (stf_stat.sf_type != SF_STF)
313		goto out;
314
315	if (fstat(fd, &stbuf) == -1)
316		goto out;
317
318	osc_snapshot_insert(oscp, name, &stbuf.st_birthtimespec, errorp);
319
320 out:
321	if (fd != -1)
322		close(fd);
323}
324
325/**
326 * Process a snapshot directory.
327 *
328 * Opens the snapshot directory and calls osc_process_snapshot() for each
329 * entry.  (Well ok, "." and ".."  are ignored.)  The goal here is to add all
330 * snapshots in the directory to the snapshot set.
331 */
332static void
333osc_process_snapshot_directory(struct osc_snapshot_ctx *oscp, int *errorp)
334{
335	int fd;
336	struct stat stbuf;
337	DIR *dirp;
338	struct dirent *dp;
339
340	fd = osc_get_names_directory_fd();
341	if (fd == -1)
342		goto out;
343
344	if (fstat(fd, &stbuf) == -1)
345		goto out;
346
347	dirp = opendir(SNAPSHOT_DIRECTORY);
348	if (dirp == NULL)
349		goto out;
350
351	for (;;) {
352		dp = readdir(dirp);
353		if (dp == NULL)
354			break;
355
356		if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' ||
357		    (dp->d_name[1] == '.' && dp->d_name[2] == '\0')))
358			continue;
359
360		osc_process_snapshot(oscp, dp->d_name, errorp);
361		if (*errorp)
362			break;
363	}
364
365	closedir(dirp);
366
367	if (!*errorp)
368		oscp->osc_mtime = stbuf.st_mtimespec;
369
370 out:
371	return;
372}
373
374/**
375 * Initialize a snapshot context object.
376 *
377 * Clears all members of the context object.
378 */
379static void
380osc_snapshot_ctx_init(struct osc_snapshot_ctx *oscp)
381{
382	memset(oscp, 0, sizeof *oscp);
383}
384
385/**
386 * Desoy a snapshot context object.
387 *
388 * Frees all snapshots associated with the snapshot context and then calls
389 * osc_snapshot_ctx_init() to re-initialize the context object.
390 */
391static void
392osc_snapshot_ctx_clean(struct osc_snapshot_ctx *oscp)
393{
394	struct osc_snapshot *tmp;
395
396	while (oscp->osc_set != NULL) {
397		tmp = *(void **)oscp->osc_set;
398		tdelete(tmp, &oscp->osc_set, osc_snapshot_compare);
399		osc_snapshot_destroy_list(tmp);
400	}
401
402	osc_snapshot_ctx_init(oscp);
403}
404
405/**
406 * Return the "global" snapshot context.
407 *
408 * We maintain a single open snapshot context.  Return a pointer to it.
409 */
410static struct osc_snapshot_ctx *
411osc_get_snapshot_ctx(void)
412{
413	static struct osc_snapshot_ctx osc = { 0, { 0, 0 } };
414
415	return &osc;
416}
417
418/**
419 * Determine whether a snapshot context is still valid.
420 *
421 * "Valid" in this context means "reusable".  We can re-use a previous
422 * snapshot context iff we successfully built a previous snapshot context
423 * and no snapshots have been created or deleted since we did so.
424 * A "names" directory exists within our snapshot
425 * implementation in which all snapshot names are entered.  Each time a
426 * snapshot is created or deleted, an entry must be added or removed.
427 * When this happens the modification time on the "names" directory
428 * changes.  Therefore, a snapshot context is valid iff the context
429 * pointer is non-NULL, the cached modification time is non-zero
430 * (zero means uninitialized), and the modification time of the "names"
431 * directory matches the cached value.
432 */
433static int
434osc_snapshot_ctx_is_valid(struct osc_snapshot_ctx *oscp)
435{
436	int fd;
437	struct stat stbuf;
438
439	if (oscp == NULL)
440		return 0;
441
442	if (osc_timespec_is_zero(&oscp->osc_mtime))
443		return 0;
444
445	fd = osc_get_names_directory_fd();
446	if (fd == -1)
447		return 0;
448
449	if (fstat(fd, &stbuf) == -1)
450		return 0;
451
452	if (osc_timespec_compare(&oscp->osc_mtime, &stbuf.st_mtimespec) != 0)
453		return 0;
454
455	return 1;
456}
457
458/**
459 * Create and initialize a directory context.
460 *
461 * Allocates a directory context from the heap and initializes it.
462 */
463static struct osc_directory_ctx *
464osc_directory_ctx_create(void)
465{
466	struct osc_directory_ctx *idcp;
467
468	idcp = malloc(sizeof *idcp);
469	if (idcp != NULL)
470		memset(idcp, 0, sizeof *idcp);
471
472	return idcp;
473}
474
475/**
476 * Destroy a directory context.
477 *
478 * Frees any versions associated with the directory context and then frees the
479 * context itself.
480 */
481static void
482osc_directory_ctx_destroy(struct osc_directory_ctx *idcp)
483{
484	int i;
485
486	if (idcp == NULL)
487		return;
488
489	for (i = 0; i < idcp->idc_len; i++)
490		free(idcp->idc_version[i]);
491
492	free(idcp);
493}
494
495/**
496 * Expand the size of a directory context's version list.
497 *
498 * If osc_directory_ctx_append_version() detects that the version list is too
499 * small to accomodate a new version string, it called
500 * osc_directory_ctx_expand_version_list() to expand the version list.
501 */
502static void
503osc_directory_ctx_expand_version_list(struct osc_snapshot_ctx *oscp,
504    struct osc_directory_ctx *idcp, int *errorp)
505{
506	size_t size;
507	char **cpp;
508
509	size = idcp->idc_size * 2 ?: 1;
510
511	cpp = realloc(idcp->idc_version, size * sizeof (char *));
512	if (cpp == NULL) {
513		*errorp = 1;
514		return;
515	}
516
517	idcp->idc_size = size;
518	idcp->idc_version = cpp;
519}
520
521/**
522 * Append a new version to a directory context.
523 *
524 * Appends a snapshot version to the
525 * directory context's version list.
526 */
527static void
528osc_directory_ctx_append_version(struct osc_snapshot_ctx *oscp,
529    struct osc_directory_ctx *idcp, const struct timespec *tsp, int *errorp)
530{
531	char *cp;
532	struct tm *tmp;
533	char text[64];
534
535	if (idcp->idc_len >= MAX_VERSIONS)
536		return;
537
538	if (idcp->idc_len >= idcp->idc_size) {
539		osc_directory_ctx_expand_version_list(oscp, idcp, errorp);
540		if (*errorp)
541			return;
542	}
543
544	tmp = gmtime(&tsp->tv_sec);
545	if (tmp == NULL) {
546		*errorp = 1;
547		return;
548	}
549
550	snprintf(text, sizeof text,
551	    "@GMT-%04u.%02u.%02u-%02u.%02u.%02u",
552	    tmp->tm_year + 1900,
553	    tmp->tm_mon + 1,
554	    tmp->tm_mday,
555	    tmp->tm_hour,
556	    tmp->tm_min,
557	    tmp->tm_sec);
558
559	cp = malloc(strlen(text) + 1);
560	if (cp == NULL) {
561		*errorp = 1;
562		return;
563	}
564
565	strcpy(cp, text);
566
567	idcp->idc_version[idcp->idc_len++] = cp;
568}
569
570/**
571 * Make a directory context from a snapshot context.
572 *
573 * Once a snapshot context has been completely filled-in,
574 * osc_make_directory_ctx() is used to build a directory context from it.  The
575 * idea here is to create version for each snapshot in the snapshot set.
576 */
577static void
578osc_make_directory_ctx(struct osc_snapshot_ctx *oscp,
579    struct osc_directory_ctx *idcp, int *errorp)
580{
581	static void
582	walk(const void *vp, VISIT v, int level)
583	{
584		const struct osc_snapshot *isp;
585
586		if ((v != postorder && v != leaf) || *errorp)
587			return;
588
589		isp = *(const struct osc_snapshot **)(u_long)vp;
590
591		osc_directory_ctx_append_version(oscp, idcp, &isp->is_time,
592		    errorp);
593	}
594
595	twalk(oscp->osc_set, walk);
596}
597
/**
 * Open a version directory.
 *
 * Returns an opaque handle to a newly allocated directory context from
 * which version strings can be read with osc_version_readdir().  The cached
 * snapshot context is reused when it is still valid; otherwise it is
 * cleaned and rebuilt from the snapshot directory first.
 *
 * Returns NULL if the directory context cannot be allocated, or if
 * rebuilding the snapshot set or generating the version list reports an
 * error.  A non-NULL handle must be released with osc_version_closedir().
 */
void *
osc_version_opendir(void)
{
	struct osc_directory_ctx *dir;
	struct osc_snapshot_ctx *snaps;
	int failed = 0;

	dir = osc_directory_ctx_create();
	if (dir == NULL)
		return NULL;

	snaps = osc_get_snapshot_ctx();
	if (!osc_snapshot_ctx_is_valid(snaps)) {
		osc_snapshot_ctx_clean(snaps);
		osc_process_snapshot_directory(snaps, &failed);
	}

	if (!failed)
		osc_make_directory_ctx(snaps, dir, &failed);

	if (failed) {
		osc_directory_ctx_destroy(dir);
		return NULL;
	}

	return dir;
}
640
641/**
642 * Read the next version directory entry.
643 *
644 * Returns the name of the next version in the version directory, or NULL if
645 * we're at the end of the directory.  What this really does is return the
646 * next version from the version list stored in the directory context.
647 */
648char *
649osc_version_readdir(void *vp)
650{
651	struct osc_directory_ctx *idcp = vp;
652
653	if (idcp == NULL)
654		return NULL;
655
656	if (idcp->idc_pos >= idcp->idc_len)
657		return NULL;
658
659	return idcp->idc_version[idcp->idc_pos++];
660}
661
/**
 * Close the version directory.
 *
 * Destroys the directory context behind the handle.  A NULL handle is
 * accepted and ignored.
 */
void
osc_version_closedir(void *vp)
{
	if (vp == NULL)
		return;

	osc_directory_ctx_destroy((struct osc_directory_ctx *)vp);
}
675
676/**
677 * Canonicalize a path.
678 *
679 * Converts paths of the form @GMT-.. to paths of the form ../.snapshot/..
680 * It's not the prettiest routine I've ever written, but what the heck?
681 */
682char *
683osc_canonicalize_path(const char *path, char *snap_component)
684{
685	int error = 0;
686	struct osc_snapshot_ctx *oscp;
687	struct tm tm;
688	int n;
689	struct osc_snapshot is;
690	struct osc_snapshot **ispp;
691	struct osc_snapshot *isp;
692	char *cpath = NULL;
693	char *cpath2 = NULL;
694	const char *snap_component_orig = snap_component;
695	struct stat sb;
696
697	oscp = osc_get_snapshot_ctx();
698
699	if (!osc_snapshot_ctx_is_valid(oscp)) {
700		osc_snapshot_ctx_clean(oscp);
701		osc_process_snapshot_directory(oscp, &error);
702		if (error)
703			goto out;
704	}
705
706	memset(&tm, 0, sizeof tm);
707	n = sscanf(snap_component,
708	    "@GMT-%4u.%2u.%2u-%2u.%2u.%2u",
709	    &tm.tm_year,
710	    &tm.tm_mon,
711	    &tm.tm_mday,
712	    &tm.tm_hour,
713	    &tm.tm_min,
714	    &tm.tm_sec);
715	if (n != 6)
716		goto out;
717
718	tm.tm_year -= 1900;
719	tm.tm_mon -= 1;
720
721	is.is_name = NULL;
722	is.is_time.tv_sec = timegm(&tm);
723	is.is_time.tv_nsec = 0;
724
725	ispp = tfind(&is, &oscp->osc_set, osc_snapshot_compare);
726	if (ispp == NULL)
727		goto out;
728	isp = *ispp;
729
730	/* Determine the path after "@GMT-..." */
731	while (*snap_component != '/' && *snap_component != '\0')
732		snap_component++;
733
734	while (*snap_component == '/')
735		snap_component++;
736
737	cpath = malloc(strlen(SNAPSHOT_DIRECTORY) + strlen(isp->is_name) +
738	    strlen(path) + 3);
739
740	if (cpath == NULL)
741		goto out;
742
743	/*
744	 * Use the first snapshot that has a successful stat for the requested
745	 * path.
746	 */
747	while (true) {
748
749		sprintf(cpath, "%s/%s", SNAPSHOT_DIRECTORY, isp->is_name);
750
751		/* Append path before "@GMT-..." */
752		if (snap_component_orig != path) {
753			strcat(cpath, "/");
754			strncat(cpath, path, snap_component_orig - path);
755		}
756
757		/* Append path after "@GMT-..." */
758		if (*snap_component != '\0') {
759			strcat(cpath, "/");
760			strcat(cpath, snap_component);
761		}
762
763		/* If there is a valid snapshot for this file, we're done. */
764		if (stat(cpath, &sb) == 0)
765			break;
766
767		/* Try the next snapshot. If this was the last one, give up. */
768		isp = isp->is_next;
769		if (isp == NULL)
770			break;
771
772		/* If the realloc fails, give up. */
773		cpath2 = realloc(cpath, strlen(SNAPSHOT_DIRECTORY) +
774		    strlen(isp->is_name) + strlen(path) + 3);
775		if (cpath2 == NULL)
776			break;
777		cpath = cpath2;
778	}
779
780 out:
781	return cpath;
782}
783