1/*
2 * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include <algorithm>
8#include <string>
9#include <vector>
10
11#include <dirent.h>
12#include <errno.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16#include <unistd.h>
17
18#include <OS.h>
19#include <Path.h>
20
21#include <SHA256.h>
22
23#include "AdaptiveBuffering.h"
24
25
// Tracing is compiled out by default; enable the printf() variant below
// (and disable the empty one) to get debug output.
//#define TRACE(x...) printf(x)
#define TRACE(x...) ;


// Name of the running program, used as prefix for diagnostics.
extern const char *__progname;
static const char *kProgramName = __progname;

// Upper and lower bounds of the buffers handed to AdaptiveBuffering.
const size_t kMaxBufferSize = 10 * 1024 * 1024;
const size_t kInitialBufferSize = 1024 * 1024;
35
36
37class SHAProcessor : public AdaptiveBuffering {
38public:
39	SHAProcessor()
40		: AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3),
41		fFile(-1)
42	{
43	}
44
45	virtual ~SHAProcessor()
46	{
47		Unset();
48	}
49
50	void Unset()
51	{
52		if (fFile >= 0)
53			close(fFile);
54	}
55
56	status_t Process(int file)
57	{
58		Unset();
59		fSHA.Init();
60		fFile = file;
61
62		return Run();
63	}
64
65	virtual status_t Read(uint8* buffer, size_t* _length)
66	{
67		ssize_t bytes = read(fFile, buffer, *_length);
68		if (bytes < B_OK)
69			return errno;
70
71		*_length = bytes;
72		return B_OK;
73	}
74
75	virtual status_t Write(uint8* buffer, size_t length)
76	{
77		fSHA.Update(buffer, length);
78		return B_OK;
79	}
80
81	const uint8* Digest() { return fSHA.Digest(); }
82	size_t DigestLength() const	{ return fSHA.DigestLength(); }
83
84private:
85	SHA256	fSHA;
86	int		fFile;
87};
88
89struct file_entry {
90	uint8			hash[SHA_DIGEST_LENGTH];
91	ino_t			node;
92	std::string		path;
93
94	bool operator<(const struct file_entry& other) const
95	{
96		return path < other.path;
97	}
98
99	std::string HashString() const
100	{
101		char buffer[128];
102		for (int i = 0; i < SHA_DIGEST_LENGTH; i++) {
103			sprintf(buffer + i * 2, "%02x", hash[i]);
104		}
105
106		return buffer;
107	}
108};
109
// Collection type for the entries of all hashed files.
typedef std::vector<file_entry> FileList;

// Declared up front, as process_directory() and process_file() call each
// other.
void process_file(const char* path);


// The hasher shared by all process_file() invocations.
SHAProcessor gSHA;
// Filled by process_file(), sorted in main(), and written out by
// write_hash_file().
FileList gFiles;
117
118
119void
120process_directory(const char* path)
121{
122	DIR* dir = opendir(path);
123	if (dir == NULL)
124		return;
125
126	size_t pathLength = strlen(path);
127
128	while (struct dirent* entry = readdir(dir)) {
129		if (!strcmp(entry->d_name, ".")
130			|| !strcmp(entry->d_name, ".."))
131			continue;
132
133		char fullPath[1024];
134		strlcpy(fullPath, path, sizeof(fullPath));
135		if (path[pathLength - 1] != '/')
136			strlcat(fullPath, "/", sizeof(fullPath));
137		strlcat(fullPath, entry->d_name, sizeof(fullPath));
138
139		process_file(fullPath);
140	}
141
142	closedir(dir);
143}
144
145
146void
147process_file(const char* path)
148{
149	struct stat stat;
150	if (::lstat(path, &stat) != 0) {
151		fprintf(stderr, "Could not stat file \"%s\": %s\n", path,
152			strerror(errno));
153		return;
154	}
155
156	if (S_ISDIR(stat.st_mode)) {
157		process_directory(path);
158		return;
159	}
160	if (S_ISLNK(stat.st_mode))
161		return;
162
163	int file = open(path, O_RDONLY);
164	if (file < 0) {
165		fprintf(stderr, "Could not open file \"%s\": %s\n", path,
166			strerror(errno));
167		return;
168	}
169
170	status_t status = gSHA.Process(file);
171	if (status != B_OK) {
172		fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path,
173			strerror(status));
174		return;
175	}
176
177	file_entry entry;
178	memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH);
179	entry.node = stat.st_ino;
180	entry.path = path;
181
182	//printf("%s  %s\n", entry.HashString().c_str(), path);
183
184	gFiles.push_back(entry);
185
186	static bigtime_t sLastUpdate = -1;
187	if (system_time() - sLastUpdate > 500000) {
188		printf("%ld files scanned\33[1A\n", gFiles.size());
189		sLastUpdate = system_time();
190	}
191}
192
193
194void
195write_hash_file(const char* name, int fileCount, char** files)
196{
197	int file = open(name, O_WRONLY | O_TRUNC | O_CREAT);
198	if (file < 0) {
199		fprintf(stderr, "%s: Could not write hash file \"%s\": %s\n",
200			kProgramName, name, strerror(errno));
201		return;
202	}
203
204	write(file, "HASH", 4);
205
206	write(file, &fileCount, sizeof(int));
207	for (int i = 0; i < fileCount; i++) {
208		int length = strlen(files[i]);
209		write(file, &length, sizeof(int));
210		write(file, files[i], length + 1);
211	}
212
213	fileCount = gFiles.size();
214	write(file, &fileCount, sizeof(int));
215	for (int i = 0; i < fileCount; i++) {
216		file_entry& entry = gFiles[i];
217
218		write(file, entry.hash, SHA_DIGEST_LENGTH);
219		write(file, &entry.node, sizeof(ino_t));
220
221		int length = entry.path.size();
222		write(file, &length, sizeof(int));
223		write(file, entry.path.c_str(), length + 1);
224	}
225
226	close(file);
227}
228
229
230int
231main(int argc, char** argv)
232{
233	if (argc < 2) {
234		fprintf(stderr, "usage: %s <hash-file> [<files> ...]\n"
235			"\tWhen invoked without files, the hash-file is updated only.\n",
236			kProgramName);
237		return 1;
238	}
239
240	const char* hashFileName = argv[1];
241
242	status_t status = gSHA.Init();
243	if (status != B_OK) {
244		fprintf(stderr, "%s: Could not initialize SHA processor: %s\n",
245			kProgramName, strerror(status));
246		return 1;
247	}
248
249	int fileCount = argc - 2;
250	char** files = argv + 2;
251
252	if (argc == 2) {
253		// read files from hash file
254
255		int file = open(hashFileName, O_RDONLY);
256		if (file < 0) {
257			fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n",
258				kProgramName, hashFileName, strerror(status));
259			return 1;
260		}
261
262		char buffer[2048];
263		read(file, buffer, 4);
264		if (memcmp(buffer, "HASH", 4)) {
265			fprintf(stderr, "%s: \"%s\" is not a hash file\n",
266				kProgramName, hashFileName);
267			close(file);
268			return 1;
269		}
270		read(file, &fileCount, sizeof(int));
271		TRACE("Found %d path(s):\n", fileCount);
272
273		files = (char**)malloc(fileCount * sizeof(char*));
274		if (files == NULL) {
275			fprintf(stderr, "%s: Could not allocate %ld bytes\n",
276				kProgramName, fileCount * sizeof(char*));
277			close(file);
278			return 1;
279		}
280
281		for (int i = 0; i < fileCount; i++) {
282			int length;
283			read(file, &length, sizeof(int));
284
285			files[i] = (char*)malloc(length + 1);
286			if (files[i] == NULL) {
287				fprintf(stderr, "%s: Could not allocate %d bytes\n",
288					kProgramName, length + 1);
289				close(file);
290				// TODO: we actually leak memory here, but it's not important in this context
291				return 1;
292			}
293			read(file, files[i], length + 1);
294			TRACE("\t%s\n", files[i]);
295		}
296
297		close(file);
298	} else {
299		// Normalize paths
300		char** normalizedFiles = (char**)malloc(fileCount * sizeof(char*));
301		if (normalizedFiles == NULL) {
302			fprintf(stderr, "%s: Could not allocate %ld bytes\n",
303				kProgramName, fileCount * sizeof(char*));
304			return 1;
305		}
306
307		for (int i = 0; i < fileCount; i++) {
308			BPath path(files[i], NULL, true);
309			normalizedFiles[i] = strdup(path.Path());
310			if (normalizedFiles[i] == NULL) {
311				fprintf(stderr, "%s: Could not allocate %ld bytes\n",
312					kProgramName, strlen(path.Path()) + 1);
313				return 1;
314			}
315		}
316
317		files = normalizedFiles;
318	}
319
320	bigtime_t start = system_time();
321
322	for (int i = 0; i < fileCount; i++) {
323		process_file(files[i]);
324	}
325
326	sort(gFiles.begin(), gFiles.end());
327
328	bigtime_t runtime = system_time() - start;
329
330	write_hash_file(hashFileName, fileCount, files);
331
332	if (gFiles.size() > 0) {
333		printf("Generated hashes for %ld files in %g seconds, %g msec per "
334			"file.\n", gFiles.size(), runtime / 1000000.0,
335			runtime / 1000.0 / gFiles.size());
336	}
337
338	for (int i = 0; i < fileCount; i++) {
339		free(files[i]);
340	}
341	free(files);
342
343	return 0;
344}
345