1/* 2 * Copyright 2008, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7#include <algorithm> 8#include <string> 9#include <vector> 10 11#include <dirent.h> 12#include <errno.h> 13#include <stdio.h> 14#include <stdlib.h> 15#include <string.h> 16#include <unistd.h> 17 18#include <OS.h> 19#include <Path.h> 20 21#include <SHA256.h> 22 23#include "AdaptiveBuffering.h" 24 25 26//#define TRACE(x...) printf(x) 27#define TRACE(x...) ; 28 29 30extern const char *__progname; 31static const char *kProgramName = __progname; 32 33const size_t kInitialBufferSize = 1 * 1024 * 1024; 34const size_t kMaxBufferSize = 10 * 1024 * 1024; 35 36 37class SHAProcessor : public AdaptiveBuffering { 38public: 39 SHAProcessor() 40 : AdaptiveBuffering(kInitialBufferSize, kMaxBufferSize, 3), 41 fFile(-1) 42 { 43 } 44 45 virtual ~SHAProcessor() 46 { 47 Unset(); 48 } 49 50 void Unset() 51 { 52 if (fFile >= 0) 53 close(fFile); 54 } 55 56 status_t Process(int file) 57 { 58 Unset(); 59 fSHA.Init(); 60 fFile = file; 61 62 return Run(); 63 } 64 65 virtual status_t Read(uint8* buffer, size_t* _length) 66 { 67 ssize_t bytes = read(fFile, buffer, *_length); 68 if (bytes < B_OK) 69 return errno; 70 71 *_length = bytes; 72 return B_OK; 73 } 74 75 virtual status_t Write(uint8* buffer, size_t length) 76 { 77 fSHA.Update(buffer, length); 78 return B_OK; 79 } 80 81 const uint8* Digest() { return fSHA.Digest(); } 82 size_t DigestLength() const { return fSHA.DigestLength(); } 83 84private: 85 SHA256 fSHA; 86 int fFile; 87}; 88 89struct file_entry { 90 uint8 hash[SHA_DIGEST_LENGTH]; 91 ino_t node; 92 std::string path; 93 94 bool operator<(const struct file_entry& other) const 95 { 96 return path < other.path; 97 } 98 99 std::string HashString() const 100 { 101 char buffer[128]; 102 for (int i = 0; i < SHA_DIGEST_LENGTH; i++) { 103 sprintf(buffer + i * 2, "%02x", hash[i]); 104 } 105 106 return buffer; 107 } 108}; 109 110typedef std::vector<file_entry> FileList; 111 112void process_file(const char* path); 113 114 115SHAProcessor gSHA; 116FileList gFiles; 117 118 119void 120process_directory(const char* path) 121{ 122 DIR* dir = opendir(path); 123 if (dir == NULL) 124 return; 125 126 size_t pathLength = strlen(path); 127 128 while (struct dirent* entry = readdir(dir)) { 129 if (!strcmp(entry->d_name, ".") 130 || !strcmp(entry->d_name, "..")) 131 continue; 132 133 char fullPath[1024]; 134 strlcpy(fullPath, path, sizeof(fullPath)); 135 if (path[pathLength - 1] != '/') 136 strlcat(fullPath, "/", sizeof(fullPath)); 137 strlcat(fullPath, entry->d_name, sizeof(fullPath)); 138 139 process_file(fullPath); 140 } 141 142 closedir(dir); 143} 144 145 146void 147process_file(const char* path) 148{ 149 struct stat stat; 150 if (::lstat(path, &stat) != 0) { 151 fprintf(stderr, "Could not stat file \"%s\": %s\n", path, 152 strerror(errno)); 153 return; 154 } 155 156 if (S_ISDIR(stat.st_mode)) { 157 process_directory(path); 158 return; 159 } 160 if (S_ISLNK(stat.st_mode)) 161 return; 162 163 int file = open(path, O_RDONLY); 164 if (file < 0) { 165 fprintf(stderr, "Could not open file \"%s\": %s\n", path, 166 strerror(errno)); 167 return; 168 } 169 170 status_t status = gSHA.Process(file); 171 if (status != B_OK) { 172 fprintf(stderr, "Computing SHA failed \"%s\": %s\n", path, 173 strerror(status)); 174 return; 175 } 176 177 file_entry entry; 178 memcpy(entry.hash, gSHA.Digest(), SHA_DIGEST_LENGTH); 179 entry.node = stat.st_ino; 180 entry.path = path; 181 182 //printf("%s %s\n", entry.HashString().c_str(), path); 183 184 gFiles.push_back(entry); 185 186 static bigtime_t sLastUpdate = -1; 187 if (system_time() - sLastUpdate > 500000) { 188 printf("%ld files scanned\33[1A\n", gFiles.size()); 189 sLastUpdate = system_time(); 190 } 191} 192 193 194void 195write_hash_file(const char* name, int fileCount, char** files) 196{ 197 int file = open(name, O_WRONLY | O_TRUNC | O_CREAT); 198 if (file < 0) { 199 fprintf(stderr, "%s: Could not write hash file \"%s\": %s\n", 200 kProgramName, name, strerror(errno)); 201 return; 202 } 203 204 write(file, "HASH", 4); 205 206 write(file, &fileCount, sizeof(int)); 207 for (int i = 0; i < fileCount; i++) { 208 int length = strlen(files[i]); 209 write(file, &length, sizeof(int)); 210 write(file, files[i], length + 1); 211 } 212 213 fileCount = gFiles.size(); 214 write(file, &fileCount, sizeof(int)); 215 for (int i = 0; i < fileCount; i++) { 216 file_entry& entry = gFiles[i]; 217 218 write(file, entry.hash, SHA_DIGEST_LENGTH); 219 write(file, &entry.node, sizeof(ino_t)); 220 221 int length = entry.path.size(); 222 write(file, &length, sizeof(int)); 223 write(file, entry.path.c_str(), length + 1); 224 } 225 226 close(file); 227} 228 229 230int 231main(int argc, char** argv) 232{ 233 if (argc < 2) { 234 fprintf(stderr, "usage: %s <hash-file> [<files> ...]\n" 235 "\tWhen invoked without files, the hash-file is updated only.\n", 236 kProgramName); 237 return 1; 238 } 239 240 const char* hashFileName = argv[1]; 241 242 status_t status = gSHA.Init(); 243 if (status != B_OK) { 244 fprintf(stderr, "%s: Could not initialize SHA processor: %s\n", 245 kProgramName, strerror(status)); 246 return 1; 247 } 248 249 int fileCount = argc - 2; 250 char** files = argv + 2; 251 252 if (argc == 2) { 253 // read files from hash file 254 255 int file = open(hashFileName, O_RDONLY); 256 if (file < 0) { 257 fprintf(stderr, "%s: Could not open hash file \"%s\": %s\n", 258 kProgramName, hashFileName, strerror(status)); 259 return 1; 260 } 261 262 char buffer[2048]; 263 read(file, buffer, 4); 264 if (memcmp(buffer, "HASH", 4)) { 265 fprintf(stderr, "%s: \"%s\" is not a hash file\n", 266 kProgramName, hashFileName); 267 close(file); 268 return 1; 269 } 270 read(file, &fileCount, sizeof(int)); 271 TRACE("Found %d path(s):\n", fileCount); 272 273 files = (char**)malloc(fileCount * sizeof(char*)); 274 if (files == NULL) { 275 fprintf(stderr, "%s: Could not allocate %ld bytes\n", 276 kProgramName, fileCount * sizeof(char*)); 277 close(file); 278 return 1; 279 } 280 281 for (int i = 0; i < fileCount; i++) { 282 int length; 283 read(file, &length, sizeof(int)); 284 285 files[i] = (char*)malloc(length + 1); 286 if (files[i] == NULL) { 287 fprintf(stderr, "%s: Could not allocate %d bytes\n", 288 kProgramName, length + 1); 289 close(file); 290 // TODO: we actually leak memory here, but it's not important in this context 291 return 1; 292 } 293 read(file, files[i], length + 1); 294 TRACE("\t%s\n", files[i]); 295 } 296 297 close(file); 298 } else { 299 // Normalize paths 300 char** normalizedFiles = (char**)malloc(fileCount * sizeof(char*)); 301 if (normalizedFiles == NULL) { 302 fprintf(stderr, "%s: Could not allocate %ld bytes\n", 303 kProgramName, fileCount * sizeof(char*)); 304 return 1; 305 } 306 307 for (int i = 0; i < fileCount; i++) { 308 BPath path(files[i], NULL, true); 309 normalizedFiles[i] = strdup(path.Path()); 310 if (normalizedFiles[i] == NULL) { 311 fprintf(stderr, "%s: Could not allocate %ld bytes\n", 312 kProgramName, strlen(path.Path()) + 1); 313 return 1; 314 } 315 } 316 317 files = normalizedFiles; 318 } 319 320 bigtime_t start = system_time(); 321 322 for (int i = 0; i < fileCount; i++) { 323 process_file(files[i]); 324 } 325 326 sort(gFiles.begin(), gFiles.end()); 327 328 bigtime_t runtime = system_time() - start; 329 330 write_hash_file(hashFileName, fileCount, files); 331 332 if (gFiles.size() > 0) { 333 printf("Generated hashes for %ld files in %g seconds, %g msec per " 334 "file.\n", gFiles.size(), runtime / 1000000.0, 335 runtime / 1000.0 / gFiles.size()); 336 } 337 338 for (int i = 0; i < fileCount; i++) { 339 free(files[i]); 340 } 341 free(files); 342 343 return 0; 344} 345