1/* $FreeBSD$ */
2
3/* Still missing:
4 *
5 * mkctm
6 *	-B regex	Bogus
7 *	-I regex	Ignore
8 *      -D int		Damage
9 *	-q		decrease verbosity
10 *	-v		increase verbosity
11 *      -l file		logfile
12 *	name		cvs-cur
13 *	prefix		src/secure
14 *	dir1		"Soll"
15 *	dir2		"Ist"
16 *
17 * $FreeBSD$
18 */
19
20#include <sys/types.h>
21#include <sys/stat.h>
22#include <sys/mman.h>
23#include <sys/wait.h>
24#include <dirent.h>
25#include <regex.h>
26#include <stdio.h>
27#include <fcntl.h>
28#include <string.h>
29#include <stdlib.h>
30#include <unistd.h>
31#include <md5.h>
32#include <err.h>
33#include <paths.h>
34#include <signal.h>
35
/*
 * Default extended regular expressions for names to ignore and for
 * names considered bogus.  Only referenced by the disabled (#if 0)
 * default setup in main().
 */
#define DEFAULT_IGNORE	"/CVS$|/\\.#|00_TRANS\\.TBL$"
#define DEFAULT_BOGUS	"\\.core$|\\.orig$|\\.rej$|\\.o$"

/* Compiled -I and -B patterns; a flag is nonzero while its pattern is compiled. */
regex_t	reg_ignore;
regex_t	reg_bogus;
int	flag_ignore;
int	flag_bogus;

/* Verbosity level: raised by -v, lowered by -q. */
int	verbose;

/* Damage seen so far (deletions and bogus names) and the -D limit. */
int	damage;
int	damage_limit;

/* Number of additions, deletions and changed files found so far. */
int	change;

/* Log stream: the -l file, or _PATH_DEVNULL when -l is not given. */
FILE	*logf;

/*
 * Statistics printed by print_stat().  The s1_ counters belong to the
 * first ("ref") tree, the s2_ counters to the second ("target") tree.
 */
u_long	s1_ignored;
u_long	s2_ignored;
u_long	s1_bogus;
u_long	s2_bogus;
u_long	s1_wrong;
u_long	s2_wrong;

/* Delta: new, deleted, changed and unchanged entries. */
u_long	s_new_dirs, s_new_files, s_new_bytes;
u_long	s_del_dirs, s_del_files, s_del_bytes;
u_long	s_files_chg, s_bytes_add, s_bytes_del;
u_long	s_same_dirs, s_same_files, s_same_bytes;

/* Method: files sent as diff edits versus full substitutions. */
u_long	s_edit_files, s_edit_bytes, s_edit_saves;
u_long	s_sub_files, s_sub_bytes;
56
/*
 * Print the command synopsis and every option accepted by main()'s
 * getopt() string on stderr.
 */
void
Usage(void)
{
	static const char *const lines[] = {
		"usage: mkctm [-options] name number timestamp prefix dir1 dir2\n",
		"options:\n",
		"\t\t-B bogus_regexp\n",
		"\t\t-D damage_limit\n",
		"\t\t-I ignore_regexp\n",
		"\t\t-l logfile\n",
		"\t\t-q\n",
		"\t\t-v\n",
	};
	size_t k;

	for (k = 0; k < sizeof lines / sizeof lines[0]; k++)
		fputs(lines[k], stderr);
}
69
70void
71print_stat(FILE *fd, char *pre)
72{
73    fprintf(fd, "%sNames:\n", pre);
74    fprintf(fd, "%s  ignore:  %5lu ref   %5lu target\n",
75	    pre, s1_ignored, s2_ignored);
76    fprintf(fd, "%s  bogus:   %5lu ref   %5lu target\n",
77	    pre, s1_bogus, s2_bogus);
78    fprintf(fd, "%s  wrong:   %5lu ref   %5lu target\n",
79	    pre, s1_wrong, s2_wrong);
80    fprintf(fd, "%sDelta:\n", pre);
81    fprintf(fd, "%s  new:     %5lu dirs  %5lu files  %9lu plus\n",
82	    pre, s_new_dirs, s_new_files, s_new_bytes);
83    fprintf(fd, "%s  del:     %5lu dirs  %5lu files                   %9lu minus\n",
84	    pre, s_del_dirs, s_del_files, s_del_bytes);
85    fprintf(fd, "%s  chg:                 %5lu files  %9lu plus   %9lu minus\n",
86	    pre, s_files_chg, s_bytes_add, s_bytes_del);
87    fprintf(fd, "%s  same:    %5lu dirs  %5lu files  %9lu bytes\n",
88	    pre, s_same_dirs, s_same_files, s_same_bytes);
89    fprintf(fd, "%sMethod:\n", pre);
90    fprintf(fd, "%s  edit:                %5lu files  %9lu bytes  %9lu saved\n",
91	    pre, s_edit_files, s_edit_bytes, s_edit_saves);
92    fprintf(fd, "%s  sub:                 %5lu files  %9lu bytes\n",
93	    pre, s_sub_files, s_sub_bytes);
94
95}
96
97void
98stat_info(int foo)
99{
100	signal(SIGINFO, stat_info);
101	print_stat(stderr, "INFO: ");
102}
103
void DoDir(const char *dir1, const char *dir2, const char *name);

/* Result buffer shared by every StatFile() call. */
static struct stat st;

/*
 * lstat() `name' and return a pointer to the shared result buffer.
 * The buffer is overwritten by the next call.  Exits with status 1 if
 * the lstat() fails.
 */
static __inline struct stat *
StatFile(char *name)
{
	int rc;

	rc = lstat(name, &st);
	if (rc < 0)
		err(1, "couldn't stat %s", name);
	return (&st);
}
114
/*
 * scandir() filter: accept every entry except "." and "..".
 *
 * The parameter is const-qualified so the function matches the filter
 * type POSIX scandir() expects; callers holding a non-const pointer are
 * unaffected.
 *
 * Returns 1 to keep the entry, 0 to drop it.
 */
int
dirselect(const struct dirent *de)
{
	return (strcmp(de->d_name, ".") != 0 &&
	    strcmp(de->d_name, "..") != 0);
}
122
123void
124name_stat(const char *pfx, const char *dir, const char *name, struct dirent *de)
125{
126	char *buf = alloca(strlen(dir) + strlen(name) +
127		strlen(de->d_name) + 3);
128	struct stat *st;
129
130	strcpy(buf, dir);
131		strcat(buf, "/"); strcat(buf, name);
132		strcat(buf, "/"); strcat(buf, de->d_name);
133	st = StatFile(buf);
134	printf("%s %s%s %u %u %o",
135	    pfx, name, de->d_name,
136	    st->st_uid, st->st_gid, st->st_mode & ~S_IFMT);
137	fprintf(logf, "%s %s%s\n", pfx, name, de->d_name);
138	if (verbose > 1) {
139		fprintf(stderr, "%s %s%s\n", pfx, name, de->d_name);
140	}
141}
142
143void
144Equ(const char *dir1, const char *dir2, const char *name, struct dirent *de)
145{
146	if (de->d_type == DT_DIR) {
147		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
148
149		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
150		DoDir(dir1, dir2, p);
151		s_same_dirs++;
152	} else {
153		char *buf1 = alloca(strlen(dir1) + strlen(name) +
154			strlen(de->d_name) + 3);
155		char *buf2 = alloca(strlen(dir2) + strlen(name) +
156			strlen(de->d_name) + 3);
157		char *m1, md5_1[33], *m2, md5_2[33];
158		u_char *p1, *p2;
159		int fd1, fd2;
160		struct stat s1, s2;
161
162		strcpy(buf1, dir1);
163			strcat(buf1, "/"); strcat(buf1, name);
164			strcat(buf1, "/"); strcat(buf1, de->d_name);
165		fd1 = open(buf1, O_RDONLY);
166		if(fd1 < 0) { err(3, "%s", buf1); }
167		fstat(fd1, &s1);
168		strcpy(buf2, dir2);
169			strcat(buf2, "/"); strcat(buf2, name);
170			strcat(buf2, "/"); strcat(buf2, de->d_name);
171		fd2 = open(buf2, O_RDONLY);
172		if(fd2 < 0) { err(3, "%s", buf2); }
173		fstat(fd2, &s2);
174#if 0
175		/* XXX if we could just trust the size to change... */
176		if (s1.st_size == s2.st_size) {
177			s_same_files++;
178			s_same_bytes += s1.st_size;
179			close(fd1);
180			close(fd2);
181			goto finish;
182		}
183#endif
184		p1=mmap(0, s1.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
185		if (p1 == (u_char *)MAP_FAILED) { err(3, "%s", buf1); }
186		close(fd1);
187
188		p2=mmap(0, s2.st_size, PROT_READ, MAP_PRIVATE, fd2, 0);
189		if (p2 == (u_char *)MAP_FAILED) { err(3, "%s", buf2); }
190		close(fd2);
191
192		/* If identical, we're done. */
193		if((s1.st_size == s2.st_size) && !memcmp(p1, p2, s1.st_size)) {
194			s_same_files++;
195			s_same_bytes += s1.st_size;
196			goto finish;
197		}
198
199		s_files_chg++;
200		change++;
201		if (s1.st_size > s2.st_size)
202			s_bytes_del += (s1.st_size - s2.st_size);
203		else
204			s_bytes_add += (s2.st_size - s1.st_size);
205
206		m1 = MD5Data(p1, s1.st_size, md5_1);
207		m2 = MD5Data(p2, s2.st_size, md5_2);
208
209		/* Just a curiosity... */
210		if(!strcmp(m1, m2)) {
211			if (s1.st_size != s2.st_size)
212				fprintf(stderr,
213		"Notice: MD5 same for files of diffent size:\n\t%s\n\t%s\n",
214					buf1, buf2);
215			goto finish;
216		}
217
218		{
219			u_long l = s2.st_size + 2;
220			u_char *cmd = alloca(strlen(buf1)+strlen(buf2)+100);
221			u_char *ob = malloc(l), *p;
222			int j;
223			FILE *F;
224
225			if (s1.st_size && p1[s1.st_size-1] != '\n') {
226				if (verbose > 0)
227					fprintf(stderr,
228					    "last char != \\n in %s\n",
229					     buf1);
230				goto subst;
231			}
232
233			if (s2.st_size && p2[s2.st_size-1] != '\n') {
234				if (verbose > 0)
235					fprintf(stderr,
236					    "last char != \\n in %s\n",
237					     buf2);
238				goto subst;
239			}
240
241			for (p=p1; p<p1+s1.st_size; p++)
242				if (!*p) {
243					if (verbose > 0)
244						fprintf(stderr,
245						    "NULL char in %s\n",
246						     buf1);
247					goto subst;
248				}
249
250			for (p=p2; p<p2+s2.st_size; p++)
251				if (!*p) {
252					if (verbose > 0)
253						fprintf(stderr,
254						    "NULL char in %s\n",
255						     buf2);
256					goto subst;
257				}
258
259			strcpy(cmd, "diff -n ");
260			strcat(cmd, buf1);
261			strcat(cmd, " ");
262			strcat(cmd, buf2);
263			F = popen(cmd, "r");
264			for (j = 1, l = 0; l < s2.st_size; ) {
265				j = fread(ob+l, 1, s2.st_size - l, F);
266				if (j < 1)
267					break;
268				l += j;
269				continue;
270			}
271			if (j) {
272				l = 0;
273				while (EOF != fgetc(F))
274					continue;
275			}
276			pclose(F);
277
278			if (l && l < s2.st_size) {
279				name_stat("CTMFN", dir2, name, de);
280				printf(" %s %s %d\n", m1, m2, (unsigned)l);
281				fwrite(ob, 1, l, stdout);
282				putchar('\n');
283				s_edit_files++;
284				s_edit_bytes += l;
285				s_edit_saves += (s2.st_size - l);
286			} else {
287			subst:
288				name_stat("CTMFS", dir2, name, de);
289				printf(" %s %s %u\n", m1, m2, (unsigned)s2.st_size);
290				fwrite(p2, 1, s2.st_size, stdout);
291				putchar('\n');
292				s_sub_files++;
293				s_sub_bytes += s2.st_size;
294			}
295			free(ob);
296		}
297	    finish:
298		munmap(p1, s1.st_size);
299		munmap(p2, s2.st_size);
300	}
301}
302
303void
304Add(const char *dir1, const char *dir2, const char *name, struct dirent *de)
305{
306	change++;
307	if (de->d_type == DT_DIR) {
308		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
309		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
310		name_stat("CTMDM", dir2, name, de);
311		putchar('\n');
312		s_new_dirs++;
313		DoDir(dir1, dir2, p);
314	} else if (de->d_type == DT_REG) {
315		char *buf2 = alloca(strlen(dir2) + strlen(name) +
316			strlen(de->d_name) + 3);
317		char *m2, md5_2[33];
318		u_char *p1;
319		struct stat st;
320		int fd1;
321
322		strcpy(buf2, dir2);
323			strcat(buf2, "/"); strcat(buf2, name);
324			strcat(buf2, "/"); strcat(buf2, de->d_name);
325		fd1 = open(buf2, O_RDONLY);
326		if (fd1 < 0) { err(3, "%s", buf2); }
327		fstat(fd1, &st);
328		p1=mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd1, 0);
329		if (p1 == (u_char *)MAP_FAILED) { err(3, "%s", buf2); }
330		close(fd1);
331		m2 = MD5Data(p1, st.st_size, md5_2);
332		name_stat("CTMFM", dir2, name, de);
333		printf(" %s %u\n", m2, (unsigned)st.st_size);
334		fwrite(p1, 1, st.st_size, stdout);
335		putchar('\n');
336		munmap(p1, st.st_size);
337		s_new_files++;
338		s_new_bytes += st.st_size;
339	}
340}
341
342void
343Del (const char *dir1, const char *dir2, const char *name, struct dirent *de)
344{
345	damage++;
346	change++;
347	if (de->d_type == DT_DIR) {
348		char *p = alloca(strlen(name)+strlen(de->d_name)+2);
349		strcpy(p, name);  strcat(p, de->d_name); strcat(p, "/");
350		DoDir(dir1, dir2, p);
351		printf("CTMDR %s%s\n", name, de->d_name);
352		fprintf(logf, "CTMDR %s%s\n", name, de->d_name);
353		if (verbose > 1) {
354			fprintf(stderr, "CTMDR %s%s\n", name, de->d_name);
355		}
356		s_del_dirs++;
357	} else if (de->d_type == DT_REG) {
358		char *buf1 = alloca(strlen(dir1) + strlen(name) +
359			strlen(de->d_name) + 3);
360		char *m1, md5_1[33];
361		strcpy(buf1, dir1);
362			strcat(buf1, "/"); strcat(buf1, name);
363			strcat(buf1, "/"); strcat(buf1, de->d_name);
364		m1 = MD5File(buf1, md5_1);
365		printf("CTMFR %s%s %s\n", name, de->d_name, m1);
366		fprintf(logf, "CTMFR %s%s %s\n", name, de->d_name, m1);
367		if (verbose > 1) {
368			fprintf(stderr, "CTMFR %s%s\n", name, de->d_name);
369		}
370		s_del_files++;
371		s_del_bytes += StatFile(buf1)->st_size;
372	}
373}
374
375void
376GetNext(int *i, int *n, struct dirent **nl, const char *dir, const char *name, u_long *ignored, u_long *bogus, u_long *wrong)
377{
378	char buf[BUFSIZ];
379	char buf1[BUFSIZ];
380
381	for (;;) {
382		for (;;) {
383			(*i)++;
384			if (*i >= *n)
385				return;
386			strcpy(buf1, name);
387			if (buf1[strlen(buf1)-1] != '/')
388				strcat(buf1, "/");
389			strcat(buf1, nl[*i]->d_name);
390			if (flag_ignore &&
391			    !regexec(&reg_ignore, buf1, 0, 0, 0)) {
392				(*ignored)++;
393				fprintf(logf, "Ignore %s\n", buf1);
394				if (verbose > 2) {
395					fprintf(stderr, "Ignore %s\n", buf1);
396				}
397			} else if (flag_bogus &&
398			    !regexec(&reg_bogus, buf1, 0, 0, 0)) {
399				(*bogus)++;
400				fprintf(logf, "Bogus %s\n", buf1);
401				fprintf(stderr, "Bogus %s\n", buf1);
402				damage++;
403			} else {
404				*buf = 0;
405				if (*dir != '/')
406					strcat(buf, "/");
407				strcat(buf, dir);
408				if (buf[strlen(buf)-1] != '/')
409					strcat(buf, "/");
410				strcat(buf, buf1);
411				break;
412			}
413			free(nl[*i]); nl[*i] = 0;
414		}
415		/* If the filesystem didn't tell us, find type */
416		if (nl[*i]->d_type == DT_UNKNOWN)
417			nl[*i]->d_type = IFTODT(StatFile(buf)->st_mode);
418		if (nl[*i]->d_type == DT_REG || nl[*i]->d_type == DT_DIR)
419			break;
420		(*wrong)++;
421		if (verbose > 0)
422			fprintf(stderr, "Wrong %s\n", buf);
423		free(nl[*i]); nl[*i] = 0;
424	}
425}
426
427void
428DoDir(const char *dir1, const char *dir2, const char *name)
429{
430	int i1, i2, n1, n2, i;
431	struct dirent **nl1, **nl2;
432	char *buf1 = alloca(strlen(dir1) + strlen(name) + 4);
433	char *buf2 = alloca(strlen(dir2) + strlen(name) + 4);
434
435	strcpy(buf1, dir1); strcat(buf1, "/"); strcat(buf1, name);
436	strcpy(buf2, dir2); strcat(buf2, "/"); strcat(buf2, name);
437	n1 = scandir(buf1, &nl1, dirselect, alphasort);
438	n2 = scandir(buf2, &nl2, dirselect, alphasort);
439	i1 = i2 = -1;
440	GetNext(&i1, &n1, nl1, dir1, name, &s1_ignored, &s1_bogus, &s1_wrong);
441	GetNext(&i2, &n2, nl2, dir2, name, &s2_ignored, &s2_bogus, &s2_wrong);
442	for (;i1 < n1 || i2 < n2;) {
443
444		if (damage_limit && damage > damage_limit)
445			break;
446
447		/* Get next item from list 1 */
448		if (i1 < n1 && !nl1[i1])
449			GetNext(&i1, &n1, nl1, dir1, name,
450				&s1_ignored, &s1_bogus, &s1_wrong);
451
452		/* Get next item from list 2 */
453		if (i2 < n2 && !nl2[i2])
454			GetNext(&i2, &n2, nl2, dir2, name,
455				&s2_ignored, &s2_bogus, &s2_wrong);
456
457		if (i1 >= n1 && i2 >= n2) {
458			/* Done */
459			break;
460		} else if (i1 >= n1 && i2 < n2) {
461			/* end of list 1, add anything left on list 2 */
462			Add(dir1, dir2, name, nl2[i2]);
463			free(nl2[i2]); nl2[i2] = 0;
464		} else if (i1 < n1 && i2 >= n2) {
465			/* end of list 2, delete anything left on list 1 */
466			Del(dir1, dir2, name, nl1[i1]);
467			free(nl1[i1]); nl1[i1] = 0;
468		} else if (!(i = strcmp(nl1[i1]->d_name, nl2[i2]->d_name))) {
469			/* Identical names */
470			if (nl1[i1]->d_type == nl2[i2]->d_type) {
471				/* same type */
472				Equ(dir1, dir2, name, nl1[i1]);
473			} else {
474				/* different types */
475				Del(dir1, dir2, name, nl1[i1]);
476				Add(dir1, dir2, name, nl2[i2]);
477			}
478			free(nl1[i1]); nl1[i1] = 0;
479			free(nl2[i2]); nl2[i2] = 0;
480		} else if (i < 0) {
481			/* Something extra in list 1, delete it */
482			Del(dir1, dir2, name, nl1[i1]);
483			free(nl1[i1]); nl1[i1] = 0;
484		} else {
485			/* Something extra in list 2, add it */
486			Add(dir1, dir2, name, nl2[i2]);
487			free(nl2[i2]); nl2[i2] = 0;
488		}
489	}
490	if (n1 >= 0)
491		free(nl1);
492	if (n2 >= 0)
493		free(nl2);
494}
495
496int
497main(int argc, char **argv)
498{
499	int i;
500
501	setbuf(stderr, NULL);
502
503#if 0
504	if (regcomp(&reg_bogus, DEFAULT_BOGUS, REG_EXTENDED | REG_NEWLINE))
505		/* XXX use regerror to explain it */
506		errx(1, "default regular expression argument to -B is botched");
507	flag_bogus = 1;
508
509	if (regcomp(&reg_ignore, DEFAULT_IGNORE, REG_EXTENDED | REG_NEWLINE))
510		/* XXX use regerror to explain it */
511		errx(1, "default regular expression argument to -I is botched");
512	flag_ignore = 1;
513#endif
514
515	while ((i = getopt(argc, argv, "D:I:B:l:qv")) != -1)
516		switch (i) {
517		case 'D':
518			damage_limit = strtol(optarg, 0, 0);
519			if (damage_limit < 0)
520				errx(1, "damage limit must be positive");
521			break;
522		case 'I':
523			if (flag_ignore)
524				regfree(&reg_ignore);
525			flag_ignore = 0;
526			if (!*optarg)
527				break;
528			if (regcomp(&reg_ignore, optarg,
529			    REG_EXTENDED | REG_NEWLINE))
530				/* XXX use regerror to explain it */
531				errx(1, "regular expression argument to -I is botched");
532			flag_ignore = 1;
533			break;
534		case 'B':
535			if (flag_bogus)
536				regfree(&reg_bogus);
537			flag_bogus = 0;
538			if (!*optarg)
539				break;
540			if (regcomp(&reg_bogus, optarg,
541			    REG_EXTENDED | REG_NEWLINE))
542				/* XXX use regerror to explain it */
543				errx(1, "regular expression argument to -B is botched");
544			flag_bogus = 1;
545			break;
546		case 'l':
547			logf = fopen(optarg, "w");
548			if (!logf)
549				err(1, "%s", optarg);
550			setlinebuf(logf);
551			break;
552		case 'q':
553			verbose--;
554			break;
555		case 'v':
556			verbose++;
557			break;
558		case '?':
559		default:
560			Usage();
561			return (1);
562		}
563	argc -= optind;
564	argv += optind;
565
566	if (!logf)
567		logf = fopen(_PATH_DEVNULL, "w");
568
569	setbuf(stdout, 0);
570
571	if (argc != 6) {
572		Usage();
573		return (1);
574	}
575
576	signal(SIGINFO, stat_info);
577
578	fprintf(stderr, "CTM_BEGIN 2.0 %s %s %s %s\n",
579		argv[0], argv[1], argv[2], argv[3]);
580	fprintf(logf, "CTM_BEGIN 2.0 %s %s %s %s\n",
581		argv[0], argv[1], argv[2], argv[3]);
582	printf("CTM_BEGIN 2.0 %s %s %s %s\n",
583		argv[0], argv[1], argv[2], argv[3]);
584	DoDir(argv[4], argv[5], "");
585	if (damage_limit && damage > damage_limit) {
586		print_stat(stderr, "DAMAGE: ");
587		errx(1, "damage of %d would exceed %d files",
588			damage, damage_limit);
589	} else if (change < 2) {
590		errx(4, "no changes");
591	} else {
592		printf("CTM_END ");
593		fprintf(logf, "CTM_END\n");
594		print_stat(stderr, "END: ");
595	}
596	exit(0);
597}
598