1/*-
2 * Copyright (c) 2002 Poul-Henning Kamp
3 * Copyright (c) 2002 Networks Associates Technology, Inc.
4 * All rights reserved.
5 *
6 * This software was developed for the FreeBSD Project by Poul-Henning Kamp
7 * and NAI Labs, the Security Research Division of Network Associates, Inc.
8 * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
9 * DARPA CHATS research program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. The names of the authors may not be used to endorse or promote
20 *    products derived from this software without specific prior written
21 *    permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * $FreeBSD$
36 */
37
38#include <stdio.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42#include <fcntl.h>
43#include <ctype.h>
44#include <sys/stat.h>
45#include <sys/mman.h>
46#include <sys/queue.h>
47#include <sys/sbuf.h>
48#include <err.h>
49#include <bsdxml.h>
50
51FILE *fsubs;
52
53struct node {
54	LIST_HEAD(, node)	children;
55	LIST_ENTRY(node)	siblings;
56	struct node		*parent;
57	const char		*name;
58	struct sbuf		*cont;
59	struct sbuf		*key;
60	char			*id;
61	char			*ref;
62};
63
64struct mytree {
65	struct node		*top;
66	struct node		*cur;
67	int			indent;
68	int			ignore;
69};
70
71struct ref {
72	LIST_ENTRY(ref)		next;
73	char 			*k1;
74	char			*k2;
75};
76
77LIST_HEAD(, ref)		refs = LIST_HEAD_INITIALIZER(refs);
78
79static struct node *
80new_node(void)
81{
82	struct node *np;
83
84	np = calloc(1, sizeof *np);
85	np->cont = sbuf_new_auto();
86	sbuf_clear(np->cont);
87	np->key = sbuf_new_auto();
88	sbuf_clear(np->key);
89	LIST_INIT(&np->children);
90	return (np);
91}
92
93static void
94indent(int n)
95{
96
97	printf("%*.*s", n, n, "");
98}
99
100static void
101StartElement(void *userData, const char *name, const char **attr)
102{
103	struct mytree *mt;
104	struct node *np;
105	int i;
106
107	mt = userData;
108	if (!strcmp(name, "FreeBSD")) {
109		mt->ignore = 1;
110		return;
111	}
112	mt->ignore = 0;
113	mt->indent += 2;
114	np = new_node();
115	for (i = 0; attr[i]; i += 2) {
116		if (!strcmp(attr[i], "id"))
117			np->id = strdup(attr[i+1]);
118		else if (!strcmp(attr[i], "ref"))
119			np->ref = strdup(attr[i+1]);
120	}
121	np->name = strdup(name);
122	sbuf_cat(np->key, name);
123	sbuf_cat(np->key, "::");
124	np->parent = mt->cur;
125	LIST_INSERT_HEAD(&mt->cur->children, np, siblings);
126	mt->cur = np;
127}
128
129static void
130EndElement(void *userData, const char *name __unused)
131{
132	struct mytree *mt;
133	struct node *np;
134
135	mt = userData;
136	if (mt->ignore)
137		return;
138
139	mt->indent -= 2;
140	sbuf_finish(mt->cur->cont);
141	LIST_FOREACH(np, &mt->cur->children, siblings) {
142		if (strcmp(np->name, "name"))
143			continue;
144		sbuf_cat(mt->cur->key, sbuf_data(np->cont));
145		break;
146	}
147	sbuf_finish(mt->cur->key);
148	mt->cur = mt->cur->parent;
149}
150
151static void
152CharData(void *userData , const XML_Char *s , int len)
153{
154	struct mytree *mt;
155	const char *b, *e;
156
157	mt = userData;
158	if (mt->ignore)
159		return;
160	b = s;
161	e = s + len - 1;
162	while (isspace(*b) && b < e)
163		b++;
164	while (isspace(*e) && e > b)
165		e--;
166	if (e != b || *b)
167		sbuf_bcat(mt->cur->cont, b, e - b + 1);
168}
169
170static struct mytree *
171dofile(char *filename)
172{
173	XML_Parser parser;
174	struct mytree *mt;
175	struct stat st;
176	int fd;
177	char *p;
178	int i;
179
180	parser = XML_ParserCreate(NULL);
181	mt = calloc(1, sizeof *mt);
182	mt->top = new_node();
183	mt->top->name = "(top)";
184	mt->top->parent = mt->top;
185	mt->cur = mt->top;
186	sbuf_finish(mt->top->key);
187	sbuf_finish(mt->top->cont);
188	XML_SetUserData(parser, mt);
189	XML_SetElementHandler(parser, StartElement, EndElement);
190	XML_SetCharacterDataHandler(parser, CharData);
191	fd = open(filename, O_RDONLY);
192	if (fd < 0)
193		err(1, filename);
194	fstat(fd, &st);
195	p = mmap(NULL, st.st_size, PROT_READ, MAP_NOCORE|MAP_PRIVATE, fd, 0);
196	i = XML_Parse(parser, p, st.st_size, 1);
197	if (i != 1)
198		errx(1, "XML_Parse complained -> %d", i);
199	munmap(p, st.st_size);
200	close(fd);
201	XML_ParserFree(parser);
202	sbuf_finish(mt->top->cont);
203	if (i)
204		return (mt);
205	else
206		return (NULL);
207}
208
209static void
210print_node(struct node *np)
211{
212	printf("\"%s\" -- \"%s\" -- \"%s\"", np->name, sbuf_data(np->cont), sbuf_data(np->key));
213	if (np->id)
214		printf(" id=\"%s\"", np->id);
215	if (np->ref)
216		printf(" ref=\"%s\"", np->ref);
217	printf("\n");
218}
219
220static void
221print_tree(struct node *np, int n)
222{
223	struct node *np1;
224
225	indent(n); printf("%s id=%s ref=%s\n", np->name, np->id, np->ref);
226	LIST_FOREACH(np1, &np->children, siblings)
227		print_tree(np1, n + 2);
228}
229
230static void
231sort_node(struct node *np)
232{
233	struct node *np1, *np2;
234	int n;
235
236	LIST_FOREACH(np1, &np->children, siblings)
237		sort_node(np1);
238	do {
239		np1 = LIST_FIRST(&np->children);
240		n = 0;
241		for (;;) {
242			if (np1 == NULL)
243				return;
244			np2 = LIST_NEXT(np1, siblings);
245			if (np2 == NULL)
246				return;
247			if (strcmp(sbuf_data(np1->key), sbuf_data(np2->key)) > 0) {
248				LIST_REMOVE(np2, siblings);
249				LIST_INSERT_BEFORE(np1, np2, siblings);
250				n++;
251				break;
252			}
253			np1 = np2;
254		}
255	} while (n);
256}
257
258static int
259refcmp(char *r1, char *r2)
260{
261	struct ref *r;
262
263	LIST_FOREACH(r, &refs, next) {
264		if (!strcmp(r1, r->k1))
265			return (strcmp(r2, r->k2));
266	}
267	r = calloc(1, sizeof(*r));
268	r->k1 = strdup(r1);
269	r->k2 = strdup(r2);
270	LIST_INSERT_HEAD(&refs, r, next);
271	if (fsubs != NULL) {
272		fprintf(fsubs, "s/%s/%s/g\n", r1, r2);
273		fflush(fsubs);
274	}
275	return (0);
276}
277
278static int compare_node2(struct node *n1, struct node *n2, int in);
279
280static int
281compare_node(struct node *n1, struct node *n2, int in)
282{
283	int i;
284	struct node *n1a, *n2a;
285
286	i = strcmp(n1->name, n2->name);
287	if (i)
288		return (i);
289	if (n1->id && n2->id)
290		i = refcmp(n1->id, n2->id);
291	else if (n1->id || n2->id)
292		i = -1;
293	if (i)
294		return (i);
295	if (n1->ref && n2->ref)
296		i = refcmp(n1->ref, n2->ref);
297	else if (n1->ref || n2->ref)
298		i = -1;
299	if (i)
300		return (i);
301	if (!strcmp(n1->name, "ref"))
302		i = refcmp(sbuf_data(n1->cont), sbuf_data(n2->cont));
303	else
304		i = strcmp(sbuf_data(n1->cont), sbuf_data(n2->cont));
305	if (i)
306		return (1);
307	n1a = LIST_FIRST(&n1->children);
308	n2a = LIST_FIRST(&n2->children);
309	for (;;) {
310		if (n1a == NULL && n2a == NULL)
311			return (0);
312		if (n1a != NULL && n2a == NULL) {
313			printf("1>");
314			indent(in);
315			print_node(n1a);
316			printf("2>\n");
317			return (1);
318		}
319		if (n1a == NULL && n2a != NULL) {
320			printf("1>\n");
321			printf("2>");
322			indent(in);
323			print_node(n2a);
324			return (1);
325		}
326		i = compare_node2(n1a, n2a, in + 2);
327		if (i)
328			return (1);
329		n1a = LIST_NEXT(n1a, siblings);
330		n2a = LIST_NEXT(n2a, siblings);
331	}
332	return (0);
333}
334
335static int
336compare_node2(struct node *n1, struct node *n2, int in)
337{
338	int i;
339
340	i = compare_node(n1, n2, in);
341	if (i) {
342		printf("1>");
343		indent(in);
344		print_node(n1);
345		printf("2>");
346		indent(in);
347		print_node(n2);
348	}
349	return (i);
350}
351
352
353
354int
355main(int argc, char **argv)
356{
357	struct mytree *t1, *t2;
358	int i;
359
360	fsubs = fopen("_.subs", "w");
361	setbuf(stdout, NULL);
362	setbuf(stderr, NULL);
363	if (argc != 3)
364		errx(1, "usage: %s file1 file2", argv[0]);
365
366	t1 = dofile(argv[1]);
367	if (t1 == NULL)
368		errx(2, "XML parser error on file %s", argv[1]);
369	sort_node(t1->top);
370	t2 = dofile(argv[2]);
371	if (t2 == NULL)
372		errx(2, "XML parser error on file %s", argv[2]);
373	sort_node(t2->top);
374	i = compare_node(t1->top, t2->top, 0);
375	return (i);
376}
377
378