1/*-
2 * Copyright (c) 2008-2009, Ulf Lilleengen <lulf@FreeBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * $FreeBSD$
27 */
28
29#include <assert.h>
30#include <stdio.h>
31#include <stdlib.h>
32
33#include "misc.h"
34#include "queue.h"
35#include "rcsfile.h"
36#include "rcsparse.h"
37#include "rcstokenizer.h"
38
/*
 * This is an RCS parser that uses lex for tokenizing.  It makes sure the RCS
 * syntax is correct as it constructs an RCS file that is used by csup.
 */
43
44static void	asserttoken(yyscan_t *, int);
45static int	parse_admin(struct rcsfile *, yyscan_t *);
46static int	parse_deltas(struct rcsfile *, yyscan_t *, int);
47static int	parse_deltatexts(struct rcsfile *, yyscan_t *, int);
48static char	*duptext(yyscan_t *, int *);
49
50struct string {
51	char *str;
52	STAILQ_ENTRY(string) next;
53};
54
55static void
56asserttoken(yyscan_t *sp, int token)
57{
58	int t;
59
60	t = token;
61	t = rcslex(*sp);
62	assert(t == token);
63}
64
65static char *
66duptext(yyscan_t *sp, int *arglen)
67{
68	char *tmp, *val;
69	int len;
70
71	tmp = rcsget_text(*sp);
72	len = rcsget_leng(*sp);
73	val = xmalloc(len + 1);
74	memcpy(val, tmp, len);
75	val[len] = '\0';
76	if (arglen != NULL)
77		*arglen = len;
78	return (val);
79}
80
81/*
82 * Start up parser, and use the rcsfile hook to add objects.
83 */
84int
85rcsparse_run(struct rcsfile *rf, FILE *infp, int ro)
86{
87	yyscan_t scanner;
88	char *desc;
89	int error, tok;
90
91	error = 0;
92	rcslex_init(&scanner);
93	rcsset_in(infp, scanner);
94	tok = parse_admin(rf, &scanner);
95	tok = parse_deltas(rf, &scanner, tok);
96	assert(tok == KEYWORD);
97	asserttoken(&scanner, STRING);
98	desc = duptext(&scanner, NULL);
99	rcsfile_setval(rf, RCSFILE_DESC, desc);
100	free(desc);
101	tok = rcslex(scanner);
102	/* Parse deltatexts if we need to edit. */
103	if (!ro) {
104		error = parse_deltatexts(rf, &scanner, tok);
105		if (error)
106			return (error);
107	}
108	rcslex_destroy(scanner);
109	return (0);
110}
111
112/*
113 * Parse the admin part of a RCS file.
114 */
115static int
116parse_admin(struct rcsfile *rf, yyscan_t *sp)
117{
118	char *branch, *comment, *expand, *head, *id, *revnum, *tag, *tmp;
119	int strict, token;
120
121	strict = 0;
122	branch = NULL;
123
124	/* head {num}; */
125	asserttoken(sp, KEYWORD);
126	asserttoken(sp, NUM);
127	head = duptext(sp, NULL);
128	rcsfile_setval(rf, RCSFILE_HEAD, head);
129	free(head);
130	asserttoken(sp, SEMIC);
131
132	/* { branch {num}; } */
133	token = rcslex(*sp);
134	if (token == KEYWORD_TWO) {
135		asserttoken(sp, NUM);
136		branch = duptext(sp, NULL);
137		rcsfile_setval(rf, RCSFILE_BRANCH, branch);
138		free(branch);
139		asserttoken(sp, SEMIC);
140		token = rcslex(*sp);
141	}
142
143	/* access {id]*; */
144	assert(token == KEYWORD);
145	token = rcslex(*sp);
146	while (token == ID) {
147		id = duptext(sp, NULL);
148		rcsfile_addaccess(rf, id);
149		free(id);
150		token = rcslex(*sp);
151	}
152	assert(token == SEMIC);
153
154	/* symbols {sym : num}*; */
155	asserttoken(sp, KEYWORD);
156	token = rcslex(*sp);
157	while (token == ID) {
158		tag = duptext(sp, NULL);
159		asserttoken(sp, COLON);
160		asserttoken(sp, NUM);
161		revnum = duptext(sp, NULL);
162		rcsfile_importtag(rf, tag, revnum);
163		free(tag);
164		free(revnum);
165		token = rcslex(*sp);
166	}
167	assert(token == SEMIC);
168
169	/* locks {id : num}*; */
170	asserttoken(sp, KEYWORD);
171	token = rcslex(*sp);
172	while (token == ID) {
173		/* XXX: locks field is skipped */
174		asserttoken(sp, COLON);
175		asserttoken(sp, NUM);
176		token = rcslex(*sp);
177	}
178	assert(token == SEMIC);
179	token = rcslex(*sp);
180	while (token == KEYWORD) {
181		tmp = rcsget_text(*sp);
182
183		/* {strict  ;} */
184		if (!strcmp(tmp, "strict")) {
185			rcsfile_setval(rf, RCSFILE_STRICT, tmp);
186			asserttoken(sp, SEMIC);
187		/* { comment {string}; } */
188		} else if (!strcmp(tmp, "comment")) {
189			token = rcslex(*sp);
190			if (token == STRING) {
191				comment = duptext(sp, NULL);
192				rcsfile_setval(rf, RCSFILE_COMMENT, comment);
193				free(comment);
194			}
195			asserttoken(sp, SEMIC);
196		/* { expand {string}; } */
197		} else if (!strcmp(tmp, "expand")) {
198			token = rcslex(*sp);
199			if (token == STRING) {
200				expand = duptext(sp, NULL);
201				rcsfile_setval(rf, RCSFILE_EXPAND, expand);
202				free(expand);
203			}
204			asserttoken(sp, SEMIC);
205		}
206		/* {newphrase }* */
207		token = rcslex(*sp);
208		while (token == ID) {
209			token = rcslex(*sp);
210			/* XXX: newphrases ignored */
211			while (token == ID || token == NUM || token == STRING ||
212			    token == COLON) {
213				token = rcslex(*sp);
214			}
215			asserttoken(sp, SEMIC);
216			token = rcslex(*sp);
217		}
218	}
219	return (token);
220}
221
222/*
223 * Parse RCS deltas.
224 */
225static int
226parse_deltas(struct rcsfile *rf, yyscan_t *sp, int token)
227{
228	STAILQ_HEAD(, string) branchlist;
229	char *revnum, *revdate, *author, *state, *next;
230
231	/* In case we don't have deltas. */
232	if (token != NUM)
233		return (token);
234	do {
235		next = NULL;
236		state = NULL;
237
238		/* num */
239		assert(token == NUM);
240		revnum = duptext(sp, NULL);
241		/* date num; */
242		asserttoken(sp, KEYWORD);
243		asserttoken(sp, NUM);
244		revdate = duptext(sp, NULL);
245		asserttoken(sp, SEMIC);
246		/* author id; */
247		asserttoken(sp, KEYWORD);
248		asserttoken(sp, ID);
249		author = duptext(sp, NULL);
250		asserttoken(sp, SEMIC);
251		/* state {id}; */
252		asserttoken(sp, KEYWORD);
253		token = rcslex(*sp);
254		if (token == ID) {
255			state = duptext(sp, NULL);
256			token = rcslex(*sp);
257		}
258		assert(token == SEMIC);
259		/* branches {num}*; */
260		asserttoken(sp, KEYWORD);
261		token = rcslex(*sp);
262		STAILQ_INIT(&branchlist);
263		while (token == NUM)
264			token = rcslex(*sp);
265		assert(token == SEMIC);
266		/* next {num}; */
267		asserttoken(sp, KEYWORD);
268		token = rcslex(*sp);
269		if (token == NUM) {
270			next = duptext(sp, NULL);
271			token = rcslex(*sp);
272		}
273		assert(token == SEMIC);
274		/* {newphrase }* */
275		token = rcslex(*sp);
276		while (token == ID) {
277			token = rcslex(*sp);
278			/* XXX: newphrases ignored. */
279			while (token == ID || token == NUM || token == STRING ||
280			    token == COLON) {
281				token = rcslex(*sp);
282			}
283			asserttoken(sp, SEMIC);
284			token = rcslex(*sp);
285		}
286		rcsfile_importdelta(rf, revnum, revdate, author, state, next);
287		free(revnum);
288		free(revdate);
289		free(author);
290		if (state != NULL)
291			free(state);
292		if (next != NULL)
293			free(next);
294	} while (token == NUM);
295
296	return (token);
297}
298
299/*
300 * Parse RCS deltatexts.
301 */
302static int
303parse_deltatexts(struct rcsfile *rf, yyscan_t *sp, int token)
304{
305	struct delta *d;
306	char *log, *revnum, *text;
307	int error, len;
308
309	error = 0;
310	/* In case we don't have deltatexts. */
311	if (token != NUM)
312		return (-1);
313	do {
314		/* num */
315		assert(token == NUM);
316		revnum = duptext(sp, NULL);
317		/* Get delta we're adding text to. */
318		d = rcsfile_getdelta(rf, revnum);
319		free(revnum);
320
321		/*
322		 * XXX: The RCS file is corrupt, but lie and say it is ok.
323		 * If it is actually broken, then the MD5 mismatch will
324		 * trigger a fixup.
325		 */
326		if (d == NULL)
327			return (0);
328
329		/* log string */
330		asserttoken(sp, KEYWORD);
331		asserttoken(sp, STRING);
332		log = duptext(sp, &len);
333		error = rcsdelta_addlog(d, log, len);
334		free(log);
335		if (error)
336			return (-1);
337		/* { newphrase }* */
338		token = rcslex(*sp);
339		while (token == ID) {
340			token = rcslex(*sp);
341			/* XXX: newphrases ignored. */
342			while (token == ID || token == NUM || token == STRING ||
343			    token == COLON) {
344				token = rcslex(*sp);
345			}
346			asserttoken(sp, SEMIC);
347			token = rcslex(*sp);
348		}
349		/* text string */
350		assert(token == KEYWORD);
351		asserttoken(sp, STRING);
352		text = duptext(sp, &len);
353		error = rcsdelta_addtext(d, text, len);
354		/*
355		 * If this happens, something is wrong with the RCS file, and it
356		 * should be resent.
357		 */
358		free(text);
359		if (error)
360			return (-1);
361		token = rcslex(*sp);
362	} while (token == NUM);
363
364	return (0);
365}
366