1/* RCS file syntactic analysis */
2
3/******************************************************************************
4 *                       Syntax Analysis.
5 *                       Keyword table
 *                       Test program: define SYNTEST
7 *                       Compatibility with Release 2: define COMPAT2=1
8 ******************************************************************************
9 */
10
11/* Copyright 1982, 1988, 1989 Walter Tichy
12   Copyright 1990, 1991, 1992, 1993, 1994, 1995 Paul Eggert
13   Distributed under license by the Free Software Foundation, Inc.
14
15This file is part of RCS.
16
17RCS is free software; you can redistribute it and/or modify
18it under the terms of the GNU General Public License as published by
19the Free Software Foundation; either version 2, or (at your option)
20any later version.
21
22RCS is distributed in the hope that it will be useful,
23but WITHOUT ANY WARRANTY; without even the implied warranty of
24MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25GNU General Public License for more details.
26
27You should have received a copy of the GNU General Public License
28along with RCS; see the file COPYING.
29If not, write to the Free Software Foundation,
3059 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
31
32Report problems and direct all questions to:
33
34    rcs-bugs@cs.purdue.edu
35
36*/
37
38/*
39 * Revision 5.15  1995/06/16 06:19:24  eggert
40 * Update FSF address.
41 *
42 * Revision 5.14  1995/06/01 16:23:43  eggert
43 * (expand_names): Add "b" for -kb.
44 * (getdelta): Don't strip leading "19" from MKS RCS dates; see cmpdate.
45 *
46 * Revision 5.13  1994/03/20 04:52:58  eggert
47 * Remove lint.
48 *
49 * Revision 5.12  1993/11/03 17:42:27  eggert
50 * Parse MKS RCS dates; ignore \r in diff control lines.
51 * Don't discard ignored phrases.  Improve quality of diagnostics.
52 *
53 * Revision 5.11  1992/07/28  16:12:44  eggert
54 * Avoid `unsigned'.  Statement macro names now end in _.
55 *
56 * Revision 5.10  1992/01/24  18:44:19  eggert
57 * Move put routines to rcsgen.c.
58 *
59 * Revision 5.9  1992/01/06  02:42:34  eggert
60 * ULONG_MAX/10 -> ULONG_MAX_OVER_10
61 * while (E) ; -> while (E) continue;
62 *
63 * Revision 5.8  1991/08/19  03:13:55  eggert
64 * Tune.
65 *
66 * Revision 5.7  1991/04/21  11:58:29  eggert
67 * Disambiguate names on shortname hosts.
68 * Fix errno bug.  Add MS-DOS support.
69 *
70 * Revision 5.6  1991/02/28  19:18:51  eggert
71 * Fix null termination bug in reporting keyword expansion.
72 *
73 * Revision 5.5  1991/02/25  07:12:44  eggert
74 * Check diff output more carefully; avoid overflow.
75 *
76 * Revision 5.4  1990/11/01  05:28:48  eggert
77 * When ignoring unknown phrases, copy them to the output RCS file.
78 * Permit arbitrary data in logs and comment leaders.
79 * Don't check for nontext on initial checkin.
80 *
81 * Revision 5.3  1990/09/20  07:58:32  eggert
82 * Remove the test for non-text bytes; it caused more pain than it cured.
83 *
84 * Revision 5.2  1990/09/04  08:02:30  eggert
85 * Parse RCS files with no revisions.
86 * Don't strip leading white space from diff commands.  Count RCS lines better.
87 *
88 * Revision 5.1  1990/08/29  07:14:06  eggert
89 * Add -kkvl.  Clean old log messages too.
90 *
91 * Revision 5.0  1990/08/22  08:13:44  eggert
92 * Try to parse future RCS formats without barfing.
93 * Add -k.  Don't require final newline.
94 * Remove compile-time limits; use malloc instead.
95 * Don't output branch keyword if there's no default branch,
96 * because RCS version 3 doesn't understand it.
97 * Tune.  Remove lint.
98 * Add support for ISO 8859.  Ansify and Posixate.
99 * Check that a newly checked-in file is acceptable as input to 'diff'.
100 * Check diff's output.
101 *
102 * Revision 4.6  89/05/01  15:13:32  narten
103 * changed copyright header to reflect current distribution rules
104 *
105 * Revision 4.5  88/08/09  19:13:21  eggert
106 * Allow cc -R; remove lint.
107 *
108 * Revision 4.4  87/12/18  11:46:16  narten
109 * more lint cleanups (Guy Harris)
110 *
111 * Revision 4.3  87/10/18  10:39:36  narten
112 * Updating version numbers. Changes relative to 1.1 actually relative to
113 * 4.1
114 *
115 * Revision 1.3  87/09/24  14:00:49  narten
116 * Sources now pass through lint (if you ignore printf/sprintf/fprintf
117 * warnings)
118 *
119 * Revision 1.2  87/03/27  14:22:40  jenkins
120 * Port to suns
121 *
122 * Revision 4.1  83/03/28  11:38:49  wft
123 * Added parsing and printing of default branch.
124 *
125 * Revision 3.6  83/01/15  17:46:50  wft
126 * Changed readdelta() to initialize selector and log-pointer.
127 * Changed puttree to check for selector==DELETE; putdtext() uses DELNUMFORM.
128 *
129 * Revision 3.5  82/12/08  21:58:58  wft
130 * renamed Commentleader to Commleader.
131 *
132 * Revision 3.4  82/12/04  13:24:40  wft
133 * Added routine gettree(), which updates keeplock after reading the
134 * delta tree.
135 *
136 * Revision 3.3  82/11/28  21:30:11  wft
137 * Reading and printing of Suffix removed; version COMPAT2 skips the
138 * Suffix for files of release 2 format. Fixed problems with printing nil.
139 *
140 * Revision 3.2  82/10/18  21:18:25  wft
141 * renamed putdeltatext to putdtext.
142 *
143 * Revision 3.1  82/10/11  19:45:11  wft
144 * made sure getc() returns into an integer.
145 */
146
147
148
149/* version COMPAT2 reads files of the format of release 2 and 3, but
150 * generates files of release 3 format. Need not be defined if no
151 * old RCS files generated with release 2 exist.
152 */
153
154#include "rcsbase.h"
155
156libId(synId, "$FreeBSD$")
157
158static char const *getkeyval P((char const*,enum tokens,int));
159static int getdelta P((void));
160static int strn2expmode P((char const*,size_t));
161static struct hshentry *getdnum P((void));
162static void badDiffOutput P((char const*)) exiting;
163static void diffLineNumberTooLarge P((char const*)) exiting;
164static void getsemi P((char const*));
165
/*
 * Keyword table: the spellings of the phrase keywords that the parsing
 * routines in this file look for.
 */

/* Keywords with external linkage.  */
char const Kaccess[]  = "access";
char const Kauthor[]  = "author";
char const Kbranch[]  = "branch";
char const Kcomment[] = "comment";
char const Kdate[]    = "date";
char const Kdesc[]    = "desc";
char const Kexpand[]  = "expand";
char const Khead[]    = "head";
char const Klocks[]   = "locks";
char const Klog[]     = "log";
char const Knext[]    = "next";
char const Kstate[]   = "state";
char const Kstrict[]  = "strict";
char const Ksymbols[] = "symbols";
char const Ktext[]    = "text";

/* Keywords used only inside this file.  */
#if COMPAT2
static char const Ksuffix[] = "suffix";
#endif
static char const K_branches[] = "branches";
190
191static struct buf Commleader;
192struct cbuf Comment;
193struct cbuf Ignored;
194struct access   * AccessList;
195struct assoc    * Symbols;
196struct rcslock *Locks;
197int		  Expand;
198int               StrictLocks;
199struct hshentry * Head;
200char const      * Dbranch;
201int TotalDeltas;
202
203
204	static void
205getsemi(key)
206	char const *key;
207/* Get a semicolon to finish off a phrase started by KEY.  */
208{
209	if (!getlex(SEMI))
210		fatserror("missing ';' after '%s'", key);
211}
212
213	static struct hshentry *
214getdnum()
215/* Get a delta number.  */
216{
217	register struct hshentry *delta = getnum();
218	if (delta && countnumflds(delta->num)&1)
219		fatserror("%s isn't a delta number", delta->num);
220	return delta;
221}
222
223
224	void
225getadmin()
226/* Read an <admin> and initialize the appropriate global variables.  */
227{
228	register char const *id;
229        struct access   * newaccess;
230        struct assoc    * newassoc;
231	struct rcslock *newlock;
232        struct hshentry * delta;
233	struct access **LastAccess;
234	struct assoc **LastSymbol;
235	struct rcslock **LastLock;
236	struct buf b;
237	struct cbuf cb;
238
239        TotalDeltas=0;
240
241	getkey(Khead);
242	Head = getdnum();
243	getsemi(Khead);
244
245	Dbranch = 0;
246	if (getkeyopt(Kbranch)) {
247		if ((delta = getnum()))
248			Dbranch = delta->num;
249		getsemi(Kbranch);
250        }
251
252
253#if COMPAT2
254        /* read suffix. Only in release 2 format */
255	if (getkeyopt(Ksuffix)) {
256                if (nexttok==STRING) {
257			readstring(); nextlex(); /* Throw away the suffix.  */
258		} else if (nexttok==ID) {
259                        nextlex();
260                }
261		getsemi(Ksuffix);
262        }
263#endif
264
265	getkey(Kaccess);
266	LastAccess = &AccessList;
267	while ((id = getid())) {
268		newaccess = ftalloc(struct access);
269                newaccess->login = id;
270		*LastAccess = newaccess;
271		LastAccess = &newaccess->nextaccess;
272        }
273	*LastAccess = 0;
274	getsemi(Kaccess);
275
276	getkey(Ksymbols);
277	LastSymbol = &Symbols;
278        while ((id = getid())) {
279                if (!getlex(COLON))
280			fatserror("missing ':' in symbolic name definition");
281                if (!(delta=getnum())) {
282			fatserror("missing number in symbolic name definition");
283                } else { /*add new pair to association list*/
284			newassoc = ftalloc(struct assoc);
285                        newassoc->symbol=id;
286			newassoc->num = delta->num;
287			*LastSymbol = newassoc;
288			LastSymbol = &newassoc->nextassoc;
289                }
290        }
291	*LastSymbol = 0;
292	getsemi(Ksymbols);
293
294	getkey(Klocks);
295	LastLock = &Locks;
296        while ((id = getid())) {
297                if (!getlex(COLON))
298			fatserror("missing ':' in lock");
299		if (!(delta=getdnum())) {
300			fatserror("missing number in lock");
301                } else { /*add new pair to lock list*/
302			newlock = ftalloc(struct rcslock);
303                        newlock->login=id;
304                        newlock->delta=delta;
305			*LastLock = newlock;
306			LastLock = &newlock->nextlock;
307                }
308        }
309	*LastLock = 0;
310	getsemi(Klocks);
311
312	if ((StrictLocks = getkeyopt(Kstrict)))
313		getsemi(Kstrict);
314
315	clear_buf(&Comment);
316	if (getkeyopt(Kcomment)) {
317		if (nexttok==STRING) {
318			Comment = savestring(&Commleader);
319			nextlex();
320		}
321		getsemi(Kcomment);
322        }
323
324	Expand = KEYVAL_EXPAND;
325	if (getkeyopt(Kexpand)) {
326		if (nexttok==STRING) {
327			bufautobegin(&b);
328			cb = savestring(&b);
329			if ((Expand = strn2expmode(cb.string,cb.size)) < 0)
330			    fatserror("unknown expand mode %.*s",
331				(int)cb.size, cb.string
332			    );
333			bufautoend(&b);
334			nextlex();
335		}
336		getsemi(Kexpand);
337        }
338	Ignored = getphrases(Kdesc);
339}
340
char const *const expand_names[] = {
	/* These must agree with *_EXPAND in rcsbase.h.  */
	"kv", "kvl", "k", "v", "o", "b",
	0
};

	static int
strn2expmode(s, n)
	char const *s;
	size_t n;
/*
 * Yield the expand mode (the index into expand_names) whose name is
 * exactly the N bytes starting at S, or -1 if there is no such mode.
 * S need not be null-terminated.
 */
{
	char const *const *p;

	for (p = expand_names;  *p;  ++p)
		/*
		 * Check the length before comparing the bytes, so that
		 * memcmp never reads beyond the end of a table entry
		 * that is shorter than N.
		 */
		if (strlen(*p) == n  &&  memcmp(*p, s, n) == 0)
			return p - expand_names;
	return -1;
}

	int
str2expmode(s)
	char const *s;
/* Yield the expand mode named by the null-terminated string S, or -1 if bad.  */
{
	return strn2expmode(s, strlen(s));
}
367
368
369	void
370ignorephrases(key)
371	const char *key;
372/*
373* Ignore a series of phrases that do not start with KEY.
374* Stop when the next phrase starts with a token that is not an identifier,
375* or is KEY.
376*/
377{
378	for (;;) {
379		nextlex();
380		if (nexttok != ID  ||  strcmp(NextString,key) == 0)
381			break;
382		warnignore();
383		hshenter=false;
384		for (;; nextlex()) {
385			switch (nexttok) {
386				case SEMI: hshenter=true; break;
387				case ID:
388				case NUM: ffree1(NextString); continue;
389				case STRING: readstring(); continue;
390				default: continue;
391			}
392			break;
393		}
394	}
395}
396
397
398	static int
399getdelta()
400/* Function: reads a delta block.
401 * returns false if the current block does not start with a number.
402 */
403{
404        register struct hshentry * Delta, * num;
405	struct branchhead **LastBranch, *NewBranch;
406
407	if (!(Delta = getdnum()))
408		return false;
409
410        hshenter = false; /*Don't enter dates into hashtable*/
411	Delta->date = getkeyval(Kdate, NUM, false);
412        hshenter=true;    /*reset hshenter for revision numbers.*/
413
414        Delta->author = getkeyval(Kauthor, ID, false);
415
416        Delta->state = getkeyval(Kstate, ID, true);
417
418	getkey(K_branches);
419	LastBranch = &Delta->branches;
420	while ((num = getdnum())) {
421		NewBranch = ftalloc(struct branchhead);
422                NewBranch->hsh = num;
423		*LastBranch = NewBranch;
424		LastBranch = &NewBranch->nextbranch;
425        }
426	*LastBranch = 0;
427	getsemi(K_branches);
428
429	getkey(Knext);
430	Delta->next = num = getdnum();
431	getsemi(Knext);
432	Delta->lockedby = 0;
433	Delta->log.string = 0;
434	Delta->selector = true;
435	Delta->ig = getphrases(Kdesc);
436        TotalDeltas++;
437        return (true);
438}
439
440
441	void
442gettree()
443/* Function: Reads in the delta tree with getdelta(), then
444 * updates the lockedby fields.
445 */
446{
447	struct rcslock const *currlock;
448
449	while (getdelta())
450		continue;
451        currlock=Locks;
452        while (currlock) {
453                currlock->delta->lockedby = currlock->login;
454                currlock = currlock->nextlock;
455        }
456}
457
458
459	void
460getdesc(prdesc)
461int  prdesc;
462/* Function: read in descriptive text
463 * nexttok is not advanced afterwards.
464 * If prdesc is set, the text is printed to stdout.
465 */
466{
467
468	getkeystring(Kdesc);
469        if (prdesc)
470                printstring();  /*echo string*/
471        else    readstring();   /*skip string*/
472}
473
474
475
476
477
478
479	static char const *
480getkeyval(keyword, token, optional)
481	char const *keyword;
482	enum tokens token;
483	int optional;
484/* reads a pair of the form
485 * <keyword> <token> ;
486 * where token is one of <id> or <num>. optional indicates whether
487 * <token> is optional. A pointer to
488 * the actual character string of <id> or <num> is returned.
489 */
490{
491	register char const *val = 0;
492
493	getkey(keyword);
494        if (nexttok==token) {
495                val = NextString;
496                nextlex();
497        } else {
498		if (!optional)
499			fatserror("missing %s", keyword);
500        }
501	getsemi(keyword);
502        return(val);
503}
504
505
506	void
507unexpected_EOF()
508{
509	rcsfaterror("unexpected EOF in diff output");
510}
511
512	void
513initdiffcmd(dc)
514	register struct diffcmd *dc;
515/* Initialize *dc suitably for getdiffcmd(). */
516{
517	dc->adprev = 0;
518	dc->dafter = 0;
519}
520
521	static void
522badDiffOutput(buf)
523	char const *buf;
524{
525	rcsfaterror("bad diff output line: %s", buf);
526}
527
528	static void
529diffLineNumberTooLarge(buf)
530	char const *buf;
531{
532	rcsfaterror("diff line number too large: %s", buf);
533}
534
535	int
536getdiffcmd(finfile, delimiter, foutfile, dc)
537	RILE *finfile;
538	FILE *foutfile;
539	int delimiter;
540	struct diffcmd *dc;
541/* Get a editing command output by 'diff -n' from fin.
542 * The input is delimited by SDELIM if delimiter is set, EOF otherwise.
543 * Copy a clean version of the command to fout (if nonnull).
544 * Yield 0 for 'd', 1 for 'a', and -1 for EOF.
545 * Store the command's line number and length into dc->line1 and dc->nlines.
546 * Keep dc->adprev and dc->dafter up to date.
547 */
548{
549	register int c;
550	declarecache;
551	register FILE *fout;
552	register char *p;
553	register RILE *fin;
554	long line1, nlines, t;
555	char buf[BUFSIZ];
556
557	fin = finfile;
558	fout = foutfile;
559	setupcache(fin); cache(fin);
560	cachegeteof_(c, { if (delimiter) unexpected_EOF(); return -1; } )
561	if (delimiter) {
562		if (c==SDELIM) {
563			cacheget_(c)
564			if (c==SDELIM) {
565				buf[0] = c;
566				buf[1] = 0;
567				badDiffOutput(buf);
568			}
569			uncache(fin);
570			nextc = c;
571			if (fout)
572				aprintf(fout, "%c%c", SDELIM, c);
573			return -1;
574		}
575	}
576	p = buf;
577	do {
578		if (buf+BUFSIZ-2 <= p) {
579			rcsfaterror("diff output command line too long");
580		}
581		*p++ = c;
582		cachegeteof_(c, unexpected_EOF();)
583	} while (c != '\n');
584	uncache(fin);
585	if (delimiter)
586		++rcsline;
587	*p = '\0';
588	for (p = buf+1;  (c = *p++) == ' ';  )
589		continue;
590	line1 = 0;
591	while (isdigit(c)) {
592		if (
593			LONG_MAX/10 < line1  ||
594			(t = line1 * 10,   (line1 = t + (c - '0'))  <  t)
595		)
596			diffLineNumberTooLarge(buf);
597		c = *p++;
598	}
599	while (c == ' ')
600		c = *p++;
601	nlines = 0;
602	while (isdigit(c)) {
603		if (
604			LONG_MAX/10 < nlines  ||
605			(t = nlines * 10,   (nlines = t + (c - '0'))  <  t)
606		)
607			diffLineNumberTooLarge(buf);
608		c = *p++;
609	}
610	if (c == '\r')
611		c = *p++;
612	if (c || !nlines) {
613		badDiffOutput(buf);
614	}
615	if (line1+nlines < line1)
616		diffLineNumberTooLarge(buf);
617	switch (buf[0]) {
618	    case 'a':
619		if (line1 < dc->adprev) {
620		    rcsfaterror("backward insertion in diff output: %s", buf);
621		}
622		dc->adprev = line1 + 1;
623		break;
624	    case 'd':
625		if (line1 < dc->adprev  ||  line1 < dc->dafter) {
626		    rcsfaterror("backward deletion in diff output: %s", buf);
627		}
628		dc->adprev = line1;
629		dc->dafter = line1 + nlines;
630		break;
631	    default:
632		badDiffOutput(buf);
633	}
634	if (fout) {
635		aprintf(fout, "%s\n", buf);
636	}
637	dc->line1 = line1;
638	dc->nlines = nlines;
639	return buf[0] == 'a';
640}
641
642
643
#ifdef SYNTEST

/*
 * Test driver, built only when SYNTEST is defined.
 * It parses the RCS file named by the first argument: the admin section
 * (which is then passed to putadmin() with fdlock set to standard
 * output), the delta tree, and the description, which is echoed.
 * Anything other than end of file after the description is an error.
 */

char const cmdid[] = "syntest";

	int
main(argc,argv)
	int argc;
	char *argv[];
{
	if (argc < 2) {
		aputs("No input file\n",stderr);
		exitmain(EXIT_FAILURE);
	}

	finptr = Iopen(argv[1], FOPEN_R, (struct stat*)0);
	if (!finptr) {
		faterror("can't open input file %s", argv[1]);
	}

	Lexinit();
	getadmin();
	fdlock = STDOUT_FILENO;
	putadmin();

	gettree();
	getdesc(true);
	nextlex();

	if (!eoflex()) {
		fatserror("expecting EOF");
	}
	exitmain(EXIT_SUCCESS);
}

/* Exit hook for the test driver: leave at once with a failure status.  */
void exiterr() { _exit(EXIT_FAILURE); }

#endif
682