1/*-
2 * Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org>
3 * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include <sys/types.h>
32#include <sys/sbuf.h>
33
34#include <ctype.h>
35#include <err.h>
36#include <stdbool.h>
37#include <stdio.h>
38#include <stdlib.h>
39#include <unistd.h>
40#include <xlocale.h>
41
/*
 * States of the ident-string scanner.  Each entry notes which input
 * character moves the scanner forward out of that state.
 */
typedef enum {
	/* Looking for the opening '$'. */
	INIT = 0,
	/* Opening '$' consumed; a letter starts the keyword. */
	DELIM_SEEN = 1,
	/* Collecting keyword letters; ':' ends the keyword. */
	KEYWORD = 2,
	/* One ':' consumed; a second ':' selects the SVN form, a space starts the text. */
	PUNC_SEEN = 3,
	/* "::" consumed; only a space (start of the text) is accepted. */
	PUNC_SEEN_SVN = 4,
	/* Collecting the value text up to the closing '$'. */
	TEXT = 5
} analyzer_states;
51
/*
 * Scan a stream for RCS/Subversion style ident strings and print each one
 * found to standard output, indented by five spaces.
 *
 * A string is recognized when it has the form "$Keyword: text $":
 *   - Keyword is made of letters only, classified in the "C" locale so the
 *     result does not depend on the caller's locale;
 *   - the keyword is followed by ':' (or "::" for the Subversion
 *     fixed-width form) and then a space;
 *   - the text contains no control characters;
 *   - the character just before the closing '$' is a space, or '#' when
 *     the "::" form was used.
 *
 * fp:    stream to read; consumed until fgetc() returns EOF, not closed here.
 * name:  label printed as "name:" before any matches and used in
 *        diagnostics; NULL prints no label and is reported as
 *        "standard input".
 * quiet: when true, the "no id keywords" warning is suppressed.
 *
 * Returns EXIT_SUCCESS if at least one ident string was printed and the
 * stream was read without error, EXIT_FAILURE otherwise.  Terminates the
 * program through err(3) if the working buffer or the locale handle cannot
 * be created, or if the buffer ran out of memory while collecting an ident.
 */
static int
scan(FILE *fp, const char *name, bool quiet)
{
	int c;
	bool hasid = false;
	bool subversion = false;
	bool readfail = false;
	analyzer_states state = INIT;
	struct sbuf *id;
	locale_t l;
	const char *text;
	char last;

	/* Both allocations can fail; do not run the scanner on NULL handles. */
	id = sbuf_new_auto();
	if (id == NULL)
		err(EXIT_FAILURE, "sbuf_new_auto()");

	l = newlocale(LC_ALL_MASK, "C", NULL);
	if (l == (locale_t)0)
		err(EXIT_FAILURE, "newlocale()");

	if (name != NULL)
		printf("%s:\n", name);

	while ((c = fgetc(fp)) != EOF) {
		switch (state) {
		case INIT:
			/* Only '$' leaves INIT; everything else is skipped. */
			if (c == '$')
				state = DELIM_SEEN;
			break;
		case DELIM_SEEN:
			if (isalpha_l(c, l)) {
				/*
				 * First keyword letter: start a fresh candidate
				 * consisting of the '$' and this letter.
				 */
				sbuf_clear(id);
				sbuf_putc(id, '$');
				sbuf_putc(id, c);
				state = KEYWORD;
			} else if (c != '$') {
				/*
				 * Runs of '$' keep us in DELIM_SEEN; any other
				 * non-letter abandons the candidate.
				 */
				state = INIT;
			}
			break;
		case KEYWORD:
			sbuf_putc(id, c);

			if (isalpha_l(c, l)) {
				/* Additional keyword letter: stay in KEYWORD. */
				break;
			} else if (c == ':') {
				/*
				 * First ':' after the keyword.  Subversion mode
				 * is only turned on if a second ':' follows.
				 */
				state = PUNC_SEEN;
				subversion = false;
			} else if (c == '$') {
				/*
				 * Incomplete ident, but this '$' could itself
				 * open a new keyword.
				 */
				state = DELIM_SEEN;
			} else {
				state = INIT;
			}
			break;
		case PUNC_SEEN:
		case PUNC_SEEN_SVN:
			sbuf_putc(id, c);

			switch (c) {
			case ':':
				/*
				 * "::" selects the Subversion fixed-width
				 * form; a third ':' makes the ident invalid.
				 */
				if (state == PUNC_SEEN) {
					state = PUNC_SEEN_SVN;
					subversion = true;
				} else {
					state = INIT;
				}
				break;
			case ' ':
				/*
				 * A space after ':' or '::' starts the last
				 * component of the potential ident.
				 */
				state = TEXT;
				break;
			default:
				/* All other characters are invalid here. */
				state = INIT;
				break;
			}
			break;
		case TEXT:
			sbuf_putc(id, c);

			if (iscntrl_l(c, l)) {
				/* Control characters are not allowed in the text. */
				state = INIT;
			} else if (c == '$') {
				/*
				 * sbuf_finish() reports any allocation failure
				 * that happened while appending.  Without this
				 * check sbuf_len() would return -1 below and the
				 * index computation would read out of bounds.
				 */
				if (sbuf_finish(id) != 0)
					err(EXIT_FAILURE, "sbuf_finish()");
				text = sbuf_data(id);
				/*
				 * A valid ident ends with a space before the
				 * closing '$'.
				 *
				 * The Subversion extension uses '#' to indicate
				 * that the keyword expansion exceeded the fixed
				 * width, so '#' is also accepted in subversion
				 * mode.  No length check is enforced because GNU
				 * RCS ident(1) does not do it either.
				 *
				 * The buffer holds at least "$X: $" at this
				 * point, so len - 2 is always a valid index.
				 */
				last = text[sbuf_len(id) - 2];
				if (last == ' ' || (subversion && last == '#')) {
					printf("     %s\n", text);
					hasid = true;
				}
				state = INIT;
			}
			/* Other characters: stay in TEXT. */
			break;
		}
	}

	/*
	 * fgetc() returns EOF for read errors too; report those explicitly
	 * (before anything else can disturb errno) instead of treating a
	 * partially read stream as successfully scanned.
	 */
	if (ferror(fp)) {
		warn("%s", name != NULL ? name : "standard input");
		readfail = true;
	}

	sbuf_delete(id);
	freelocale(l);

	if (readfail)
		return (EXIT_FAILURE);

	if (!hasid) {
		if (!quiet)
			fprintf(stderr, "%s warning: no id keywords in %s\n",
			    getprogname(), name ? name : "standard input");

		return (EXIT_FAILURE);
	}

	return (EXIT_SUCCESS);
}
200
201int
202main(int argc, char **argv)
203{
204	bool quiet = false;
205	int ch, i;
206	int ret = EXIT_SUCCESS;
207	FILE *fp;
208
209	while ((ch = getopt(argc, argv, "qV")) != -1) {
210		switch (ch) {
211		case 'q':
212			quiet = true;
213			break;
214		case 'V':
215			/* Do nothing, compat with GNU rcs's ident */
216			return (EXIT_SUCCESS);
217		default:
218			errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]",
219			    getprogname());
220		}
221	}
222
223	argc -= optind;
224	argv += optind;
225
226	if (argc == 0)
227		return (scan(stdin, NULL, quiet));
228
229	for (i = 0; i < argc; i++) {
230		fp = fopen(argv[i], "r");
231		if (fp == NULL) {
232			warn("%s", argv[i]);
233			ret = EXIT_FAILURE;
234			continue;
235		}
236		if (scan(fp, argv[i], quiet) != EXIT_SUCCESS)
237			ret = EXIT_FAILURE;
238		fclose(fp);
239	}
240
241	return (ret);
242}
243