/*-
 * Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org>
 * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27285890Sbapt
28285890Sbapt#include <sys/cdefs.h>
29285890Sbapt__FBSDID("$FreeBSD$");
30285890Sbapt
31285890Sbapt#include <sys/types.h>
32285890Sbapt#include <sys/sbuf.h>
33285890Sbapt
34285890Sbapt#include <ctype.h>
35285890Sbapt#include <err.h>
36285890Sbapt#include <stdbool.h>
37285890Sbapt#include <stdio.h>
38285890Sbapt#include <stdlib.h>
39285890Sbapt#include <unistd.h>
40285890Sbapt#include <xlocale.h>
41285890Sbapt
/*
 * States of the keyword scanner.  The comment next to each state names the
 * input that moves the scanner out of it towards a complete keyword string.
 */
typedef enum {
	INIT = 0,	/* outside any keyword; leaves on '$' */
	DELIM_SEEN,	/* opening '$' read; leaves on a letter */
	KEYWORD,	/* collecting keyword letters; leaves on ':' */
	PUNC_SEEN,	/* ':' read; ':' -> PUNC_SEEN_SVN, ' ' -> TEXT */
	PUNC_SEEN_SVN,	/* "::" read; leaves on ' ' */
	TEXT		/* collecting the value up to the closing '$' */
} analyzer_states;
51285890Sbapt
52285890Sbaptstatic int
53285890Sbaptscan(FILE *fp, const char *name, bool quiet)
54285890Sbapt{
55285890Sbapt	int c;
56285890Sbapt	bool hasid = false;
57285923Sdelphij	bool subversion = false;
58285923Sdelphij	analyzer_states state = INIT;
59285890Sbapt	struct sbuf *id = sbuf_new_auto();
60285890Sbapt	locale_t l;
61285890Sbapt
62285890Sbapt	l = newlocale(LC_ALL_MASK, "C", NULL);
63285890Sbapt
64285890Sbapt	if (name != NULL)
65285890Sbapt		printf("%s:\n", name);
66285890Sbapt
67285890Sbapt	while ((c = fgetc(fp)) != EOF) {
68285923Sdelphij		switch (state) {
69285923Sdelphij		case INIT:
70285923Sdelphij			if (c == '$') {
71285923Sdelphij				/* Transit to DELIM_SEEN if we see $ */
72285923Sdelphij				state = DELIM_SEEN;
73285923Sdelphij			} else {
74285923Sdelphij				/* Otherwise, stay in INIT state */
75285923Sdelphij				continue;
76285923Sdelphij			}
77285923Sdelphij			break;
78285923Sdelphij		case DELIM_SEEN:
79285923Sdelphij			if (isalpha_l(c, l)) {
80285923Sdelphij				/* Transit to KEYWORD if we see letter */
81285923Sdelphij				sbuf_clear(id);
82285923Sdelphij				sbuf_putc(id, '$');
83285923Sdelphij				sbuf_putc(id, c);
84285923Sdelphij				state = KEYWORD;
85285923Sdelphij
86285923Sdelphij				continue;
87285923Sdelphij			} else if (c == '$') {
88285923Sdelphij				/* Or, stay in DELIM_SEEN if more $ */
89285923Sdelphij				continue;
90285923Sdelphij			} else {
91285923Sdelphij				/* Otherwise, transit back to INIT */
92285923Sdelphij				state = INIT;
93285923Sdelphij			}
94285923Sdelphij			break;
95285923Sdelphij		case KEYWORD:
96285923Sdelphij			sbuf_putc(id, c);
97285923Sdelphij
98285923Sdelphij			if (isalpha_l(c, l)) {
99285923Sdelphij				/*
100285923Sdelphij				 * Stay in KEYWORD if additional letter is seen
101285923Sdelphij				 */
102285923Sdelphij				continue;
103285923Sdelphij			} else if (c == ':') {
104285923Sdelphij				/*
105285923Sdelphij				 * See ':' for the first time, transit to
106285923Sdelphij				 * PUNC_SEEN.
107285923Sdelphij				 */
108285923Sdelphij				state = PUNC_SEEN;
109285923Sdelphij				subversion = false;
110285923Sdelphij			} else if (c == '$') {
111285923Sdelphij				/*
112285923Sdelphij				 * Incomplete ident.  Go back to DELIM_SEEN
113285923Sdelphij				 * state because we see a '$' which could be
114285923Sdelphij				 * the beginning of a keyword.
115285923Sdelphij				 */
116285923Sdelphij				state = DELIM_SEEN;
117285923Sdelphij			} else {
118285923Sdelphij				/*
119285923Sdelphij				 * Go back to INIT state otherwise.
120285923Sdelphij				 */
121285923Sdelphij				state = INIT;
122285923Sdelphij			}
123285923Sdelphij			break;
124285923Sdelphij		case PUNC_SEEN:
125285923Sdelphij		case PUNC_SEEN_SVN:
126285923Sdelphij			sbuf_putc(id, c);
127285923Sdelphij
128285923Sdelphij			switch (c) {
129285923Sdelphij			case ':':
130285923Sdelphij				/*
131285923Sdelphij				 * If we see '::' (seen : in PUNC_SEEN),
132285923Sdelphij				 * activate subversion treatment and transit
133285923Sdelphij				 * to PUNC_SEEN_SVN state.
134285923Sdelphij				 *
135285923Sdelphij				 * If more than two :'s were seen, the ident
136285923Sdelphij				 * is invalid and we would therefore go back
137285923Sdelphij				 * to INIT state.
138285923Sdelphij				 */
139285923Sdelphij				if (state == PUNC_SEEN) {
140285923Sdelphij					state = PUNC_SEEN_SVN;
141285923Sdelphij					subversion = true;
142285923Sdelphij				} else {
143285923Sdelphij					state = INIT;
144285923Sdelphij				}
145285923Sdelphij				break;
146285923Sdelphij			case ' ':
147285923Sdelphij				/*
148285923Sdelphij				 * A space after ':' or '::' indicates we are at the
149285923Sdelphij				 * last component of potential ident.
150285923Sdelphij				 */
151285923Sdelphij				state = TEXT;
152285923Sdelphij				break;
153285923Sdelphij			default:
154285923Sdelphij				/* All other characters are invalid */
155285923Sdelphij				state = INIT;
156285923Sdelphij				break;
157285923Sdelphij			}
158285923Sdelphij			break;
159285923Sdelphij		case TEXT:
160285923Sdelphij			sbuf_putc(id, c);
161285923Sdelphij
162285923Sdelphij			if (iscntrl_l(c, l)) {
163285923Sdelphij				/* Control characters are not allowed in this state */
164285923Sdelphij				state = INIT;
165285923Sdelphij			} else if (c == '$') {
166285923Sdelphij				sbuf_finish(id);
167285923Sdelphij				/*
168285923Sdelphij				 * valid ident should end with a space.
169285923Sdelphij				 *
170285923Sdelphij				 * subversion extension uses '#' to indicate that
171285923Sdelphij				 * the keyword expansion have exceeded the fixed
172285923Sdelphij				 * width, so it is also permitted if we are in
173285923Sdelphij				 * subversion mode.  No length check is enforced
174285923Sdelphij				 * because GNU RCS ident(1) does not do it either.
175285923Sdelphij				 */
176285923Sdelphij				c = sbuf_data(id)[sbuf_len(id) - 2];
177285923Sdelphij				if (c == ' ' || (subversion && c == '#')) {
178285923Sdelphij					printf("     %s\n", sbuf_data(id));
179285923Sdelphij					hasid = true;
180285923Sdelphij				}
181285923Sdelphij				state = INIT;
182285923Sdelphij			}
183285923Sdelphij			/* Other characters: stay in the state */
184285923Sdelphij			break;
185285890Sbapt		}
186285890Sbapt	}
187285890Sbapt	sbuf_delete(id);
188285890Sbapt	freelocale(l);
189285890Sbapt
190285890Sbapt	if (!hasid) {
191285890Sbapt		if (!quiet)
192285890Sbapt			fprintf(stderr, "%s warning: no id keywords in %s\n",
193285890Sbapt			    getprogname(), name ? name : "standard input");
194285890Sbapt
195285890Sbapt		return (EXIT_FAILURE);
196285890Sbapt	}
197285890Sbapt
198285890Sbapt	return (EXIT_SUCCESS);
199285890Sbapt}
200285890Sbapt
201285890Sbaptint
202285890Sbaptmain(int argc, char **argv)
203285890Sbapt{
204285890Sbapt	bool quiet = false;
205285890Sbapt	int ch, i;
206285890Sbapt	int ret = EXIT_SUCCESS;
207285890Sbapt	FILE *fp;
208285890Sbapt
209285890Sbapt	while ((ch = getopt(argc, argv, "qV")) != -1) {
210285890Sbapt		switch (ch) {
211285890Sbapt		case 'q':
212285890Sbapt			quiet = true;
213285890Sbapt			break;
214285890Sbapt		case 'V':
215285890Sbapt			/* Do nothing, compat with GNU rcs's ident */
216285890Sbapt			return (EXIT_SUCCESS);
217285890Sbapt		default:
218285890Sbapt			errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]",
219285890Sbapt			    getprogname());
220285890Sbapt		}
221285890Sbapt	}
222285890Sbapt
223285890Sbapt	argc -= optind;
224285890Sbapt	argv += optind;
225285890Sbapt
226285890Sbapt	if (argc == 0)
227285890Sbapt		return (scan(stdin, NULL, quiet));
228285890Sbapt
229285890Sbapt	for (i = 0; i < argc; i++) {
230285890Sbapt		fp = fopen(argv[i], "r");
231285890Sbapt		if (fp == NULL) {
232285890Sbapt			warn("%s", argv[i]);
233285890Sbapt			ret = EXIT_FAILURE;
234285890Sbapt			continue;
235285890Sbapt		}
236285890Sbapt		if (scan(fp, argv[i], quiet) != EXIT_SUCCESS)
237285890Sbapt			ret = EXIT_FAILURE;
238285890Sbapt		fclose(fp);
239285890Sbapt	}
240285890Sbapt
241285890Sbapt	return (ret);
242285890Sbapt}
243