1285890Sbapt/*- 2285890Sbapt * Copyright (c) 2015 Baptiste Daroussin <bapt@FreeBSD.org> 3285923Sdelphij * Copyright (c) 2015 Xin LI <delphij@FreeBSD.org> 4285890Sbapt * All rights reserved. 5285890Sbapt * 6285890Sbapt * Redistribution and use in source and binary forms, with or without 7285890Sbapt * modification, are permitted provided that the following conditions 8285890Sbapt * are met: 9285890Sbapt * 1. Redistributions of source code must retain the above copyright 10285890Sbapt * notice, this list of conditions and the following disclaimer 11285890Sbapt * in this position and unchanged. 12285890Sbapt * 2. Redistributions in binary form must reproduce the above copyright 13285890Sbapt * notice, this list of conditions and the following disclaimer in the 14285890Sbapt * documentation and/or other materials provided with the distribution. 15285890Sbapt * 16285890Sbapt * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17285890Sbapt * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18285890Sbapt * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19285890Sbapt * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20285890Sbapt * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21285890Sbapt * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22285890Sbapt * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23285890Sbapt * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24285890Sbapt * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25285890Sbapt * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26285890Sbapt */ 27285890Sbapt 28285890Sbapt#include <sys/cdefs.h> 29285890Sbapt__FBSDID("$FreeBSD$"); 30285890Sbapt 31285890Sbapt#include <sys/types.h> 32285890Sbapt#include <sys/sbuf.h> 33285890Sbapt 34285890Sbapt#include <ctype.h> 35285890Sbapt#include <err.h> 36285890Sbapt#include <stdbool.h> 37285890Sbapt#include <stdio.h> 38285890Sbapt#include <stdlib.h> 39285890Sbapt#include <unistd.h> 40285890Sbapt#include <xlocale.h> 41285890Sbapt 42285923Sdelphijtypedef enum { 43285923Sdelphij /* state condition to transit to next state */ 44285923Sdelphij INIT, /* '$' */ 45285923Sdelphij DELIM_SEEN, /* letter */ 46285923Sdelphij KEYWORD, /* punctuation mark */ 47285923Sdelphij PUNC_SEEN, /* ':' -> _SVN; space -> TEXT */ 48285923Sdelphij PUNC_SEEN_SVN, /* space */ 49285923Sdelphij TEXT 50285923Sdelphij} analyzer_states; 51285890Sbapt 52285890Sbaptstatic int 53285890Sbaptscan(FILE *fp, const char *name, bool quiet) 54285890Sbapt{ 55285890Sbapt int c; 56285890Sbapt bool hasid = false; 57285923Sdelphij bool subversion = false; 58285923Sdelphij analyzer_states state = INIT; 59285890Sbapt struct sbuf *id = sbuf_new_auto(); 60285890Sbapt locale_t l; 61285890Sbapt 62285890Sbapt l = newlocale(LC_ALL_MASK, "C", NULL); 63285890Sbapt 64285890Sbapt if (name != NULL) 65285890Sbapt printf("%s:\n", name); 66285890Sbapt 67285890Sbapt while ((c = fgetc(fp)) != EOF) { 68285923Sdelphij switch (state) { 69285923Sdelphij case INIT: 70285923Sdelphij if (c == '$') { 71285923Sdelphij /* Transit to DELIM_SEEN if we see $ */ 72285923Sdelphij state = DELIM_SEEN; 73285923Sdelphij } else { 74285923Sdelphij /* Otherwise, stay in INIT state */ 75285923Sdelphij continue; 76285923Sdelphij } 77285923Sdelphij break; 78285923Sdelphij case DELIM_SEEN: 79285923Sdelphij if (isalpha_l(c, l)) { 80285923Sdelphij /* Transit to KEYWORD if we see letter */ 81285923Sdelphij sbuf_clear(id); 82285923Sdelphij sbuf_putc(id, '$'); 83285923Sdelphij sbuf_putc(id, c); 84285923Sdelphij state = KEYWORD; 85285923Sdelphij 86285923Sdelphij continue; 87285923Sdelphij } else if (c == '$') { 88285923Sdelphij /* Or, stay in DELIM_SEEN if more $ */ 89285923Sdelphij continue; 90285923Sdelphij } else { 91285923Sdelphij /* Otherwise, transit back to INIT */ 92285923Sdelphij state = INIT; 93285923Sdelphij } 94285923Sdelphij break; 95285923Sdelphij case KEYWORD: 96285923Sdelphij sbuf_putc(id, c); 97285923Sdelphij 98285923Sdelphij if (isalpha_l(c, l)) { 99285923Sdelphij /* 100285923Sdelphij * Stay in KEYWORD if additional letter is seen 101285923Sdelphij */ 102285923Sdelphij continue; 103285923Sdelphij } else if (c == ':') { 104285923Sdelphij /* 105285923Sdelphij * See ':' for the first time, transit to 106285923Sdelphij * PUNC_SEEN. 107285923Sdelphij */ 108285923Sdelphij state = PUNC_SEEN; 109285923Sdelphij subversion = false; 110285923Sdelphij } else if (c == '$') { 111285923Sdelphij /* 112285923Sdelphij * Incomplete ident. Go back to DELIM_SEEN 113285923Sdelphij * state because we see a '$' which could be 114285923Sdelphij * the beginning of a keyword. 115285923Sdelphij */ 116285923Sdelphij state = DELIM_SEEN; 117285923Sdelphij } else { 118285923Sdelphij /* 119285923Sdelphij * Go back to INIT state otherwise. 120285923Sdelphij */ 121285923Sdelphij state = INIT; 122285923Sdelphij } 123285923Sdelphij break; 124285923Sdelphij case PUNC_SEEN: 125285923Sdelphij case PUNC_SEEN_SVN: 126285923Sdelphij sbuf_putc(id, c); 127285923Sdelphij 128285923Sdelphij switch (c) { 129285923Sdelphij case ':': 130285923Sdelphij /* 131285923Sdelphij * If we see '::' (seen : in PUNC_SEEN), 132285923Sdelphij * activate subversion treatment and transit 133285923Sdelphij * to PUNC_SEEN_SVN state. 134285923Sdelphij * 135285923Sdelphij * If more than two :'s were seen, the ident 136285923Sdelphij * is invalid and we would therefore go back 137285923Sdelphij * to INIT state. 138285923Sdelphij */ 139285923Sdelphij if (state == PUNC_SEEN) { 140285923Sdelphij state = PUNC_SEEN_SVN; 141285923Sdelphij subversion = true; 142285923Sdelphij } else { 143285923Sdelphij state = INIT; 144285923Sdelphij } 145285923Sdelphij break; 146285923Sdelphij case ' ': 147285923Sdelphij /* 148285923Sdelphij * A space after ':' or '::' indicates we are at the 149285923Sdelphij * last component of potential ident. 150285923Sdelphij */ 151285923Sdelphij state = TEXT; 152285923Sdelphij break; 153285923Sdelphij default: 154285923Sdelphij /* All other characters are invalid */ 155285923Sdelphij state = INIT; 156285923Sdelphij break; 157285923Sdelphij } 158285923Sdelphij break; 159285923Sdelphij case TEXT: 160285923Sdelphij sbuf_putc(id, c); 161285923Sdelphij 162285923Sdelphij if (iscntrl_l(c, l)) { 163285923Sdelphij /* Control characters are not allowed in this state */ 164285923Sdelphij state = INIT; 165285923Sdelphij } else if (c == '$') { 166285923Sdelphij sbuf_finish(id); 167285923Sdelphij /* 168285923Sdelphij * valid ident should end with a space. 169285923Sdelphij * 170285923Sdelphij * subversion extension uses '#' to indicate that 171285923Sdelphij * the keyword expansion have exceeded the fixed 172285923Sdelphij * width, so it is also permitted if we are in 173285923Sdelphij * subversion mode. No length check is enforced 174285923Sdelphij * because GNU RCS ident(1) does not do it either. 175285923Sdelphij */ 176285923Sdelphij c = sbuf_data(id)[sbuf_len(id) - 2]; 177285923Sdelphij if (c == ' ' || (subversion && c == '#')) { 178285923Sdelphij printf(" %s\n", sbuf_data(id)); 179285923Sdelphij hasid = true; 180285923Sdelphij } 181285923Sdelphij state = INIT; 182285923Sdelphij } 183285923Sdelphij /* Other characters: stay in the state */ 184285923Sdelphij break; 185285890Sbapt } 186285890Sbapt } 187285890Sbapt sbuf_delete(id); 188285890Sbapt freelocale(l); 189285890Sbapt 190285890Sbapt if (!hasid) { 191285890Sbapt if (!quiet) 192285890Sbapt fprintf(stderr, "%s warning: no id keywords in %s\n", 193285890Sbapt getprogname(), name ? name : "standard input"); 194285890Sbapt 195285890Sbapt return (EXIT_FAILURE); 196285890Sbapt } 197285890Sbapt 198285890Sbapt return (EXIT_SUCCESS); 199285890Sbapt} 200285890Sbapt 201285890Sbaptint 202285890Sbaptmain(int argc, char **argv) 203285890Sbapt{ 204285890Sbapt bool quiet = false; 205285890Sbapt int ch, i; 206285890Sbapt int ret = EXIT_SUCCESS; 207285890Sbapt FILE *fp; 208285890Sbapt 209285890Sbapt while ((ch = getopt(argc, argv, "qV")) != -1) { 210285890Sbapt switch (ch) { 211285890Sbapt case 'q': 212285890Sbapt quiet = true; 213285890Sbapt break; 214285890Sbapt case 'V': 215285890Sbapt /* Do nothing, compat with GNU rcs's ident */ 216285890Sbapt return (EXIT_SUCCESS); 217285890Sbapt default: 218285890Sbapt errx(EXIT_FAILURE, "usage: %s [-q] [-V] [file...]", 219285890Sbapt getprogname()); 220285890Sbapt } 221285890Sbapt } 222285890Sbapt 223285890Sbapt argc -= optind; 224285890Sbapt argv += optind; 225285890Sbapt 226285890Sbapt if (argc == 0) 227285890Sbapt return (scan(stdin, NULL, quiet)); 228285890Sbapt 229285890Sbapt for (i = 0; i < argc; i++) { 230285890Sbapt fp = fopen(argv[i], "r"); 231285890Sbapt if (fp == NULL) { 232285890Sbapt warn("%s", argv[i]); 233285890Sbapt ret = EXIT_FAILURE; 234285890Sbapt continue; 235285890Sbapt } 236285890Sbapt if (scan(fp, argv[i], quiet) != EXIT_SUCCESS) 237285890Sbapt ret = EXIT_FAILURE; 238285890Sbapt fclose(fp); 239285890Sbapt } 240285890Sbapt 241285890Sbapt return (ret); 242285890Sbapt} 243