1/* Copyright (C) 1989, 2000 Aladdin Enterprises.  All rights reserved. */
2
3/*$Id$*/
4/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
5
6/*
7ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
8WARRANTY.  No author or distributor accepts responsibility to anyone for the
9consequences of using it or for whether it serves any particular purpose or
10works at all, unless he says so in writing.  Refer to the GNU General Public
11License (the "GPL") for full details.
12
13Everyone is granted permission to copy, modify and redistribute ansi2knr,
14but only under the conditions described in the GPL.  A copy of this license
15is supposed to have been given to you along with ansi2knr so you can know
16your rights and responsibilities.  It should be in a file named COPYLEFT,
17or, if there is no file named COPYLEFT, a file named COPYING.  Among other
18things, the copyright notice and this notice must be preserved on all
19copies.
20
21We explicitly state here what we believe is already implied by the GPL: if
22the ansi2knr program is distributed as a separate set of sources and a
23separate executable file which are aggregated on a storage medium together
24with another program, this in itself does not bring the other program under
25the GPL, nor does the mere fact that such a program or the procedures for
26constructing it invoke the ansi2knr executable bring any other part of the
27program under the GPL.
28*/
29
30/*
31 * Usage:
32	ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
33 * --filename provides the file name for the #line directive in the output,
34 * overriding input_file (if present).
35 * If no input_file is supplied, input is read from stdin.
36 * If no output_file is supplied, output goes to stdout.
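 * Command-line, file-open, allocation and output-write errors are reported
 * on stderr; problems in the input text itself are never diagnosed.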
38 *
39 * ansi2knr recognizes function definitions by seeing a non-keyword
40 * identifier at the left margin, followed by a left parenthesis, with a
41 * right parenthesis as the last character on the line, and with a left
42 * brace as the first token on the following line (ignoring possible
43 * intervening comments and/or preprocessor directives), except that a line
44 * consisting of only
45 *	identifier1(identifier2)
46 * will not be considered a function definition unless identifier2 is
47 * the word "void", and a line consisting of
48 *	identifier1(identifier2, <<arbitrary>>)
49 * will not be considered a function definition.
50 * ansi2knr will recognize a multi-line header provided that no intervening
51 * line ends with a left or right brace or a semicolon.  These algorithms
52 * ignore whitespace, comments, and preprocessor directives, except that
53 * the function name must be the first thing on the line.  The following
54 * constructs will confuse it:
55 *	- Any other construct that starts at the left margin and
56 *	    follows the above syntax (such as a macro or function call).
57 *	- Some macros that tinker with the syntax of function headers.
58 */
59
60/*
61 * The original and principal author of ansi2knr is L. Peter Deutsch
62 * <ghost@aladdin.com>.  Other authors are noted in the change history
63 * that follows (in reverse chronological order):
64
65	lpd 2000-04-12 backs out Eggert's changes because of bugs:
66	- concatlits didn't declare the type of its bufend argument;
67	- concatlits didn't recognize when it was inside a comment;
	- scanstring could scan backward past the beginning of the string;
69	- the check for \ + newline in scanstring was unnecessary.
70
71	2000-03-05  Paul Eggert  <eggert@twinsun.com>
72
73	Add support for concatenated string literals.
74	* ansi2knr.c (concatlits): New decl.
75	(main): Invoke concatlits to concatenate string literals.
76	(scanstring): Handle backslash-newline correctly.  Work with
77	character constants.  Fix bug when scanning backwards through
78	backslash-quote.  Check for unterminated strings.
79	(convert1): Parse character constants, too.
80	(appendline, concatlits): New functions.
81	* ansi2knr.1: Document this.
82
83	lpd 1999-08-17 added code to allow preprocessor directives
84		wherever comments are allowed
85	lpd 1999-04-12 added minor fixes from Pavel Roskin
86		<pavel_roskin@geocities.com> for clean compilation with
87		gcc -W -Wall
88	lpd 1999-03-22 added hack to recognize lines consisting of
89		identifier1(identifier2, xxx) as *not* being procedures
90	lpd 1999-02-03 made indentation of preprocessor commands consistent
91	lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
92		endless loop; quoted strings within an argument list
93		confused the parser
94	lpd 1999-01-24 added a check for write errors on the output,
95		suggested by Jim Meyering <meyering@ascend.com>
96	lpd 1998-11-09 added further hack to recognize identifier(void)
97		as being a procedure
98	lpd 1998-10-23 added hack to recognize lines consisting of
99		identifier1(identifier2) as *not* being procedures
100	lpd 1997-12-08 made input_file optional; only closes input and/or
101		output file if not stdin or stdout respectively; prints
102		usage message on stderr rather than stdout; adds
103		--filename switch (changes suggested by
104		<ceder@lysator.liu.se>)
105	lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
106		compilers that don't understand void, as suggested by
107		Tom Lane
108	lpd 1996-01-15 changed to require that the first non-comment token
109		on the line following a function header be a left brace,
110		to reduce sensitivity to macros, as suggested by Tom Lane
111		<tgl@sss.pgh.pa.us>
112	lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
113		undefined preprocessor symbols as 0; changed all #ifdefs
114		for configuration symbols to #ifs
115	lpd 1995-04-05 changed copyright notice to make it clear that
116		including ansi2knr in a program does not bring the entire
117		program under the GPL
118	lpd 1994-12-18 added conditionals for systems where ctype macros
119		don't handle 8-bit characters properly, suggested by
120		Francois Pinard <pinard@iro.umontreal.ca>;
121		removed --varargs switch (this is now the default)
122	lpd 1994-10-10 removed CONFIG_BROKETS conditional
123	lpd 1994-07-16 added some conditionals to help GNU `configure',
124		suggested by Francois Pinard <pinard@iro.umontreal.ca>;
125		properly erase prototype args in function parameters,
126		contributed by Jim Avera <jima@netcom.com>;
127		correct error in writeblanks (it shouldn't erase EOLs)
128	lpd 1989-xx-xx original version
129 */
130
131/* Most of the conditionals here are to make ansi2knr work with */
132/* or without the GNU configure machinery. */
133
134#if HAVE_CONFIG_H
135# include <config.h>
136#endif
137
138#include <stdio.h>
139#include <ctype.h>
140
141#if HAVE_CONFIG_H
142
143/*
144   For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
145   This will define HAVE_CONFIG_H and so, activate the following lines.
146 */
147
148# if STDC_HEADERS || HAVE_STRING_H
149#  include <string.h>
150# else
151#  include <strings.h>
152# endif
153
154#else /* not HAVE_CONFIG_H */
155
156/* Otherwise do it the hard way */
157
158# ifdef BSD
159#  include <strings.h>
160# else
161#  ifdef VMS
162    extern int strlen(), strncmp();
163#  else
164#   include <string.h>
165#  endif
166# endif
167
168#endif /* not HAVE_CONFIG_H */
169
170#if STDC_HEADERS
171# include <stdlib.h>
172#else
173/*
174   malloc and free should be declared in stdlib.h,
175   but if you've got a K&R compiler, they probably aren't.
176 */
177# ifdef MSDOS
178#  include <malloc.h>
179# else
180#  ifdef VMS
181     extern char *malloc();
182     extern void free();
183#  else
184     extern char *malloc();
185     extern int free();
186#  endif
187# endif
188
189#endif
190
/* Define NULL (for *very* old compilers). */
#ifndef NULL
# define NULL (0)
#endif

/*
 * Character classification.
 *
 * The ctype macros don't always handle 8-bit characters correctly, so
 * every use goes through the wrappers below.  Where isascii is usable
 * (and we are not relying on standard headers) it screens out non-ASCII
 * values first.  In every configuration the argument is converted to
 * unsigned char before it reaches a ctype function: the scanners pass
 * plain char values, which may be negative, and a negative argument
 * other than EOF is undefined for the ctype functions.
 *
 * Note that the argument is evaluated more than once; do not pass an
 * expression with side effects.
 */
#ifdef isascii
# undef HAVE_ISASCII		/* just in case */
# define HAVE_ISASCII 1
#endif
#if STDC_HEADERS || !HAVE_ISASCII
# define is_ascii(c) 1
#else
# define is_ascii(c) isascii(c)
#endif

#define is_space(c) (is_ascii(c) && isspace((unsigned char)(c)))
#define is_alpha(c) (is_ascii(c) && isalpha((unsigned char)(c)))
#define is_alnum(c) (is_ascii(c) && isalnum((unsigned char)(c)))

/* Scanning macros: characters that may continue / start an identifier. */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
218
/*
 * Forward references.
 * These are old-style declarations (no parameter lists) so that the
 * file itself can be compiled by a pre-ANSI compiler.
 */

/* Scanning helpers: each returns a position within the text. */
char *skipspace();
char *scanstring();
char *ppdirforward();
char *ppdirbackward();

/* Recognition, conversion, and in-place editing. */
int test1();
int convert1();
int writeblanks();
227
/*
 * The main program.
 *
 * Parses the switches and file arguments, then copies the input to the
 * output a line at a time, handing anything that test1 recognizes as a
 * function definition to convert1.
 *
 * buf is one fixed-size read buffer.  `line' is where the next fgets
 * will store; it is moved past the text already held whenever more
 * input has to be appended (a multi-line header, or the lookahead for
 * the opening brace).  During the lookahead the header stays in buf as
 * one string and the following line(s) are kept as a second string,
 * `more', starting just past the header's terminating NUL.
 *
 * Exits with status 1 if a switch is not recognized, a file cannot be
 * opened, the buffer cannot be allocated, or the output cannot be
 * written; returns 0 otherwise.
 */
int
main(argc, argv)
    int argc;
    char *argv[];
{
    FILE *in = stdin;
    FILE *out = stdout;
    char *filename = 0;
    char *program_name = argv[0];
    char *output_name = 0;
#define bufsize 5000			/* arbitrary size */
    char *buf;
    char *line;
    char *more;
    char *usage =
      "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
    /*
     * In previous versions, ansi2knr recognized a --varargs switch.
     * If this switch was supplied, ansi2knr would attempt to convert
     * a ... argument to va_alist and va_dcl; if this switch was not
     * supplied, ansi2knr would simply drop any such arguments.
     * Now, ansi2knr always does this conversion, and we only
     * check for this switch for backward compatibility.
     */
    int convert_varargs = 1;
    int output_error;

    /* Process switches. */
    while (argc > 1 && argv[1][0] == '-') {
        if (!strcmp(argv[1], "--varargs")) {
            convert_varargs = 1;
            argc--;
            argv++;
            continue;
        }
        if (!strcmp(argv[1], "--filename") && argc > 2) {
            filename = argv[2];
            argc -= 2;
            argv += 2;
            continue;
        }
        fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
                argv[1]);
        /* usage is data, not a format: never hand it to fprintf. */
        fputs(usage, stderr);
        exit(1);
    }

    /* Open the files named by the remaining arguments. */
    switch (argc) {
    default:
        fputs(usage, stderr);
        /* Status 0 here is long-standing behavior; it is kept as is. */
        exit(0);
    case 3:
        output_name = argv[2];
        out = fopen(output_name, "w");
        if (out == NULL) {
            fprintf(stderr, "%s: Cannot open output file %s\n",
                    program_name, output_name);
            exit(1);
        }
        /* falls through */
    case 2:
        in = fopen(argv[1], "r");
        if (in == NULL) {
            fprintf(stderr, "%s: Cannot open input file %s\n",
                    program_name, argv[1]);
            exit(1);
        }
        if (filename == 0)
            filename = argv[1];
        /* falls through */
    case 1:
        break;
    }
    if (filename)
        fprintf(out, "#line 1 \"%s\"\n", filename);

    buf = malloc(bufsize);
    if (buf == NULL) {
        fprintf(stderr, "Unable to allocate read buffer!\n");
        exit(1);
    }

    line = buf;
    while (fgets(line, (unsigned)(buf + bufsize - line), in) != NULL) {
test:
        line += strlen(line);
        switch (test1(buf)) {
        case 2:			/* a function header */
            convert1(buf, out, 1, convert_varargs);
            break;
        case 1:			/* a function */
            /*
             * Check for a { at the start of the next line.  The
             * lookahead text is stored after the header's NUL.
             */
            more = ++line;
            for (;;) {
                char *tok;

                if (line >= buf + (bufsize - 1) ||	/* overflow check */
                    fgets(line, (unsigned)(buf + bufsize - line), in)
                      == NULL) {
                    /*
                     * No brace can be found.  Write the header
                     * unchanged, followed by whatever lookahead
                     * lines were already consumed, so that no
                     * input is dropped.  line > more only after at
                     * least one lookahead line was stored, and then
                     * line addresses that text's terminator.
                     */
                    fputs(buf, out);
                    if (line > more) {
                        *line = 0;	/* a failed fgets may have clobbered it */
                        fputs(more, out);
                    }
                    break;
                }
                tok = skipspace(ppdirforward(more), 1);
                if (*tok == 0) {
                    /* The next line was blank or a comment: */
                    /* keep scanning for a non-comment. */
                    line += strlen(line);
                    continue;
                }
                if (*tok == '{') {
                    /* Definitely a function header. */
                    convert1(buf, out, 0, convert_varargs);
                    fputs(more, out);
                    break;
                }
                /* buf isn't a function header, but more might be. */
                fputs(buf, out);
                /*
                 * Slide the lookahead text to the front of buf.  Both
                 * ends lie in the same buffer and may overlap, so
                 * strcpy must not be used; copying upward one byte at
                 * a time is safe because the destination is lower.
                 */
                {
                    char *dst = buf;
                    char *src = more;

                    while ((*dst++ = *src++) != 0)
                        ;
                }
                line = buf;
                goto test;
            }
            break;
        case -1:		/* maybe the start of a function */
            if (line != buf + (bufsize - 1))	/* overflow check */
                continue;	/* append the next line and test again */
            /* falls through */
        default:		/* not a function */
            fputs(buf, out);
            break;
        }
        line = buf;
    }
    /* Input ended while a possible header was still being collected. */
    if (line != buf)
        fputs(buf, out);
    free(buf);

    if (output_name) {
        output_error = ferror(out);
        output_error |= fclose(out);
    } else {		/* out == stdout */
        fflush(out);
        output_error = ferror(out);
    }
    if (output_error) {
        fprintf(stderr, "%s: error writing to %s\n", program_name,
                (output_name ? output_name : "stdout"));
        exit(1);
    }
    if (in != stdin)
        fclose(in);
    return 0;
}
374
/*
 * Skip forward or backward over one or more preprocessor directives.
 *
 * ppdirforward: while the text at p begins with '#', step past that
 * whole line, where a line ends with CR, LF, or CR LF.  Returns the
 * first position that does not begin a directive, or the position of
 * the terminating NUL if a directive runs to the end of the string.
 * A '#' that is preceded by white space is not recognized.
 */
char *
ppdirforward(p)
    char *p;
{
    while (*p == '#') {
        /* Run to the end of the directive's line. */
        while (*p != '\r' && *p != '\n') {
            if (*p == 0)
                return p;
            ++p;
        }
        /* CR LF counts as a single line terminator. */
        if (*p == '\r' && p[1] == '\n')
            ++p;
        ++p;
    }
    return p;
}
/*
 * Skip backward over preprocessor directive lines.
 *
 * p addresses the last character of a line (normally its terminator)
 * and limit is the start of the text.  The start of p's line is located;
 * if that line begins with '#', p is moved to the character just before
 * the line and the test is repeated.  Returns the final p, i.e. the end
 * of the last line that is not a directive.  If a NUL is met while
 * looking for a line start, the position just after it is returned.
 *
 * The character before the current position is examined without a
 * bounds check when the current character is a newline or when a
 * directive line starts at limit, so the text at limit must begin with
 * neither.
 */
char *
ppdirbackward(p, limit)
    char *p;
    char *limit;
{
    char *cur = p;

    for (;;) {
        /* Step over the CR of a CR LF pair. */
        if (*cur == '\n' && cur[-1] == '\r')
            --cur;
        /* Back up to the first character of this line. */
        while (cur > limit && cur[-1] != '\r' && cur[-1] != '\n') {
            if (cur[-1] == 0)
                return cur;
            --cur;
        }
        if (*cur != '#')
            return p;
        /* A directive: continue from the end of the preceding line. */
        --cur;
        p = cur;
    }
}
408
/*
 * Skip over white space and comments, in either direction.
 * (Preprocessor directives are not handled here; see ppdirforward and
 * ppdirbackward.)
 *
 * dir is 1 to scan forward or -1 to scan backward.  Scanning forward, a
 * comment is entered at its opening delimiter and left just past its
 * closing one; scanning backward the roles are reversed.  Returns the
 * first position that is neither white space nor the start of a
 * comment.  If a NUL is met inside a comment, the position of that NUL
 * is returned.
 */
char *
skipspace(p, dir)
    char *p;
    int dir;			/* 1 for forward, -1 for backward */
{
    for (;;) {
        while (is_space(*p))
            p += dir;
        /* Anything other than a comment delimiter ends the scan. */
        if (*p != '/' || p[dir] != '*')
            return p;
        p += 2 * dir;		/* step over the delimiter */
        for (; *p != '*' || p[dir] != '/'; p += dir)
            if (*p == 0)
                return p;	/* multi-line comment?? */
        p += 2 * dir;		/* step over the other delimiter */
    }
}
433
/*
 * Scan over a quoted string, in either direction.
 *
 * p addresses one of the string's double quotes: the opening one when
 * dir is 1, the closing one when dir is -1.  Returns the position just
 * beyond the quote at the other end, in the direction of travel (one
 * past the closing quote going forward, one before the opening quote
 * going backward).
 *
 * A quote is escaped exactly when it is preceded by an odd number of
 * backslashes, so both "\"" and "\\" are scanned correctly from either
 * end.  If a NUL is reached before the matching quote, the position of
 * that NUL is returned instead of running on through memory.  There is
 * no lower limit for a backward scan: the caller must know that the
 * opening quote exists.
 */
char *
scanstring(p, dir)
    char *p;
    int dir;
{
    if (dir > 0) {
        for (p++; *p != 0; p++) {
            if (*p == '\\') {
                if (p[1] == 0)
                    return p + 1;	/* unterminated string */
                p++;		/* the escaped character is not a delimiter */
            } else if (*p == '"')
                return p + 1;
        }
        return p;			/* unterminated string */
    }

    for (p--; *p != 0; p--) {
        if (*p == '"') {
            char *q = p;
            int escaped = 0;

            /* Count the run of backslashes in front of this quote. */
            while (q[-1] == '\\') {
                q--;
                escaped = !escaped;
            }
            if (!escaped)
                return p - 1;
            p = q;			/* resume in front of the backslashes */
        }
    }
    return p;
}
444
/*
 * Blank out the characters from start up to, but not including, end.
 * CR and LF are left alone so that the text keeps its line structure.
 * Nothing is changed if start is not below end.  Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{
    char *cur = start;

    while (cur < end) {
        if (!(*cur == '\r' || *cur == '\n'))
            *cur = ' ';
        cur++;
    }
    return 0;
}
459
/*
 * Decide whether the text in buf is a function definition.
 * The text may contain newlines and/or end with one.
 * The result is:
 *	0 - definitely not a function definition;
 *	1 - definitely a function definition;
 *	2 - definitely a function prototype (NOT USED);
 *	-1 - possibly the beginning of a function definition:
 *		append another line and look again.
 * Function prototypes are deliberately not converted, because
 * Ghostscript's declaration-generating macros look too much like
 * prototypes, and confuse the algorithms.
 */
int
test1(buf)
    char *buf;
{
    /*
     * Keywords that could be followed by a left parenthesis
     * (which, unfortunately, is most of them).
     */
    static char *keywords[] = {
        "asm", "auto", "case", "char", "const", "double",
        "extern", "float", "for", "if", "int", "long",
        "register", "return", "short", "signed", "sizeof",
        "static", "switch", "typedef", "unsigned",
        "void", "volatile", "while", 0
    };
    char *scan = buf;
    char *last;			/* last significant character */
    char *name_end;		/* just past the leading identifier */
    char *arg;			/* first token inside the parentheses */
    char **kw;
    unsigned name_len;
    int arg_len;
    int verdict;

    if (!isidfirstchar(*scan))
        return 0;		/* no name at left margin */

    /* Classify by the last character that is not space or comment */
    /* and is not on a trailing preprocessor directive line. */
    last = skipspace(ppdirbackward(buf + strlen(buf) - 1, buf), -1);
    if (*last == '{' || *last == '}')
        return 0;		/* not a function */
    if (*last == ';')
        verdict = 0 /*2*/;
    else if (*last == ')')
        verdict = 1;
    else
        verdict = -1;

    /* The name must be followed by a non-empty parenthesized list. */
    while (isidchar(*scan))
        scan++;
    name_end = scan;
    scan = skipspace(scan, 1);
    if (*scan++ != '(')
        return 0;		/* not a function */
    scan = skipspace(scan, 1);
    if (*scan == ')')
        return 0;		/* no parameters */

    /* Check that the apparent function name isn't a keyword. */
    name_len = name_end - buf;
    for (kw = keywords; *kw != 0; kw++)
        if (strlen(*kw) == name_len && !strncmp(*kw, buf, name_len))
            return 0;		/* name is a keyword */

    /*
     * Reject identifier1(identifier2) unless identifier2 is "void",
     * and reject identifier1(identifier2, xxxx).
     */
    arg = scan;
    while (isidchar(*scan))
        scan++;
    arg_len = scan - arg;
    scan = skipspace(scan, 1);
    if (*scan == ',')
        return 0;		/* not a function */
    if (*scan == ')' && (arg_len != 4 || strncmp(arg, "void", 4)))
        return 0;		/* not a function */

    /*
     * If the last significant character was a ), it might only close
     * a formal parameter that is itself a procedure, so count
     * parentheses: an unclosed one means the header continues.
     */
    if (verdict > 0) {
        int depth = 0;

        scan = skipspace(buf, 1);
        while (*scan) {
            if (*scan == '(')
                depth++;
            else if (*scan == ')')
                depth--;
            scan = skipspace(scan + 1, 1);
        }
        if (depth > 0)
            verdict = -1;
    }
    return verdict;
}
553
554/* Convert a recognized function definition or header to K&R syntax. */
555int
556convert1(buf, out, header, convert_varargs)
557    char *buf;
558    FILE *out;
559    int header;			/* Boolean */
560    int convert_varargs;	/* Boolean */
561{	char *endfn;
562	char *p;
563	/*
564	 * The breaks table contains pointers to the beginning and end
565	 * of each argument.
566	 */
567	char **breaks;
568	unsigned num_breaks = 2;	/* for testing */
569	char **btop;
570	char **bp;
571	char **ap;
572	char *vararg = 0;
573
574	/* Pre-ANSI implementations don't agree on whether strchr */
575	/* is called strchr or index, so we open-code it here. */
576	for ( endfn = buf; *(endfn++) != '('; )
577	  ;
578top:	p = endfn;
579	breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
580	if ( breaks == NULL )
581	   {	/* Couldn't allocate break table, give up */
582		fprintf(stderr, "Unable to allocate break table!\n");
583		fputs(buf, out);
584		return -1;
585	   }
586	btop = breaks + num_breaks * 2 - 2;
587	bp = breaks;
588	/* Parse the argument list */
589	do
590	   {	int level = 0;
591		char *lp = NULL;
592		char *rp = NULL;
593		char *end = NULL;
594
595		if ( bp >= btop )
596		   {	/* Filled up break table. */
597			/* Allocate a bigger one and start over. */
598			free((char *)breaks);
599			num_breaks <<= 1;
600			goto top;
601		   }
602		*bp++ = p;
603		/* Find the end of the argument */
604		for ( ; end == NULL; p++ )
605		   {	switch(*p)
606			   {
607			   case ',':
608				if ( !level ) end = p;
609				break;
610			   case '(':
611				if ( !level ) lp = p;
612				level++;
613				break;
614			   case ')':
615				if ( --level < 0 ) end = p;
616				else rp = p;
617				break;
618			   case '/':
619				if (p[1] == '*')
620				    p = skipspace(p, 1) - 1;
621				break;
622			   case '"':
623			       p = scanstring(p, 1) - 1;
624			       break;
625			   default:
626				;
627			   }
628		   }
629		/* Erase any embedded prototype parameters. */
630		if ( lp && rp )
631		  writeblanks(lp + 1, rp);
632		p--;			/* back up over terminator */
633		/* Find the name being declared. */
634		/* This is complicated because of procedure and */
635		/* array modifiers. */
636		for ( ; ; )
637		   {	p = skipspace(p - 1, -1);
638			switch ( *p )
639			   {
640			   case ']':	/* skip array dimension(s) */
641			   case ')':	/* skip procedure args OR name */
642			   {	int level = 1;
643				while ( level )
644				 switch ( *--p )
645				   {
646				   case ']': case ')':
647				       level++;
648				       break;
649				   case '[': case '(':
650				       level--;
651				       break;
652				   case '/':
653				       if (p > buf && p[-1] == '*')
654					   p = skipspace(p, -1) + 1;
655				       break;
656				   case '"':
657				       p = scanstring(p, -1) + 1;
658				       break;
659				   default: ;
660				   }
661			   }
662				if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
663				   {	/* We found the name being declared */
664					while ( !isidfirstchar(*p) )
665					  p = skipspace(p, 1) + 1;
666					goto found;
667				   }
668				break;
669			   default:
670				goto found;
671			   }
672		   }
673found:		if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
674		  {	if ( convert_varargs )
675			  {	*bp++ = "va_alist";
676				vararg = p-2;
677			  }
678			else
679			  {	p++;
680				if ( bp == breaks + 1 )	/* sole argument */
681				  writeblanks(breaks[0], p);
682				else
683				  writeblanks(bp[-1] - 1, p);
684				bp--;
685			  }
686		   }
687		else
688		   {	while ( isidchar(*p) ) p--;
689			*bp++ = p+1;
690		   }
691		p = end;
692	   }
693	while ( *p++ == ',' );
694	*bp = p;
695	/* Make a special check for 'void' arglist */
696	if ( bp == breaks+2 )
697	   {	p = skipspace(breaks[0], 1);
698		if ( !strncmp(p, "void", 4) )
699		   {	p = skipspace(p+4, 1);
700			if ( p == breaks[2] - 1 )
701			   {	bp = breaks;	/* yup, pretend arglist is empty */
702				writeblanks(breaks[0], p + 1);
703			   }
704		   }
705	   }
706	/* Put out the function name and left parenthesis. */
707	p = buf;
708	while ( p != endfn ) putc(*p, out), p++;
709	/* Put out the declaration. */
710	if ( header )
711	  {	fputs(");", out);
712		for ( p = breaks[0]; *p; p++ )
713		  if ( *p == '\r' || *p == '\n' )
714		    putc(*p, out);
715	  }
716	else
717	  {	for ( ap = breaks+1; ap < bp; ap += 2 )
718		  {	p = *ap;
719			while ( isidchar(*p) )
720			  putc(*p, out), p++;
721			if ( ap < bp - 1 )
722			  fputs(", ", out);
723		  }
724		fputs(")  ", out);
725		/* Put out the argument declarations */
726		for ( ap = breaks+2; ap <= bp; ap += 2 )
727		  (*ap)[-1] = ';';
728		if ( vararg != 0 )
729		  {	*vararg = 0;
730			fputs(breaks[0], out);		/* any prior args */
731			fputs("va_dcl", out);		/* the final arg */
732			fputs(bp[0], out);
733		  }
734		else
735		  fputs(breaks[0], out);
736	  }
737	free((char *)breaks);
738	return 0;
739}
740