/*
 * man2tcl.c --
 *
 *	This file contains a program that turns a man page of the form used
 *	for Tcl and Tk into a Tcl script that invokes a Tcl command for each
 *	construct in the man page. The script can then be eval'ed to translate
 *	the manual entry into some other format such as MIF or HTML.
 *
 * Usage:
 *
 *	man2tcl ?fileName?
 *
 * Copyright (c) 1995 Sun Microsystems, Inc.
 *
 * See the file "license.terms" for information on usage and redistribution of
 * this file, and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * RCS: @(#) $Id: man2tcl.c,v 1.13.2.3 2008/10/24 00:46:27 patthoyts Exp $
 */
20
static char sccsid[] = "@(#) man2tcl.c 1.3 95/08/12 17:34:08";

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>

/*
 * Imported things that aren't defined in header files:
 */

/*
 * Some <errno.h> define errno to be something complex and thread-aware; in
 * that case we definitely do not want to declare errno ourselves!
 */

#ifndef errno
extern int errno;
#endif

/*
 * Current line number of the input, used for error messages. It is reset to
 * zero at the start of every pass over the file.
 */

static int lineNumber;

/*
 * The variable below is set to 1 if an error occurs anywhere while reading in
 * the file. It is cleared at the start of every pass and becomes the exit
 * status of the program.
 */

static int status;

/*
 * The variable below is set to 1 if output should be generated. If it's 0, it
 * means we're doing a pre-pass to make sure that the file can be properly
 * parsed.
 */

static int writeOutput;

/*
 * Output helpers that only write to stdout during the output pass. PRINT
 * takes a complete, parenthesized printf argument list, e.g.
 * PRINT(("font %c\n", c)). Both macros expand to a single statement (the
 * do/while(0) wrapper) so they can be used safely as the body of an
 * unbraced if/else.
 */

#define PRINT(args)	do { if (writeOutput) { printf args; } } while (0)
#define PRINTC(chr)	do { if (writeOutput) { putc((chr), stdout); } } while (0)

/*
 * Prototypes for functions defined in this file:
 */

static void		DoMacro(char *line);
static void		DoText(char *line);
static void		QuoteText(char *string, int count);
73
74/*
75 *----------------------------------------------------------------------
76 *
77 * main --
78 *
79 *	This function is the main program, which does all of the work of the
80 *	program.
81 *
82 * Results:
83 *	None: exits with a 0 return status to indicate success, or 1 to
84 *	indicate that there were problems in the translation.
85 *
86 * Side effects:
87 *	A Tcl script is output to standard output. Error messages may be
88 *	output on standard error.
89 *
90 *----------------------------------------------------------------------
91 */
92
93int
94main(
95    int argc,			/* Number of command-line arguments. */
96    char **argv)		/* Values of command-line arguments. */
97{
98    FILE *f;
99#define MAX_LINE_SIZE 4000
100    char line[MAX_LINE_SIZE];
101    char *p;
102
103    /*
104     * Find the file to read, and open it if it isn't stdin.
105     */
106
107    if (argc == 1) {
108	f = stdin;
109    } else if (argc == 2) {
110	f = fopen(argv[1], "r");
111	if (f == NULL) {
112	    fprintf(stderr, "Couldn't read \"%s\": %s\n", argv[1],
113		    strerror(errno));
114	    exit(1);
115	}
116    } else {
117	fprintf(stderr, "Usage: %s ?fileName?\n", argv[0]);
118    }
119
120    /*
121     * Make two passes over the file. In the first pass, just check to make
122     * sure we can handle everything. If there are problems, generate output
123     * and stop. If everything is OK, make a second pass to actually generate
124     * output.
125     */
126
127    for (writeOutput = 0; writeOutput < 2; writeOutput++) {
128	lineNumber = 0;
129	status = 0;
130	while (fgets(line, MAX_LINE_SIZE, f) != NULL) {
131	    for (p = line; *p != 0; p++) {
132		if (*p == '\n') {
133		    *p = 0;
134		    break;
135		}
136	    }
137	    lineNumber++;
138
139	    if (((line[0] == '.') || (line[0] == '\'')) && (line[1] == '\\') && (line[2] == '\"')) {
140		/*
141		 * This line is a comment. Ignore it.
142		 */
143
144		continue;
145	    }
146
147	    if (strlen(line) >= MAX_LINE_SIZE -1) {
148		fprintf(stderr, "Too long line. Max is %d chars.\n",
149			MAX_LINE_SIZE - 1);
150		exit(1);
151	    }
152
153	    if ((line[0] == '.') || (line[0] == '\'')) {
154		/*
155		 * This line is a macro invocation.
156		 */
157
158		DoMacro(line);
159	    } else {
160		/*
161		 * This line is text, possibly with formatting characters
162		 * embedded in it.
163		 */
164
165		DoText(line);
166	    }
167	}
168	if (status != 0) {
169	    break;
170	}
171	fseek(f, 0, SEEK_SET);
172    }
173    exit(status);
174}
175
176/*
177 *----------------------------------------------------------------------
178 *
179 * DoMacro --
180 *
181 *	This function is called to handle a macro invocation. It parses the
182 *	arguments to the macro and generates a Tcl command to handle the
183 *	invocation.
184 *
185 * Results:
186 *	None.
187 *
188 * Side effects:
189 *	A Tcl command is written to stdout.
190 *
191 *----------------------------------------------------------------------
192 */
193
194static void
195DoMacro(
196    char *line)			/* The line of text that contains the macro
197				 * invocation. */
198{
199    char *p, *end;
200    int quote;
201
202    /*
203     * If there is no macro name, then just skip the whole line.
204     */
205
206    if ((line[1] == 0) || (isspace(line[1]))) {
207	return;
208    }
209
210    PRINT(("macro"));
211    if (*line != '.') {
212	PRINT(("2"));
213    }
214
215    /*
216     * Parse the arguments to the macro (including the name), in order.
217     */
218
219    p = line+1;
220    while (1) {
221	PRINTC(' ');
222	if (*p == '"') {
223	    /*
224	     * The argument is delimited by quotes.
225	     */
226
227	    for (end = p+1; *end != '"'; end++) {
228		if (*end == 0) {
229		    fprintf(stderr,
230			    "Unclosed quote in macro call on line %d.\n",
231			    lineNumber);
232		    status = 1;
233		    break;
234		}
235	    }
236	    QuoteText(p+1, (end-(p+1)));
237	} else {
238	    quote = 0;
239	    for (end = p+1; (*end != 0) && (quote || !isspace(*end)); end++) {
240		if (*end == '\'') {
241		    quote = !quote;
242		}
243	    }
244	    QuoteText(p, end-p);
245	}
246	if (*end == 0) {
247	    break;
248	}
249	p = end+1;
250	while (isspace(*p)) {
251	    /*
252	     * Skip empty space before next argument.
253	     */
254
255	    p++;
256	}
257	if (*p == 0) {
258	    break;
259	}
260    }
261    PRINTC('\n');
262}
263
264/*
265 *----------------------------------------------------------------------
266 *
267 * DoText --
268 *
269 *	This function is called to handle a line of troff text. It parses the
270 *	text, generating Tcl commands for text and for formatting stuff such
271 *	as font changes.
272 *
273 * Results:
274 *	None.
275 *
276 * Side effects:
277 *	Tcl commands are written to stdout.
278 *
279 *----------------------------------------------------------------------
280 */
281
282static void
283DoText(
284    char *line)			/* The line of text. */
285{
286    char *p, *end;
287
288    /*
289     * Divide the line up into pieces consisting of backslash sequences, tabs,
290     * and other text.
291     */
292
293    p = line;
294    while (*p != 0) {
295	if (*p == '\t') {
296	    PRINT(("tab\n"));
297	    p++;
298	} else if (*p != '\\') {
299	    /*
300	     * Ordinary text.
301	     */
302
303	    for (end = p+1; (*end != '\\') && (*end != 0); end++) {
304		/* Empty loop body. */
305	    }
306	    PRINT(("text "));
307	    QuoteText(p, end-p);
308	    PRINTC('\n');
309	    p = end;
310	} else {
311	    /*
312	     * A backslash sequence. There are particular ones that we
313	     * understand; output an error message for anything else and just
314	     * ignore the backslash.
315	     */
316
317	    p++;
318	    if (*p == 'f') {
319		/*
320		 * Font change.
321		 */
322
323		PRINT(("font %c\n", p[1]));
324		p += 2;
325	    } else if (*p == '-') {
326		PRINT(("dash\n"));
327		p++;
328	    } else if (*p == 'e') {
329		PRINT(("text \\\\\n"));
330		p++;
331	    } else if (*p == '.') {
332		PRINT(("text .\n"));
333		p++;
334	    } else if (*p == '&') {
335		p++;
336	    } else if (*p == '0') {
337		PRINT(("text { }\n"));
338		p++;
339	    } else if (*p == '(') {
340		if ((p[1] == 0) || (p[2] == 0)) {
341		    fprintf(stderr, "Bad \\( sequence on line %d.\n",
342			    lineNumber);
343		    status = 1;
344		} else {
345		    PRINT(("char {\\(%c%c}\n", p[1], p[2]));
346		    p += 3;
347		}
348	    } else if (*p == 'N' && *(p+1) == '\'') {
349		int ch;
350
351		p += 2;
352		sscanf(p,"%d",&ch);
353		PRINT(("text \\u%04x\n", ch));
354		while(*p&&*p!='\'') p++;
355		p++;
356	    } else if (*p != 0) {
357		PRINT(("char {\\%c}\n", *p));
358		p++;
359	    }
360	}
361    }
362    PRINT(("newline\n"));
363}
364
365/*
366 *----------------------------------------------------------------------
367 *
368 * QuoteText --
369 *
370 *	Copy the "string" argument to stdout, adding quote characters around
371 *	any special Tcl characters so that they'll just be treated as ordinary
372 *	text.
373 *
374 * Results:
375 *	None.
376 *
377 * Side effects:
378 *	Text is written to stdout.
379 *
380 *----------------------------------------------------------------------
381 */
382
383static void
384QuoteText(
385    char *string,		/* The line of text. */
386    int count)			/* Number of characters to write from
387				 * string. */
388{
389    if (count == 0) {
390	PRINT(("{}"));
391	return;
392    }
393    for ( ; count > 0; string++, count--) {
394	switch (*string) {
395	case '\\':
396	    if (*(string+1) == 'N' && *(string+2) == '\'') {
397		int ch;
398
399		string += 3;
400		count -= 3;
401		sscanf(string,"%d",&ch);
402		PRINT(("\\u%04x", ch));
403		while(count>0&&*string!='\'') {string++;count--;}
404		continue;
405	    } else if (*(string+1) == '0') {
406		PRINT(("\\ "));
407		string++;
408		count--;
409		continue;
410	    }
411	case '$': case '[': case '{': case ' ': case ';':
412	case '"': case '\t':
413	    PRINTC('\\');
414	default:
415	    PRINTC(*string);
416	}
417    }
418}
419
/*
 * Local Variables:
 * mode: c
 * c-basic-offset: 4
 * fill-column: 78
 * End:
 */
427