1/*
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2009 Oracle.  All rights reserved.
5 *
6 */
7
8/*
9 * This is the entry function of the db_sql command.  Db_sql is a
10 * utility program that translates a schema description written in a
11 * SQL Data Definition Language dialect into C code that implements
12 * the schema using Berkeley DB.
13 */
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17#include <assert.h>
18#include "db_sql.h"
19
/* getopt is declared by hand here rather than through a system header. */
extern int getopt(int, char *const [], const char *);

/* Forward declarations of helpers that are defined later in this file. */
static int usage(char *);
static char * change_extension(char *path, char *extension);
static int read_and_parse(FILE *fp);

/* Program name used in diagnostics; main() replaces it with argv[0]. */
char *progname = "db_sql";
/* Number of the input line being read; maintained by read_and_parse(). */
int line_number = 0;
/* Set to 1 by the -d command line switch. */
int debug = 0;
28
29int
30main(argc,argv)
31	int argc;
32	char **argv;
33{
34	extern char *optarg;
35	extern int optind;
36	int opt, free_ofilename, free_hfilename;
37	FILE *ifile, *hfile, *ofile, *tfile, *vfile;
38	char *ifilename, *hfilename, *ofilename, *tfilename, *vfilename;
39
40	ifilename = hfilename = ofilename = tfilename = vfilename = NULL;
41	free_ofilename = free_hfilename = 0;
42
43	progname = argv[0];
44
45	/* parse the command line switches */
46
47	while ((opt = getopt(argc, argv, "i:t:o:h:dv:")) != -1) {
48		switch(opt) {
49		case 'i':              /* input file name */
50			ifilename = optarg;
51			break;
52		case 'h':              /* header output file name */
53			hfilename = optarg;
54			break;
55		case 'o':              /* output file name */
56			ofilename = optarg;
57			break;
58		case 't':              /* test code output file name */
59			tfilename = optarg;
60			break;
61		case 'd':
62			debug = 1;
63			break;
64		case 'v':              /* verification code output file name */
65			vfilename = optarg;
66			break;
67		default:
68			return(usage(0));
69		}
70	}
71
72	argc -= optind;
73	argv += optind;
74
75	if (argc != 0) {
76		fprintf(stderr,
77			"extra argument %s after switch arguments\n", *argv);
78		return(usage(0));
79	}
80
81	if (ifilename == NULL)
82		ifile = stdin;
83	else
84		if ((ifile = fopen(ifilename, "r")) == NULL)
85			return(usage(ifilename));
86
87	/* if ofilename wasn't given, use ifilename with a .c extension */
88
89	if (ofilename == NULL && ifilename != NULL) {
90		ofilename = change_extension(ifilename, "c");
91		free_ofilename = 1;
92	}
93
94	if (ofilename == NULL)
95		ofile = stdout;
96	else
97		if ((ofile = fopen(ofilename, "w")) == NULL)
98			return(usage(ofilename));
99
100	/* if hfilename wasn't given, use ofilename with a .h extension */
101
102	if (hfilename == NULL && ofilename != NULL) {
103		hfilename = change_extension(ofilename, "h");
104		free_hfilename = 1;
105	}
106
107	if (hfilename == NULL)
108		hfile = stdout;
109	else
110		if ((hfile = fopen(hfilename, "w")) == NULL)
111			return(usage(hfilename));
112
113	/*
114	 * if tfile wasn't given, we won't generate the test code.
115	 *  tfile == null turns off test code generation
116	 */
117	if (tfilename == NULL)
118		tfile = 0;
119	else {
120		if (hfilename == NULL) {
121			fprintf(stderr,
122			    "Can't produce test when streaming to stdout\n");
123			return(usage(0));
124		}
125		if ((tfile = fopen(tfilename, "w")) == NULL)
126			return(usage(tfilename));
127	}
128	/*
129	 * Verification files are generated for internal testing purposes,
130	 * they are similar to the test output file. This functionality is
131	 * not targeted at end users, so is not documented.
132	 */
133	if (vfilename == NULL)
134		vfile = 0;
135	else {
136		if (hfilename == NULL) {
137			fprintf(stderr,
138			    "Can't produce verify when streaming to stdout\n");
139			return(usage(0));
140		}
141		if ((vfile = fopen(vfilename, "w")) == NULL)
142			return(usage(vfilename));
143	}
144
145	if (read_and_parse(ifile))
146		exit(1);
147
148	generate(hfile, ofile, tfile, vfile, hfilename);
149
150	/* clean up the allocated memory */
151	if (free_ofilename)
152		free(ofilename);
153	if (free_hfilename)
154		free(hfilename);
155	return 0;
156}
157
/*
 * Scan a NUL-terminated input buffer for the rightmost semicolon that
 * is not inside a comment.  Two comment forms are recognized: slash-star
 * comments, which end at the next star-slash, and hyphen-hyphen
 * comments, which end at the next newline.  Quoted strings are not
 * recognized; later, this may need to notice quotes as well.
 *
 * The scanner state is static, so a comment that is still open at the
 * end of one buffer remains open at the start of the next call.
 *
 * Returns a pointer to the semicolon within p, or NULL if p is NULL or
 * holds no semicolon outside a comment.
 */
static char *
scan_for_rightmost_semicolon(char *p)
{
	static enum scanner_state {
		IDLE = 0, GOT_SLASH = 1, IN_SLASHSTAR_COMMENT = 2,
		GOT_STAR = 3, GOT_HYPHEN = 4, IN_HYPHHYPH_COMMENT = 5
	} state = IDLE;

	char *result;

	result = NULL;

	if (p == NULL)
		return result;

	for (; *p != '\0'; p++) {
		switch (state) {
		case GOT_SLASH:
			if (*p == '*') {
				state = IN_SLASHSTAR_COMMENT;
				continue;
			}
			/* Not a comment opener: treat *p as ordinary text. */
			state = IDLE;
			break;
		case GOT_HYPHEN:
			if (*p == '-') {
				state = IN_HYPHHYPH_COMMENT;
				continue;
			}
			/* Not a comment opener: treat *p as ordinary text. */
			state = IDLE;
			break;
		case IN_SLASHSTAR_COMMENT:
			if (*p == '*')
				state = GOT_STAR;
			continue;
		case GOT_STAR:
			/*
			 * A run of stars keeps the closer pending, so that
			 * a comment ending in two or more stars is closed.
			 */
			if (*p == '/')
				state = IDLE;
			else if (*p != '*')
				state = IN_SLASHSTAR_COMMENT;
			continue;
		case IN_HYPHHYPH_COMMENT:
			if (*p == '\n')
				state = IDLE;
			continue;
		case IDLE:
			break;
		}

		/*
		 * Only ordinary text reaches this point.  A star has no
		 * meaning here; it matters only after a slash or inside a
		 * slash-star comment.
		 */
		switch (*p) {
		case '/':
			state = GOT_SLASH;
			break;
		case '-':
			state = GOT_HYPHEN;
			break;
		case ';':
			result = p;
			break;
		}
	}

	return result;
}
223
224/*
225 * read_and_parse reads lines from the input file (containing SQL DDL),
226 * and sends the to the tokenizer and parser.  Because of the way the
227 * SQLite tokenizer works, the chunks sent to the tokenizer must
228 * contain a multiple of whole SQL statements -- a partial statement
229 * will produce a syntax error.  Therefore, this function splits its
230 * input at semicolons.
231 */
232static int
233read_and_parse(fp)
234	FILE *fp;
235{
236	size_t line_len, copy_len, collector_len;
237	char *q, *collector, buf[256], *err_msg;
238
239	collector = 0;
240	collector_len = 0;
241	err_msg = 0;
242
243	/* line_number is global */
244
245	for (line_number = 1; fgets(buf, sizeof(buf), fp) != 0; line_number++) {
246
247		line_len = strlen(buf);
248
249		if (1 + strlen(buf)  == sizeof(buf)) {
250			fprintf(stderr, "%s: line %d is too long", progname,
251				line_number);
252			return 1;
253		}
254
255		/*
256		 * Does this line contain a semicolon?  If so, copy
257		 * the line, up to and including its last semicolon,
258		 * into collector and parse it.  Then reinitialize
259		 * collector with the remainer of the line
260		 */
261		if ((q = scan_for_rightmost_semicolon(buf)) != NULL)
262			copy_len = 1 + q - buf;
263		else
264			copy_len = line_len;
265
266		collector_len += 1 + copy_len;
267		if (collector == NULL)
268			collector = calloc(1, collector_len);
269		else
270			collector = realloc(collector, collector_len);
271
272		strnconcat(collector, collector_len, buf, copy_len);
273
274		if (q != 0) {
275			if (do_parse(collector, &err_msg) != 0) {
276				fprintf(stderr,
277					"parsing error at line %d : %s\n",
278					line_number, err_msg);
279				return 1;
280			}
281
282			collector_len = 1 + line_len - copy_len;
283			collector = realloc(collector, collector_len);
284			memcpy(collector, buf + copy_len, collector_len);
285			assert(collector[collector_len-1] == 0);
286		}
287	}
288
289	/*
290	 * if there's anything after the final semicolon, send it on
291	 * to the tokenizer -- it might be a hint comment
292	 */
293	if (collector != 0) {
294		if (strlen(collector) > 0 &&
295		    do_parse(collector, &err_msg) != 0) {
296			fprintf(stderr, "parsing error at end of file: %s\n",
297				err_msg);
298			return 1;
299		}
300
301		free (collector);
302	}
303
304	return 0;
305}
306
/*
 * A stand-in for basename, which isn't available everywhere.  The
 * separators are tried in order: if the path contains a '/', the text
 * after the last '/' is returned; otherwise, if it contains a '\', the
 * text after the last '\' is returned; otherwise the whole path is
 * returned.  The result points into path; nothing is allocated.
 */
static char *
final_component_of(path)
	char *path;
{
	static const char separators[] = { '/', '\\' };
	size_t i;
	char *hit;

	for (i = 0; i < sizeof(separators); i++) {
		hit = strrchr(path, separators[i]);
		if (hit != NULL)
			return hit + 1;
	}

	return path;
}
324
/*
 * Return a new pathname in which any existing "extension" (the part
 * after the last "." of the final path component) has been replaced by
 * the given extension.  If the pathname has no extension, a "." and
 * the new extension are simply appended.  A dot that is the first
 * character of the final component does not count as the start of an
 * extension.
 *
 * Returns memory obtained from malloc, which the caller must free, or
 * NULL if the allocation fails.
 */
static char *
change_extension(char *path, char *extension)
{
	size_t stem_len, copy_len;
	char *base, *last_dot, *copy;

	/*
	 * Isolate the final component of the pathname, so that we can
	 * examine it for the presence of a '.' without finding a '.'
	 * in a directory name component of the pathname.
	 */
	base = final_component_of(path);
	if (*base != '\0')
		base++;  /* skip initial char in basename, it could be a dot */

	/*
	 * Is there a dot in the basename?  If so, then the path has
	 * an extension that we'll elide before adding the new one.
	 * base points into path, so the offset is taken from path.
	 */
	last_dot = strrchr(base, '.');
	if (last_dot != NULL)
		stem_len = (size_t)(last_dot - path);
	else
		stem_len = strlen(path);

	/* stem + '.' + extension + terminating NUL */
	copy_len = 2 + stem_len + strlen(extension);
	copy = malloc(copy_len);
	if (copy == NULL)
		return NULL;

	memcpy(copy, path, stem_len);
	copy[stem_len] = '\0'; /* terminate the string */

	/*
	 * strconcat is declared outside this file; presumably it appends
	 * its last argument to copy within the copy_len bound.
	 */
	strconcat(copy, copy_len, ".");
	strconcat(copy, copy_len, extension);

	return copy;
}
367
368static int
369usage(char *error_tag) {
370	if (error_tag != 0)
371		perror(error_tag);
372	fprintf(stderr, "\
373Usage:  %s [-i inputFile] [-h outputHeaderFile] [-o outputFile] \
374[-t testOutputFile] [-d] [-v verificationOutputFile]\n",
375		progname);
376	return(1);
377}
378