1/*	$OpenBSD: lowparse.c,v 1.36 2023/09/04 11:35:11 espie Exp $ */
2
3/* low-level parsing functions. */
4
5/*
6 * Copyright (c) 1999,2000 Marc Espie.
7 *
8 * Extensive code changes for the OpenBSD project.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OPENBSD
23 * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <assert.h>
33#include <stddef.h>
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38#include "defines.h"
39#include "buf.h"
40#include "lowparse.h"
41#include "error.h"
42#include "lst.h"
43#include "memory.h"
44#include "pathnames.h"
45#ifndef LOCATION_TYPE
46#include "location.h"
47#endif
48#include "var.h"
49
50
/* Name of the variable to which new_input_file() appends, through
 * Var_Append(), the name of every makefile it starts reading. */
#define READ_MAKEFILES "MAKEFILE_LIST"
52
/* Input stream structure: file or string.
 * Files have str == NULL, F != NULL.
 * Strings have F == NULL, str != NULL.
 *
 * [ptr, end) is the part of the current buffer that has not been read yet:
 * for files it is refilled one line at a time with fgetln(), for strings
 * it initially covers the whole of str.
 */
struct input_stream {
	Location origin;	/* Name of file and line number */
	FILE *F;		/* Open stream, or NULL if pure string. */
	char *str;		/* Input string, if F == NULL.  Released by
				 * free_input_stream(). */

	/* Line buffer. */
	char *ptr;		/* Where we are: next character to hand out. */
	char *end;		/* Don't overdo it: one past the last
				 * buffered character. */
};
66
/* NULL until Parse_FromFile() installs a stream, and again once
 * Parse_NextFile() has popped the last one. */
static struct input_stream *current;	/* the input_stream being parsed. */

static LIST input_stack;	/* Stack of input_stream waiting to be parsed
				 * (includes and loop reparses) */

/* record gnode location for proper reporting at runtime */
/* Set by Parse_SetLocation(); while non-NULL, Parse_FillLocation() copies
 * this location instead of the current parsing position. */
static Location *post_parse = NULL;
74
/* input_stream ctors.
 *
 * obj = new_input_file(filename, filehandle);
 *	Create input stream from filename, filehandle.
 *	Also appends filename to the READ_MAKEFILES variable. */
static struct input_stream *new_input_file(const char *, FILE *);
/* obj = new_input_string(str, origin);
 *	Create input stream from str, origin.
 *	str is not copied: the stream keeps the pointer, and
 *	free_input_stream() later passes it to free(). */
static struct input_stream *new_input_string(char *, const Location *);
/* free_input_stream(obj);
 *	Discard consumed input stream, closing files, freeing memory.
 *	A stream whose descriptor is STDIN_FILENO is left open.  */
static void free_input_stream(struct input_stream *);
86
87
/* Handling basic character reading.
 * c = read_char();
 *	New character c from current input stream, or EOF at end of stream.
 *
 *	Buffered bytes are converted through unsigned char, so that a 0xff
 *	byte in the input can never compare equal to EOF on platforms where
 *	plain char is signed.  The expansion is fully parenthesized so the
 *	macro can be used safely inside larger expressions. */
#define read_char()	\
    (current->ptr < current->end ? \
	(unsigned char)*current->ptr++ : grab_new_line_and_readchar())
/* char = grab_new_line_and_readchar();
 *	Guts for read_char. Grabs a new line off fgetln when we have
 *	consumed the current line and returns the first char, or EOF at end of
 *	stream.  String streams are never refilled: once their buffer is
 *	exhausted this always yields EOF.  */
static int grab_new_line_and_readchar(void);
/* c = skip_to_end_of_line();
 *	Skips to the end of the current line, returns either '\n' or EOF.
 *	For file streams this jumps straight to the last character of the
 *	buffered line instead of reading the characters one by one.  */
static int skip_to_end_of_line(void);
101
102
/* Helper functions to handle basic parsing. */
/* read_logical_line(buffer, firstchar);
 *	Grabs logical line into buffer, the first character has already been
 *	read into firstchar.  */
static void read_logical_line(Buffer, int);

/* firstchar = skip_empty_lines_and_read_char(buffer);
 *	Scans lines, skipping empty lines. May put some characters into
 *	buffer, returns the first character useful to continue parsing
 *	(e.g., not a backslash or a space). */
static int skip_empty_lines_and_read_char(Buffer);
114
/* Directory prefix that simplify() strips from file names, and its length.
 * Both are set together by Parse_setcurdir(). */
const char *curdir;
size_t curdir_len;
117
118void
119Parse_setcurdir(const char *dir)
120{
121	curdir = dir;
122	curdir_len = strlen(dir);
123}
124
/* startswith(f, s, len);
 *	True iff the first len characters of f match those of s, and the
 *	character that follows them in f is a '/'. */
static bool
startswith(const char *f, const char *s, size_t len)
{
	if (strncmp(f, s, len) != 0)
		return 0;
	return f[len] == '/';
}
130
131static const char *
132simplify(const char *filename)
133{
134	if (startswith(filename, curdir, curdir_len))
135		return filename + curdir_len + 1;
136	else if (startswith(filename, _PATH_DEFSYSPATH,
137	    sizeof(_PATH_DEFSYSPATH)-1)) {
138	    	size_t sz;
139		char *buf;
140		sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3;
141		buf = emalloc(sz);
142		snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH));
143		return buf;
144	} else
145		return filename;
146}
147
148static struct input_stream *
149new_input_file(const char *name, FILE *stream)
150{
151	struct input_stream *istream;
152
153	istream = emalloc(sizeof(*istream));
154	istream->origin.fname = simplify(name);
155	Var_Append(READ_MAKEFILES, name);
156	istream->str = NULL;
157	/* Naturally enough, we start reading at line 0. */
158	istream->origin.lineno = 0;
159	istream->F = stream;
160	istream->ptr = istream->end = NULL;
161	return istream;
162}
163
164static void
165free_input_stream(struct input_stream *istream)
166{
167	if (istream->F) {
168		if (ferror(istream->F))
169			Parse_Error(PARSE_FATAL, "Read error");
170		if (fileno(istream->F) != STDIN_FILENO)
171			(void)fclose(istream->F);
172	}
173	free(istream->str);
174	/* Note we can't free the file names, as they are embedded in GN
175	 * for error reports. */
176	free(istream);
177}
178
179static struct input_stream *
180new_input_string(char *str, const Location *origin)
181{
182	struct input_stream *istream;
183
184	istream = emalloc(sizeof(*istream));
185	/* No malloc, name is always taken from an already existing istream
186	 * and strings are used in for loops, so we need to reset the line
187	 * counter to an appropriate value. */
188	istream->origin = *origin;
189	istream->F = NULL;
190	istream->ptr = istream->str = str;
191	istream->end = str + strlen(str);
192	return istream;
193}
194
195
196void
197Parse_FromString(char *str, unsigned long lineno)
198{
199	Location origin;
200
201	origin.fname = current->origin.fname;
202	origin.lineno = lineno;
203	if (DEBUG(FOR))
204		(void)fprintf(stderr, "%s\n----\n", str);
205
206	Lst_Push(&input_stack, current);
207	assert(current != NULL);
208	current = new_input_string(str, &origin);
209}
210
211
212void
213Parse_FromFile(const char *name, FILE *stream)
214{
215	if (current != NULL)
216		Lst_Push(&input_stack, current);
217	current = new_input_file(name, stream);
218}
219
220bool
221Parse_NextFile(void)
222{
223	if (current != NULL)
224		free_input_stream(current);
225	current = Lst_Pop(&input_stack);
226	return current != NULL;
227}
228
229static int
230grab_new_line_and_readchar(void)
231{
232	size_t len;
233
234	if (current->F) {
235		current->ptr = fgetln(current->F, &len);
236		if (current->ptr) {
237			current->end = current->ptr + len;
238			return *current->ptr++;
239		} else {
240			current->end = NULL;
241		}
242	}
243	return EOF;
244}
245
246static int
247skip_to_end_of_line(void)
248{
249	if (current->F) {
250		if (current->end - current->ptr > 1)
251			current->ptr = current->end - 1;
252		if (*current->ptr == '\n')
253			return *current->ptr++;
254		return EOF;
255	} else {
256		int c;
257
258		do {
259			c = read_char();
260		} while (c != '\n' && c != EOF);
261		return c;
262	}
263}
264
265
266char *
267Parse_ReadNextConditionalLine(Buffer linebuf)
268{
269	int c;
270
271	/* If first char isn't dot, skip to end of line, handling \ */
272	while ((c = read_char()) != '.') {
273		for (;c != '\n'; c = read_char()) {
274			if (c == '\\') {
275				c = read_char();
276				if (c == '\n')
277					current->origin.lineno++;
278			}
279			if (c == EOF)
280				/* Unclosed conditional, reported by cond.c */
281				return NULL;
282		}
283		current->origin.lineno++;
284	}
285
286	/* This is the line we need to copy */
287	return Parse_ReadUnparsedLine(linebuf, "conditional");
288}
289
290static void
291read_logical_line(Buffer linebuf, int c)
292{
293	for (;;) {
294		if (c == '\n') {
295			current->origin.lineno++;
296			break;
297		}
298		if (c == EOF)
299			break;
300		Buf_AddChar(linebuf, c);
301		c = read_char();
302		while (c == '\\') {
303			c = read_char();
304			if (c == '\n') {
305				Buf_AddSpace(linebuf);
306				current->origin.lineno++;
307				do {
308					c = read_char();
309				} while (c == ' ' || c == '\t');
310			} else {
311				Buf_AddChar(linebuf, '\\');
312				if (c == '\\') {
313					Buf_AddChar(linebuf, '\\');
314					c = read_char();
315				}
316				break;
317			}
318		}
319	}
320}
321
322char *
323Parse_ReadUnparsedLine(Buffer linebuf, const char *type)
324{
325	int c;
326
327	Buf_Reset(linebuf);
328	c = read_char();
329	if (c == EOF) {
330		Parse_Error(PARSE_FATAL, "Unclosed %s", type);
331		return NULL;
332	}
333
334	/* Handle '\' at beginning of line, since \\n needs special treatment */
335	while (c == '\\') {
336		c = read_char();
337		if (c == '\n') {
338			current->origin.lineno++;
339			do {
340				c = read_char();
341			} while (c == ' ' || c == '\t');
342		} else {
343			Buf_AddChar(linebuf, '\\');
344			if (c == '\\') {
345				Buf_AddChar(linebuf, '\\');
346				c = read_char();
347			}
348			break;
349		}
350	}
351	read_logical_line(linebuf, c);
352
353	return Buf_Retrieve(linebuf);
354}
355
356/* This is a fairly complex function, but without it, we could not skip
357 * blocks of comments without reading them. */
358static int
359skip_empty_lines_and_read_char(Buffer linebuf)
360{
361	int c;		/* the current character */
362
363	for (;;) {
364		Buf_Reset(linebuf);
365		c = read_char();
366		/* Strip leading spaces, fold on '\n' */
367		if (c == ' ') {
368			do {
369				c = read_char();
370			} while (c == ' ' || c == '\t');
371			while (c == '\\') {
372				c = read_char();
373				if (c == '\n') {
374					current->origin.lineno++;
375					do {
376						c = read_char();
377					} while (c == ' ' || c == '\t');
378				} else {
379					Buf_AddChar(linebuf, '\\');
380					if (c == '\\') {
381						Buf_AddChar(linebuf, '\\');
382						c = read_char();
383					}
384					if (c == EOF)
385						return '\n';
386					else
387						return c;
388				}
389			}
390			assert(c != '\t');
391		}
392		if (c == '#')
393			c = skip_to_end_of_line();
394		/* Almost identical to spaces, except this occurs after
395		 * comments have been taken care of, and we keep the tab
396		 * itself.  */
397		if (c == '\t') {
398			Buf_AddChar(linebuf, '\t');
399			do {
400				c = read_char();
401			} while (c == ' ' || c == '\t');
402			while (c == '\\') {
403				c = read_char();
404				if (c == '\n') {
405					current->origin.lineno++;
406					do {
407						c = read_char();
408					} while (c == ' ' || c == '\t');
409				} else {
410					Buf_AddChar(linebuf, '\\');
411					if (c == '\\') {
412						Buf_AddChar(linebuf, '\\');
413						c = read_char();
414					}
415					if (c == EOF)
416						return '\n';
417					else
418						return c;
419				}
420			}
421		}
422		if (c == '\n')
423			current->origin.lineno++;
424		else
425			return c;
426	}
427}
428
429/* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps
430 * the first tab), handles escaped newlines, and skips over uninteresting
431 * lines.
432 *
433 * The line number is incremented, which implies that continuation
434 * lines are numbered with the last line number (we could do better, at a
435 * price).
436 *
437 * Trivial comments are also removed, but we can't do more, as
438 * we don't know which lines are shell commands or not.  */
439char *
440Parse_ReadNormalLine(Buffer linebuf)
441{
442	int c;		/* the current character */
443
444	c = skip_empty_lines_and_read_char(linebuf);
445
446	if (c == EOF)
447		return NULL;
448	else {
449		read_logical_line(linebuf, c);
450		return Buf_Retrieve(linebuf);
451	}
452}
453
454unsigned long
455Parse_Getlineno(void)
456{
457	return current ? current->origin.lineno : 0;
458}
459
460const char *
461Parse_Getfilename(void)
462{
463	return current ? current->origin.fname : NULL;
464}
465
466void
467Parse_SetLocation(Location *origin)
468{
469	post_parse = origin;
470}
471
472void
473Parse_FillLocation(Location *origin)
474{
475	if (post_parse) {
476		*origin = *post_parse;
477	} else {
478		origin->lineno = Parse_Getlineno();
479		origin->fname = Parse_Getfilename();
480	}
481}
482
483void
484Parse_ReportErrors(void)
485{
486	if (fatal_errors)
487		exit(1);
488	else
489		assert(current == NULL);
490}
491