1/* $OpenBSD: lowparse.c,v 1.36 2023/09/04 11:35:11 espie Exp $ */ 2 3/* low-level parsing functions. */ 4 5/* 6 * Copyright (c) 1999,2000 Marc Espie. 7 * 8 * Extensive code changes for the OpenBSD project. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE OPENBSD PROJECT AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBSD 23 * PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include <assert.h> 33#include <stddef.h> 34#include <stdio.h> 35#include <stdlib.h> 36#include <string.h> 37#include <unistd.h> 38#include "defines.h" 39#include "buf.h" 40#include "lowparse.h" 41#include "error.h" 42#include "lst.h" 43#include "memory.h" 44#include "pathnames.h" 45#ifndef LOCATION_TYPE 46#include "location.h" 47#endif 48#include "var.h" 49 50 51#define READ_MAKEFILES "MAKEFILE_LIST" 52 53/* Input stream structure: file or string. 54 * Files have str == NULL, F != NULL. 55 * Strings have F == NULL, str != NULL. 56 */ 57struct input_stream { 58 Location origin; /* Name of file and line number */ 59 FILE *F; /* Open stream, or NULL if pure string. */ 60 char *str; /* Input string, if F == NULL. */ 61 62 /* Line buffer. */ 63 char *ptr; /* Where we are. */ 64 char *end; /* Don't overdo it. */ 65}; 66 67static struct input_stream *current; /* the input_stream being parsed. */ 68 69static LIST input_stack; /* Stack of input_stream waiting to be parsed 70 * (includes and loop reparses) */ 71 72/* record gnode location for proper reporting at runtime */ 73static Location *post_parse = NULL; 74 75/* input_stream ctors. 76 * 77 * obj = new_input_file(filename, filehandle); 78 * Create input stream from filename, filehandle. */ 79static struct input_stream *new_input_file(const char *, FILE *); 80/* obj = new_input_string(str, origin); 81 * Create input stream from str, origin. */ 82static struct input_stream *new_input_string(char *, const Location *); 83/* free_input_stream(obj); 84 * Discard consumed input stream, closing files, freeing memory. */ 85static void free_input_stream(struct input_stream *); 86 87 88/* Handling basic character reading. 89 * c = read_char(); 90 * New character c from current input stream, or EOF at end of stream. */ 91#define read_char() \ 92 current->ptr < current->end ? *current->ptr++ : grab_new_line_and_readchar() 93/* char = grab_new_line_and_readchar(); 94 * Guts for read_char. Grabs a new line off fgetln when we have 95 * consumed the current line and returns the first char, or EOF at end of 96 * stream. */ 97static int grab_new_line_and_readchar(void); 98/* c = skip_to_end_of_line(); 99 * Skips to the end of the current line, returns either '\n' or EOF. */ 100static int skip_to_end_of_line(void); 101 102 103/* Helper functions to handle basic parsing. */ 104/* read_logical_line(buffer, firstchar); 105 * Grabs logical line into buffer, the first character has already been 106 * read into firstchar. */ 107static void read_logical_line(Buffer, int); 108 109/* firstchar = ParseSkipEmptyLines(buffer); 110 * Scans lines, skipping empty lines. May put some characters into 111 * buffer, returns the first character useful to continue parsing 112 * (e.g., not a backslash or a space. */ 113static int skip_empty_lines_and_read_char(Buffer); 114 115const char *curdir; 116size_t curdir_len; 117 118void 119Parse_setcurdir(const char *dir) 120{ 121 curdir = dir; 122 curdir_len = strlen(dir); 123} 124 125static bool 126startswith(const char *f, const char *s, size_t len) 127{ 128 return strncmp(f, s, len) == 0 && f[len] == '/'; 129} 130 131static const char * 132simplify(const char *filename) 133{ 134 if (startswith(filename, curdir, curdir_len)) 135 return filename + curdir_len + 1; 136 else if (startswith(filename, _PATH_DEFSYSPATH, 137 sizeof(_PATH_DEFSYSPATH)-1)) { 138 size_t sz; 139 char *buf; 140 sz = strlen(filename) - sizeof(_PATH_DEFSYSPATH)+3; 141 buf = emalloc(sz); 142 snprintf(buf, sz, "<%s>", filename+sizeof(_PATH_DEFSYSPATH)); 143 return buf; 144 } else 145 return filename; 146} 147 148static struct input_stream * 149new_input_file(const char *name, FILE *stream) 150{ 151 struct input_stream *istream; 152 153 istream = emalloc(sizeof(*istream)); 154 istream->origin.fname = simplify(name); 155 Var_Append(READ_MAKEFILES, name); 156 istream->str = NULL; 157 /* Naturally enough, we start reading at line 0. */ 158 istream->origin.lineno = 0; 159 istream->F = stream; 160 istream->ptr = istream->end = NULL; 161 return istream; 162} 163 164static void 165free_input_stream(struct input_stream *istream) 166{ 167 if (istream->F) { 168 if (ferror(istream->F)) 169 Parse_Error(PARSE_FATAL, "Read error"); 170 if (fileno(istream->F) != STDIN_FILENO) 171 (void)fclose(istream->F); 172 } 173 free(istream->str); 174 /* Note we can't free the file names, as they are embedded in GN 175 * for error reports. */ 176 free(istream); 177} 178 179static struct input_stream * 180new_input_string(char *str, const Location *origin) 181{ 182 struct input_stream *istream; 183 184 istream = emalloc(sizeof(*istream)); 185 /* No malloc, name is always taken from an already existing istream 186 * and strings are used in for loops, so we need to reset the line 187 * counter to an appropriate value. */ 188 istream->origin = *origin; 189 istream->F = NULL; 190 istream->ptr = istream->str = str; 191 istream->end = str + strlen(str); 192 return istream; 193} 194 195 196void 197Parse_FromString(char *str, unsigned long lineno) 198{ 199 Location origin; 200 201 origin.fname = current->origin.fname; 202 origin.lineno = lineno; 203 if (DEBUG(FOR)) 204 (void)fprintf(stderr, "%s\n----\n", str); 205 206 Lst_Push(&input_stack, current); 207 assert(current != NULL); 208 current = new_input_string(str, &origin); 209} 210 211 212void 213Parse_FromFile(const char *name, FILE *stream) 214{ 215 if (current != NULL) 216 Lst_Push(&input_stack, current); 217 current = new_input_file(name, stream); 218} 219 220bool 221Parse_NextFile(void) 222{ 223 if (current != NULL) 224 free_input_stream(current); 225 current = Lst_Pop(&input_stack); 226 return current != NULL; 227} 228 229static int 230grab_new_line_and_readchar(void) 231{ 232 size_t len; 233 234 if (current->F) { 235 current->ptr = fgetln(current->F, &len); 236 if (current->ptr) { 237 current->end = current->ptr + len; 238 return *current->ptr++; 239 } else { 240 current->end = NULL; 241 } 242 } 243 return EOF; 244} 245 246static int 247skip_to_end_of_line(void) 248{ 249 if (current->F) { 250 if (current->end - current->ptr > 1) 251 current->ptr = current->end - 1; 252 if (*current->ptr == '\n') 253 return *current->ptr++; 254 return EOF; 255 } else { 256 int c; 257 258 do { 259 c = read_char(); 260 } while (c != '\n' && c != EOF); 261 return c; 262 } 263} 264 265 266char * 267Parse_ReadNextConditionalLine(Buffer linebuf) 268{ 269 int c; 270 271 /* If first char isn't dot, skip to end of line, handling \ */ 272 while ((c = read_char()) != '.') { 273 for (;c != '\n'; c = read_char()) { 274 if (c == '\\') { 275 c = read_char(); 276 if (c == '\n') 277 current->origin.lineno++; 278 } 279 if (c == EOF) 280 /* Unclosed conditional, reported by cond.c */ 281 return NULL; 282 } 283 current->origin.lineno++; 284 } 285 286 /* This is the line we need to copy */ 287 return Parse_ReadUnparsedLine(linebuf, "conditional"); 288} 289 290static void 291read_logical_line(Buffer linebuf, int c) 292{ 293 for (;;) { 294 if (c == '\n') { 295 current->origin.lineno++; 296 break; 297 } 298 if (c == EOF) 299 break; 300 Buf_AddChar(linebuf, c); 301 c = read_char(); 302 while (c == '\\') { 303 c = read_char(); 304 if (c == '\n') { 305 Buf_AddSpace(linebuf); 306 current->origin.lineno++; 307 do { 308 c = read_char(); 309 } while (c == ' ' || c == '\t'); 310 } else { 311 Buf_AddChar(linebuf, '\\'); 312 if (c == '\\') { 313 Buf_AddChar(linebuf, '\\'); 314 c = read_char(); 315 } 316 break; 317 } 318 } 319 } 320} 321 322char * 323Parse_ReadUnparsedLine(Buffer linebuf, const char *type) 324{ 325 int c; 326 327 Buf_Reset(linebuf); 328 c = read_char(); 329 if (c == EOF) { 330 Parse_Error(PARSE_FATAL, "Unclosed %s", type); 331 return NULL; 332 } 333 334 /* Handle '\' at beginning of line, since \\n needs special treatment */ 335 while (c == '\\') { 336 c = read_char(); 337 if (c == '\n') { 338 current->origin.lineno++; 339 do { 340 c = read_char(); 341 } while (c == ' ' || c == '\t'); 342 } else { 343 Buf_AddChar(linebuf, '\\'); 344 if (c == '\\') { 345 Buf_AddChar(linebuf, '\\'); 346 c = read_char(); 347 } 348 break; 349 } 350 } 351 read_logical_line(linebuf, c); 352 353 return Buf_Retrieve(linebuf); 354} 355 356/* This is a fairly complex function, but without it, we could not skip 357 * blocks of comments without reading them. */ 358static int 359skip_empty_lines_and_read_char(Buffer linebuf) 360{ 361 int c; /* the current character */ 362 363 for (;;) { 364 Buf_Reset(linebuf); 365 c = read_char(); 366 /* Strip leading spaces, fold on '\n' */ 367 if (c == ' ') { 368 do { 369 c = read_char(); 370 } while (c == ' ' || c == '\t'); 371 while (c == '\\') { 372 c = read_char(); 373 if (c == '\n') { 374 current->origin.lineno++; 375 do { 376 c = read_char(); 377 } while (c == ' ' || c == '\t'); 378 } else { 379 Buf_AddChar(linebuf, '\\'); 380 if (c == '\\') { 381 Buf_AddChar(linebuf, '\\'); 382 c = read_char(); 383 } 384 if (c == EOF) 385 return '\n'; 386 else 387 return c; 388 } 389 } 390 assert(c != '\t'); 391 } 392 if (c == '#') 393 c = skip_to_end_of_line(); 394 /* Almost identical to spaces, except this occurs after 395 * comments have been taken care of, and we keep the tab 396 * itself. */ 397 if (c == '\t') { 398 Buf_AddChar(linebuf, '\t'); 399 do { 400 c = read_char(); 401 } while (c == ' ' || c == '\t'); 402 while (c == '\\') { 403 c = read_char(); 404 if (c == '\n') { 405 current->origin.lineno++; 406 do { 407 c = read_char(); 408 } while (c == ' ' || c == '\t'); 409 } else { 410 Buf_AddChar(linebuf, '\\'); 411 if (c == '\\') { 412 Buf_AddChar(linebuf, '\\'); 413 c = read_char(); 414 } 415 if (c == EOF) 416 return '\n'; 417 else 418 return c; 419 } 420 } 421 } 422 if (c == '\n') 423 current->origin.lineno++; 424 else 425 return c; 426 } 427} 428 429/* Parse_ReadNormalLine removes beginning and trailing blanks (but keeps 430 * the first tab), handles escaped newlines, and skips over uninteresting 431 * lines. 432 * 433 * The line number is incremented, which implies that continuation 434 * lines are numbered with the last line number (we could do better, at a 435 * price). 436 * 437 * Trivial comments are also removed, but we can't do more, as 438 * we don't know which lines are shell commands or not. */ 439char * 440Parse_ReadNormalLine(Buffer linebuf) 441{ 442 int c; /* the current character */ 443 444 c = skip_empty_lines_and_read_char(linebuf); 445 446 if (c == EOF) 447 return NULL; 448 else { 449 read_logical_line(linebuf, c); 450 return Buf_Retrieve(linebuf); 451 } 452} 453 454unsigned long 455Parse_Getlineno(void) 456{ 457 return current ? current->origin.lineno : 0; 458} 459 460const char * 461Parse_Getfilename(void) 462{ 463 return current ? current->origin.fname : NULL; 464} 465 466void 467Parse_SetLocation(Location *origin) 468{ 469 post_parse = origin; 470} 471 472void 473Parse_FillLocation(Location *origin) 474{ 475 if (post_parse) { 476 *origin = *post_parse; 477 } else { 478 origin->lineno = Parse_Getlineno(); 479 origin->fname = Parse_Getfilename(); 480 } 481} 482 483void 484Parse_ReportErrors(void) 485{ 486 if (fatal_errors) 487 exit(1); 488 else 489 assert(current == NULL); 490} 491