1/*	$NetBSD: pi.c,v 1.24 2023/08/26 15:18:27 rillig Exp $	*/
2
3/*
4 * Copyright (c) 1980, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33#ifndef lint
34#if 0
35static char sccsid[] = "@(#)pi.c	8.1 (Berkeley) 6/6/93";
36#endif
37__RCSID("$NetBSD: pi.c,v 1.24 2023/08/26 15:18:27 rillig Exp $");
38#endif /* not lint */
39
40#include <stdio.h>
41#include <ctype.h>
42#include <string.h>
43#include <stdlib.h>
44#include "error.h"
45
/*
 * Placeholder header words ("In program ???").  c_header points at
 * these until pi() replaces it after seeing an "In <routine> <name>:"
 * line.  The disabled variant shows the plain declaration that the
 * DECL_STRINGS_3 macro stands in for (macro presumably comes from
 * error.h -- TODO confirm).
 */
#if 0 /* not const-correct */
static char *unk_hdr[] = {"In", "program", "???"};
#else
DECL_STRINGS_3(static, unk_hdr, "In", "program", "???");
#endif

/*
 * Line number word most recently recorded by pi(); NULL until pi() has
 * seen a numbered source line or a "letter linenumber - message" line.
 */
static char *c_linenumber;
/*
 * The three header words pi() places in front of second-pass messages
 * that carry no line number; starts out as the placeholder above.
 */
static char **c_header = &unk_hdr[0];

/* Forward declarations for the helpers defined further down. */
static bool alldigits(const char *);
static bool isdateformat(int, char **);
static bool instringset(const char *, const char **);
static bool piptr(const char *);
59
60
61/*
62 * Attempt to handle error messages produced by pi (and by pc)
63 *
64 *	problem #1:	There is no file name available when a file does not
65 *			use a #include; this will have to be given to error
66 *			in the command line.
67 *	problem #2:	pi doesn't always tell you what line number
 *			an error refers to; for example during the tree
69 *			walk phase of code generation and error detection,
70 *			an error can refer to "variable foo in procedure bletch"
71 *			without giving a line number
72 *	problem #3:	line numbers, when available, are attached to
73 *			the source line, along with the source line itself
74 *			These line numbers must be extracted, and
75 *			the source line thrown away.
76 *	problem #4:	Some error messages produce more than one line number
77 *			on the same message.
78 *			There are only two (I think):
79 *				%s undefined on line%s
80 *				%s improperly used on line%s
81 *			here, the %s makes line plural or singular.
82 *
83 *	Here are the error strings used in pi version 1.2 that can refer
84 *	to a file name or line number:
85 *
86 *		Multiply defined label in case, lines %d and %d
87 *		Goto %s from line %d is into a structured statement
88 *		End matched %s on line %d
89 *		Inserted keyword end matching %s on line %d
90 *
91 *	Here are the general pi patterns recognized:
 *	define piptr = -.*^-.*
 *	define msg = .*
 *	define digit = [0-9]
 *	define name = .*
 *	define date_format = letter*3 letter*3 (digit | (digit digit))
 *			(digit | (digit digit)):digit*2 digit*4
98 *
99 *	{e,E} (piptr) (msg)	Encounter an error during textual scan
100 *	E {digit}* - (msg)	Have an error message that refers to a new line
101 *	E - msg			Have an error message that refers to current
102 *					function, program or procedure
 *	(date_format) (name):	When switching compilation files
 *	... (msg)		When referring to the previous line
105 *	'In' ('procedure'|'function'|'program') (name):
106 *				pi is now complaining about 2nd pass errors.
107 *
108 *	Here is the output from a compilation
109 *
110 *
111 *	     2  	var	i:integer;
112 *	e --------------^--- Inserted ';'
113 *	E 2 - All variables must be declared in one var part
114 *	E 5 - Include filename must end in .i
115 *	Mon Apr 21 15:56 1980  test.h:
116 *	     2  begin
117 *	e ------^--- Inserted ';'
118 *	Mon Apr 21 16:06 1980  test.p:
119 *	E 2 - Function type must be specified
120 *	     6  procedure foo(var x:real);
121 *	e ------^--- Inserted ';'
122 *	In function bletch:
123 *	  E - No assignment to the function variable
124 *	  w - variable x is never used
125 *	E 6 - foo is already defined in this block
126 *	In procedure foo:
127 *	  w - variable x is neither used nor set
128 *	     9  	z : = 23;
129 *	E --------------^--- Undefined variable
130 *	    10  	y = [1];
131 *	e ----------------^--- Inserted ':'
132 *	    13  	z := 345.;
133 *	e -----------------------^--- Digits required after decimal point
134 *	E 10 - Constant set involved in non set context
135 *	E 11 - Type clash: real is incompatible with integer
136 *	   ... Type of expression clashed with type of variable in assignment
137 *	E 12 - Parameter type not identical to type of var parameter x of foo
138 *	In program mung:
139 *	  w - variable y is never used
140 *	  w - type foo is never used
141 *	  w - function bletch is never used
142 *	  E - z undefined on lines 9 13
143 */
/*
 * Lookup tables for instringset().  Each one is a list of words that
 * ends with a NULL sentinel.
 */

/* Month abbreviations accepted in a date line. */
static const char *Months[] = {
	"Jan", "Feb", "Mar",
	"Apr", "May", "Jun",
	"Jul", "Aug", "Sep",
	"Oct", "Nov", "Dec",
	NULL
};

/* Weekday abbreviations accepted in a date line. */
static const char *Days[] = {
	"Sun",
	"Mon", "Tue", "Wed", "Thu", "Fri",
	"Sat",
	NULL
};

/* Routine kinds that may follow "In" in a header line. */
static const char *Piroutines[] = {
	"program",
	"function",
	"procedure",
	NULL
};
155
156
/*
 * Set by pi() while matching its final group of patterns: "structured"
 * when the "is into a structured statement" pattern matched, "multiple"
 * when the "Multiply defined label in case, line" pattern matched.
 * Both are reset to false before that group is tried.
 */
static bool structured, multiple;

/*
 * Word sequences that pi() compares against parts of the current line
 * with wordv_eq().  The disabled variant lists the same words as plain
 * arrays; the live variant declares them through the DECL_STRINGS_n
 * macros (presumably from error.h -- TODO confirm).
 */
#if 0 /* not const-correct */
static char *pi_Endmatched[] = {"End", "matched"};
static char *pi_Inserted[] = {"Inserted", "keyword", "end", "matching"};

static char *pi_multiple[] = {"Multiply", "defined", "label", "in", "case,", "line"};
static char *pi_structured[] = {"is", "into", "a", "structured", "statement"};

static char *pi_und1[] = {"undefined", "on", "line"};
static char *pi_und2[] = {"undefined", "on", "lines"};
static char *pi_imp1[] = {"improperly", "used", "on", "line"};
static char *pi_imp2[] = {"improperly", "used", "on", "lines"};

#else
/* Leading words of messages whose last word is the line number. */
DECL_STRINGS_2(static, pi_Endmatched, "End", "matched");
DECL_STRINGS_4(static, pi_Inserted, "Inserted", "keyword", "end", "matching");

/* Messages that name two lines, or a line in the middle of the text. */
DECL_STRINGS_6(static, pi_multiple,
	       "Multiply", "defined", "label", "in", "case,", "line");
DECL_STRINGS_5(static, pi_structured,
	       "is", "into", "a", "structured", "statement");

/* Second-pass messages that may be followed by several line numbers. */
DECL_STRINGS_3(static, pi_und1, "undefined", "on", "line");
DECL_STRINGS_3(static, pi_und2, "undefined", "on", "lines");
DECL_STRINGS_4(static, pi_imp1, "improperly", "used", "on", "line");
DECL_STRINGS_4(static, pi_imp2, "improperly", "used", "on", "lines");

#endif
186
/*
 * Tell whether every character of string is a decimal digit.
 * The empty string contains no offending character, so it yields true.
 */
static bool
alldigits(const char *string)
{
	const char *cp = string;

	/* isdigit() rejects the terminating NUL, which ends the scan. */
	while (isdigit((unsigned char)*cp))
		cp++;
	return *cp == '\0';
}
194
/*
 * Tell whether member equals (by strcmp) one of the strings in set.
 * set is a list of strings terminated by a NULL pointer.
 */
static bool
instringset(const char *member, const char **set)
{
	const char **candidate = set;

	while (*candidate != NULL) {
		if (strcmp(*candidate, member) == 0)
			return true;
		candidate++;
	}
	return false;
}
203
204static bool
205isdateformat(int wordc, char **wordv)
206{
207	return  wordc == 5
208	     && instringset(wordv[0], Days)
209	     && instringset(wordv[1], Months)
210	     && alldigits(wordv[2])
211	     && alldigits(wordv[4]);
212}
213
/*
 * Tell whether string is a pi error pointer: one or more '-', then a
 * single '^', then zero or more '-', and nothing else.
 */
static bool
piptr(const char *string)
{
	size_t lead;
	const char *tail;

	/* Length of the run of dashes in front of the caret. */
	lead = strspn(string, "-");
	if (lead == 0 || string[lead] != '^')
		return false;

	/* Only dashes may follow the caret. */
	tail = string + lead + 1;
	tail += strspn(tail, "-");
	return *tail == '\0';
}
228
229Errorclass
230pi(void)
231{
232	char **nwordv;
233
234	nwordv = NULL;
235	if (cur_wordc < 2)
236		return C_UNKNOWN;
237	if (strlen(cur_wordv[1]) == 1
238	    && ( cur_wordv[1][0] == 'e' || cur_wordv[1][0] == 'E')
239	    && piptr(cur_wordv[2])
240	) {
241		bool longpiptr = false;
242
243		/*
244		 *	We have recognized a first pass error of the form:
245		 *	letter ------^---- message
246		 *
247		 *	turn into an error message of the form:
248		 *
249		 *	file line 'pascal errortype' letter \n |---- message
250		 *	or of the form:
251		 *	file line letter |---- message
252		 *		when there are strlen("(*[pi]") or more
253		 *		preceding '-' on the error pointer.
254		 *
255		 *	Where the | is intended to be a down arrow, so that
256		 *	the pi error messages can be inserted above the
257		 *	line in error, instead of below.  (All of the other
258		 *	languages put their messages before the source line,
259		 *	instead of after it as does pi.)
260		 *
261		 *	where the pointer to the error has been truncated
262		 *	by 6 characters to account for the fact that
263		 *	the pointer points into a tab preceded input line.
264		 */
265		language = INPI;
266		(void)substitute(cur_wordv[2], '^', '|');
267		longpiptr = position(cur_wordv[2],'|') > (6+8);
268		nwordv = wordvsplice(longpiptr ? 2 : 4, cur_wordc, cur_wordv+1);
269		nwordv[0] = strdup(currentfilename);
270		nwordv[1] = strdup(c_linenumber);
271		if (!longpiptr) {
272			nwordv[2] = Strdup("pascal errortype"); /* XXX leaked */
273			nwordv[3] = cur_wordv[1];
274			nwordv[4] = strdup("%%%\n");
275			if (strlen(nwordv[5]) > (8-2))	/* this is the pointer */
276				nwordv[5] += (8-2);	/* bump over 6 characters */
277		}
278		cur_wordv = nwordv - 1;		/* convert to 1 based */
279		cur_wordc += longpiptr ? 2 : 4;
280		return C_TRUE;
281	}
282	if (cur_wordc >= 4
283	    && strlen(cur_wordv[1]) == 1
284	    && (*cur_wordv[1] == 'E' || *cur_wordv[1] == 'w' || *cur_wordv[1] == 'e')
285	    && alldigits(cur_wordv[2])
286	    && strlen(cur_wordv[3]) == 1
287	    && cur_wordv[3][0] == '-'
288	) {
289		/*
290		 * Message of the form: letter linenumber - message
291		 * Turn into form: filename linenumber letter - message
292		 */
293		language = INPI;
294		nwordv = wordvsplice(1, cur_wordc, cur_wordv + 1);
295		nwordv[0] = strdup(currentfilename);
296		nwordv[1] = cur_wordv[2];
297		nwordv[2] = cur_wordv[1];
298		c_linenumber = cur_wordv[2];
299		cur_wordc += 1;
300		cur_wordv = nwordv - 1;
301		return C_TRUE;
302	}
303	if (cur_wordc >= 3
304	    && strlen(cur_wordv[1]) == 1
305	    && (*cur_wordv[1] == 'E' || *cur_wordv[1] == 'w' || *cur_wordv[1] == 'e')
306	    && strlen(cur_wordv[2]) == 1
307	    && cur_wordv[2][0] == '-'
308	) {
309		/*
310		 * Message of the form: letter - message
311		 *
312		 * This happens only when we are traversing the tree
313		 * during the second pass of pi, and discover semantic
314		 * errors.
315		 *
316		 * We have already (presumably) saved the header message
317		 * and can now construct a nulled error message for the
318		 * current file.
319		 *
320		 * Turns into a message of the form:
321		 *      filename (header) letter - message
322		 *
323		 * First, see if it is a message referring to more than
324		 * one line number.  Only of the form:
325 		 *      %s undefined on line%s
326 		 *      %s improperly used on line%s
327		 */
328		bool undefined = false;
329		int wordindex;
330
331		language = INPI;
332		if ((undefined = wordv_eq(cur_wordv+2, 3, pi_und1))
333		     || (undefined = wordv_eq(cur_wordv+2, 3, pi_und2))
334		     || wordv_eq(cur_wordv+2, 4, pi_imp1)
335		     || wordv_eq(cur_wordv+2, 4, pi_imp2)
336		) {
337			for (wordindex = undefined ? 5 : 6;
338			     wordindex <= cur_wordc;
339			     wordindex++) {
340				if (nwordv != NULL) {
341					free(nwordv[0]);
342					free(nwordv);
343				}
344				nwordv = wordvsplice(2, undefined ? 2 : 3, cur_wordv+1);
345				nwordv[0] = strdup(currentfilename);
346				nwordv[1] = cur_wordv[wordindex];
347				if (wordindex != cur_wordc)
348					erroradd(undefined ? 4 : 5, nwordv,
349						C_TRUE, C_UNKNOWN);
350			}
351			cur_wordc = undefined ? 4 : 5;
352			cur_wordv = nwordv - 1;
353			return C_TRUE;
354		}
355
356		nwordv = wordvsplice(1+3, cur_wordc, cur_wordv+1);
357		nwordv[0] = strdup(currentfilename);
358		nwordv[1] = strdup(c_header[0]);
359		nwordv[2] = strdup(c_header[1]);
360		nwordv[3] = strdup(c_header[2]);
361		cur_wordv = nwordv - 1;
362		cur_wordc += 1 + 3;
363		return C_THISFILE;
364	}
365	if (strcmp(cur_wordv[1], "...") == 0 && c_linenumber != NULL &&
366	    currentfilename != default_currentfilename) {
367		/*
368		 * have a continuation error message
369		 * of the form: ... message
370		 * Turn into form : filename linenumber message
371		 */
372		language = INPI;
373		nwordv = wordvsplice(1, cur_wordc, cur_wordv+1);
374		nwordv[0] = strdup(currentfilename);
375		nwordv[1] = strdup(c_linenumber);
376		cur_wordv = nwordv - 1;
377		cur_wordc += 1;
378		return C_TRUE;
379	}
380	if (cur_wordc == 6
381	   && lastchar(cur_wordv[6]) == ':'
382	   && isdateformat(5, cur_wordv + 1)
383	) {
384		/*
385		 * Have message that tells us we have changed files
386		 */
387		language = INPI;
388		currentfilename = strdup(cur_wordv[6]);
389		clob_last(currentfilename, '\0');
390		return C_SYNC;
391	}
392	if (cur_wordc == 3
393	   && strcmp(cur_wordv[1], "In") == 0
394	   && lastchar(cur_wordv[3]) == ':'
395	   && instringset(cur_wordv[2], Piroutines)
396	) {
397		language = INPI;
398		c_header = wordvsplice(0, cur_wordc, cur_wordv+1);
399		return C_SYNC;
400	}
401
402	/*
403	 * now, check for just the line number followed by the text
404	 */
405	if (alldigits(cur_wordv[1])) {
406		language = INPI;
407		c_linenumber = cur_wordv[1];
408		return C_IGNORE;
409	}
410
411	/*
412	 * Attempt to match messages refering to a line number
413	 *
414	 * Multiply defined label in case, lines %d and %d
415	 * Goto %s from line %d is into a structured statement
416	 * End matched %s on line %d
417	 * Inserted keyword end matching %s on line %d
418	 */
419	structured = false;
420	multiple = false;
421	if (
422	       (cur_wordc == 6 && wordv_eq(cur_wordv+1, 2, pi_Endmatched))
423	    || (cur_wordc == 8 && wordv_eq(cur_wordv+1, 4, pi_Inserted))
424	    || (multiple = (cur_wordc == 9 && wordv_eq(cur_wordv+1,6, pi_multiple)))
425	    || (structured = (cur_wordc == 10 && wordv_eq(cur_wordv+6,5, pi_structured)))
426	) {
427		language = INPI;
428		nwordv = wordvsplice(2, cur_wordc, cur_wordv+1);
429		nwordv[0] = strdup(currentfilename);
430		nwordv[1] = structured ? cur_wordv [5] : cur_wordv[cur_wordc];
431		cur_wordc += 2;
432		cur_wordv = nwordv - 1;
433		if (!multiple)
434			return C_TRUE;
435		erroradd(cur_wordc, nwordv, C_TRUE, C_UNKNOWN);
436		nwordv = wordvsplice(0, cur_wordc, nwordv);
437		nwordv[1] = cur_wordv[cur_wordc - 2];
438		return C_TRUE;
439	}
440	return C_UNKNOWN;
441}
442