1/*-
2 * Copyright (c) 2010 The NetBSD Foundation, Inc.
3 * All rights reserved.
4 *
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by David A. Holland.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
18 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
21 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 * POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#include <stdlib.h>
31#include <string.h>
32#include <limits.h>
33#include <errno.h>
34
35//#define DEBUG
36#ifdef DEBUG
37#include <stdio.h>
38#endif
39
40#include "utils.h"
41#include "array.h"
42#include "mode.h"
43#include "place.h"
44#include "eval.h"
45
46/*
47 * e ::=
48 *    e1 ? e2 : e3
49 *    e1 || e2
50 *    e1 && e2
51 *    e1 | e2
52 *    e1 ^ e2
53 *    e1 & e2
54 *    e1 == e2  |  e1 != e2
55 *    e1 < e2   |  e1 <= e2  |  e1 > e2  |  e1 >= e2
56 *    e1 << e2  |  e1 >> e2
57 *    e1 + e2   |  e1 - e2
58 *    e1 * e2   |  e1 / e2   |  e1 % e2
59 *    !e  |  ~e  |  -e  |  +e
60 *    ( e )  |  ident
61 */
62
63enum tokens {
64	T_EOF,		/* end of input */
65	T_VAL,		/* value */
66	T_LPAREN,	/* parens */
67	T_RPAREN,
68	T_PIPEPIPE,	/* operators */
69	T_AMPAMP,
70	T_EQEQ,
71	T_BANGEQ,
72	T_LTEQ,
73	T_GTEQ,
74	T_LTLT,
75	T_GTGT,
76	T_QUES,
77	T_COLON,
78	T_PIPE,
79	T_CARET,
80	T_AMP,
81	T_LT,
82	T_GT,
83	T_PLUS,
84	T_MINUS,
85	T_STAR,
86	T_SLASH,
87	T_PCT,
88	T_BANG,
89	T_TILDE,
90};
91
92static const struct {
93	char c1, c2;
94	enum tokens tok;
95} tokens_2[] = {
96	{ '|', '|', T_PIPEPIPE },
97	{ '&', '&', T_AMPAMP },
98	{ '=', '=', T_EQEQ },
99	{ '!', '=', T_BANGEQ },
100	{ '<', '=', T_LTEQ },
101	{ '>', '=', T_GTEQ },
102	{ '<', '<', T_LTLT },
103	{ '>', '>', T_GTGT },
104};
105static const unsigned num_tokens_2 = HOWMANY(tokens_2);
106
107static const struct {
108	char c1;
109	enum tokens tok;
110} tokens_1[] = {
111	{ '?', T_QUES },
112	{ ':', T_COLON },
113	{ '|', T_PIPE },
114	{ '^', T_CARET },
115	{ '&', T_AMP },
116	{ '<', T_LT },
117	{ '>', T_GT },
118	{ '+', T_PLUS },
119	{ '-', T_MINUS },
120	{ '*', T_STAR },
121	{ '/', T_SLASH },
122	{ '%', T_PCT },
123	{ '!', T_BANG },
124	{ '~', T_TILDE },
125	{ '(', T_LPAREN },
126	{ ')', T_RPAREN },
127};
128static const unsigned num_tokens_1 = HOWMANY(tokens_1);
129
130struct token {
131	struct place place;
132	enum tokens tok;
133	int val;
134};
135DECLARRAY(token, static UNUSED);
136DEFARRAY(token, static);
137
138static struct tokenarray tokens;
139
140static
141struct token *
142token_create(const struct place *p, enum tokens tok, int val)
143{
144	struct token *t;
145
146	t = domalloc(sizeof(*t));
147	t->place = *p;
148	t->tok = tok;
149	t->val = val;
150	return t;
151}
152
153static
154void
155token_destroy(struct token *t)
156{
157	dofree(t, sizeof(*t));
158}
159
160DESTROYALL_ARRAY(token, );
161
162#ifdef DEBUG
163static
164void
165printtokens(void)
166{
167	unsigned i, num;
168	struct token *t;
169
170	fprintf(stderr, "tokens:");
171	num = tokenarray_num(&tokens);
172	for (i=0; i<num; i++) {
173		t = tokenarray_get(&tokens, i);
174		switch (t->tok) {
175		    case T_EOF: fprintf(stderr, " <eof>"); break;
176		    case T_VAL: fprintf(stderr, " %d", t->val); break;
177		    case T_LPAREN: fprintf(stderr, " ("); break;
178		    case T_RPAREN: fprintf(stderr, " )"); break;
179		    case T_PIPEPIPE: fprintf(stderr, " ||"); break;
180		    case T_AMPAMP: fprintf(stderr, " &&"); break;
181		    case T_EQEQ: fprintf(stderr, " =="); break;
182		    case T_BANGEQ: fprintf(stderr, " !="); break;
183		    case T_LTEQ: fprintf(stderr, " <="); break;
184		    case T_GTEQ: fprintf(stderr, " >="); break;
185		    case T_LTLT: fprintf(stderr, " <<"); break;
186		    case T_GTGT: fprintf(stderr, " >>"); break;
187		    case T_QUES: fprintf(stderr, " ?"); break;
188		    case T_COLON: fprintf(stderr, " :"); break;
189		    case T_PIPE: fprintf(stderr, " |"); break;
190		    case T_CARET: fprintf(stderr, " ^"); break;
191		    case T_AMP: fprintf(stderr, " &"); break;
192		    case T_LT: fprintf(stderr, " <"); break;
193		    case T_GT: fprintf(stderr, " >"); break;
194		    case T_PLUS: fprintf(stderr, " +"); break;
195		    case T_MINUS: fprintf(stderr, " -"); break;
196		    case T_STAR: fprintf(stderr, " *"); break;
197		    case T_SLASH: fprintf(stderr, " /"); break;
198		    case T_PCT: fprintf(stderr, " %%"); break;
199		    case T_BANG: fprintf(stderr, " !"); break;
200		    case T_TILDE: fprintf(stderr, " ~"); break;
201		}
202	}
203	fprintf(stderr, "\n");
204}
205#endif
206
207static
208bool
209isuop(enum tokens tok)
210{
211	switch (tok) {
212	    case T_BANG:
213	    case T_TILDE:
214	    case T_MINUS:
215	    case T_PLUS:
216		return true;
217	    default:
218		break;
219	}
220	return false;
221}
222
223static
224bool
225isbop(enum tokens tok)
226{
227	switch (tok) {
228	    case T_EOF:
229	    case T_VAL:
230	    case T_LPAREN:
231	    case T_RPAREN:
232	    case T_COLON:
233	    case T_QUES:
234	    case T_BANG:
235	    case T_TILDE:
236		return false;
237	    default:
238		break;
239	}
240	return true;
241}
242
243static
244bool
245isop(enum tokens tok)
246{
247	switch (tok) {
248	    case T_EOF:
249	    case T_VAL:
250	    case T_LPAREN:
251	    case T_RPAREN:
252		return false;
253	    default:
254		break;
255	}
256	return true;
257}
258
259static
260int
261getprec(enum tokens tok)
262{
263	switch (tok) {
264	    case T_BANG: case T_TILDE: return -1;
265	    case T_STAR: case T_SLASH: case T_PCT: return 0;
266	    case T_PLUS: case T_MINUS: return 1;
267	    case T_LTLT: case T_GTGT: return 2;
268	    case T_LT: case T_LTEQ: case T_GT: case T_GTEQ: return 3;
269	    case T_EQEQ: case T_BANGEQ: return 4;
270	    case T_AMP: return 5;
271	    case T_CARET: return 6;
272	    case T_PIPE: return 7;
273	    case T_AMPAMP: return 8;
274	    case T_PIPEPIPE: return 9;
275	    default: break;
276	}
277	return 10;
278}
279
280static
281bool
282looser(enum tokens t1, enum tokens t2)
283{
284	return getprec(t1) >= getprec(t2);
285}
286
287static
288int
289eval_uop(enum tokens op, int val)
290{
291	switch (op) {
292	    case T_BANG: val = !val; break;
293	    case T_TILDE: val = (int)~(unsigned)val; break;
294	    case T_MINUS: val = -val; break;
295	    case T_PLUS: break;
296	    default: assert(0); break;
297	}
298	return val;
299}
300
301static
302int
303eval_bop(struct place *p, int lv, enum tokens op, int rv)
304{
305	unsigned mask;
306
307	switch (op) {
308	    case T_PIPEPIPE: return lv || rv;
309	    case T_AMPAMP:   return lv && rv;
310	    case T_PIPE:     return (int)((unsigned)lv | (unsigned)rv);
311	    case T_CARET:    return (int)((unsigned)lv ^ (unsigned)rv);
312	    case T_AMP:      return (int)((unsigned)lv & (unsigned)rv);
313	    case T_EQEQ:     return lv == rv;
314	    case T_BANGEQ:   return lv != rv;
315	    case T_LT:       return lv < rv;
316	    case T_GT:       return lv > rv;
317	    case T_LTEQ:     return lv <= rv;
318	    case T_GTEQ:     return lv >= rv;
319
320	    case T_LTLT:
321	    case T_GTGT:
322		if (rv < 0) {
323			complain(p, "Negative bit-shift");
324			complain_fail();
325			rv = 0;
326		}
327		if ((unsigned)rv >= CHAR_BIT * sizeof(unsigned)) {
328			complain(p, "Bit-shift farther than type width");
329			complain_fail();
330			rv = 0;
331		}
332		if (op == T_LTLT) {
333			return (int)((unsigned)lv << (unsigned)rv);
334		}
335		mask = ((unsigned)-1) << (CHAR_BIT * sizeof(unsigned) - rv);
336		lv = (int)(((unsigned)lv >> (unsigned)rv) | mask);
337		return lv;
338
339	    case T_MINUS:
340		if (rv == INT_MIN) {
341			if (lv == INT_MIN) {
342				return 0;
343			}
344			lv--;
345			rv++;
346		}
347		rv = -rv;
348		/* FALLTHROUGH */
349	    case T_PLUS:
350		if (rv > 0 && lv > (INT_MAX - rv)) {
351			complain(p, "Integer overflow");
352			complain_fail();
353			return INT_MAX;
354		}
355		if (rv < 0 && lv < (INT_MIN - rv)) {
356			complain(p, "Integer underflow");
357			complain_fail();
358			return INT_MIN;
359		}
360		return lv + rv;
361
362	    case T_STAR:
363		if (rv == 0) {
364			return 0;
365		}
366		if (rv == 1) {
367			return lv;
368		}
369		if (rv == -1 && lv == INT_MIN) {
370			lv++;
371			lv = -lv;
372			if (lv == INT_MAX) {
373				complain(p, "Integer overflow");
374				complain_fail();
375				return INT_MAX;
376			}
377			lv++;
378			return lv;
379		}
380		if (lv == INT_MIN && rv < 0) {
381			complain(p, "Integer overflow");
382			complain_fail();
383			return INT_MAX;
384		}
385		if (lv == INT_MIN && rv > 0) {
386			complain(p, "Integer underflow");
387			complain_fail();
388			return INT_MIN;
389		}
390		if (rv < 0) {
391			rv = -rv;
392			lv = -lv;
393		}
394		if (lv > 0 && lv > INT_MAX / rv) {
395			complain(p, "Integer overflow");
396			complain_fail();
397			return INT_MAX;
398		}
399		if (lv < 0 && lv < INT_MIN / rv) {
400			complain(p, "Integer underflow");
401			complain_fail();
402			return INT_MIN;
403		}
404		return lv * rv;
405
406	    case T_SLASH:
407		if (rv == 0) {
408			complain(p, "Division by zero");
409			complain_fail();
410			return 0;
411		}
412		return lv / rv;
413
414	    case T_PCT:
415		if (rv == 0) {
416			complain(p, "Modulus by zero");
417			complain_fail();
418			return 0;
419		}
420		return lv % rv;
421
422	    default: assert(0); break;
423	}
424	return 0;
425}
426
427static
428void
429tryreduce(void)
430{
431	unsigned num;
432	struct token *t1, *t2, *t3, *t4, *t5, *t6;
433
434	while (1) {
435#ifdef DEBUG
436		printtokens();
437#endif
438		num = tokenarray_num(&tokens);
439		t1 = (num >= 1) ? tokenarray_get(&tokens, num-1) : NULL;
440		t2 = (num >= 2) ? tokenarray_get(&tokens, num-2) : NULL;
441		t3 = (num >= 3) ? tokenarray_get(&tokens, num-3) : NULL;
442
443		if (num >= 3 &&
444		    t3->tok == T_LPAREN &&
445		    t2->tok == T_VAL &&
446		    t1->tok == T_RPAREN) {
447			/* (x) -> x */
448			t2->place = t3->place;
449			token_destroy(t1);
450			token_destroy(t3);
451			tokenarray_remove(&tokens, num-1);
452			tokenarray_remove(&tokens, num-3);
453			continue;
454		}
455
456		if (num >= 2 &&
457		    (num == 2 || isop(t3->tok) || t3->tok == T_LPAREN) &&
458		    isuop(t2->tok) &&
459		    t1->tok == T_VAL) {
460			/* unary operator */
461			t1->val = eval_uop(t2->tok, t1->val);
462			t1->place = t2->place;
463			token_destroy(t2);
464			tokenarray_remove(&tokens, num-2);
465			continue;
466		}
467		if (num >= 2 &&
468		    (num == 2 || isop(t3->tok) || t3->tok == T_LPAREN) &&
469		    t2->tok != T_LPAREN && t2->tok != T_VAL &&
470		    t1->tok == T_VAL) {
471			complain(&t2->place, "Invalid unary operator");
472			complain_fail();
473			token_destroy(t2);
474			tokenarray_remove(&tokens, num-2);
475			continue;
476		}
477
478
479		t4 = (num >= 4) ? tokenarray_get(&tokens, num-4) : NULL;
480
481		if (num >= 4 &&
482		    t4->tok == T_VAL &&
483		    isbop(t3->tok) &&
484		    t2->tok == T_VAL) {
485			/* binary operator */
486			if (looser(t1->tok, t3->tok)) {
487				t4->val = eval_bop(&t3->place,
488						   t4->val, t3->tok, t2->val);
489				token_destroy(t2);
490				token_destroy(t3);
491				tokenarray_remove(&tokens, num-2);
492				tokenarray_remove(&tokens, num-3);
493				continue;
494			}
495			break;
496		}
497
498		t5 = (num >= 5) ? tokenarray_get(&tokens, num-5) : NULL;
499		t6 = (num >= 6) ? tokenarray_get(&tokens, num-6) : NULL;
500
501		if (num >= 6 &&
502		    t6->tok == T_VAL &&
503		    t5->tok == T_QUES &&
504		    t4->tok == T_VAL &&
505		    t3->tok == T_COLON &&
506		    t2->tok == T_VAL &&
507		    !isop(t1->tok)) {
508			/* conditional expression */
509			t6->val = t6->val ? t4->val : t2->val;
510			token_destroy(t2);
511			token_destroy(t3);
512			token_destroy(t4);
513			token_destroy(t5);
514			tokenarray_remove(&tokens, num-2);
515			tokenarray_remove(&tokens, num-3);
516			tokenarray_remove(&tokens, num-4);
517			tokenarray_remove(&tokens, num-5);
518			continue;
519		}
520
521		if (num >= 2 &&
522		    t2->tok == T_LPAREN &&
523		    t1->tok == T_RPAREN) {
524			complain(&t1->place, "Value expected within ()");
525			complain_fail();
526			t1->tok = T_VAL;
527			t1->val = 0;
528			token_destroy(t1);
529			tokenarray_remove(&tokens, num-1);
530			continue;
531		}
532
533		if (num >= 2 &&
534		    t2->tok == T_VAL &&
535		    t1->tok == T_VAL) {
536			complain(&t1->place, "Operator expected");
537			complain_fail();
538			token_destroy(t1);
539			tokenarray_remove(&tokens, num-1);
540			continue;
541		}
542
543		if (num >= 2 &&
544		    isop(t2->tok) &&
545		    t1->tok == T_EOF) {
546			complain(&t1->place, "Value expected after operator");
547			complain_fail();
548			token_destroy(t2);
549			tokenarray_remove(&tokens, num-2);
550			continue;
551		}
552
553		if (num == 2 &&
554		    t2->tok == T_VAL &&
555		    t1->tok == T_RPAREN) {
556			complain(&t1->place, "Excess right parenthesis");
557			complain_fail();
558			token_destroy(t1);
559			tokenarray_remove(&tokens, num-1);
560			continue;
561		}
562
563		if (num == 3 &&
564		    t3->tok == T_LPAREN &&
565		    t2->tok == T_VAL &&
566		    t1->tok == T_EOF) {
567			complain(&t1->place, "Unclosed left parenthesis");
568			complain_fail();
569			token_destroy(t3);
570			tokenarray_remove(&tokens, num-3);
571			continue;
572		}
573
574		if (num == 2 &&
575		    t2->tok == T_VAL &&
576		    t1->tok == T_EOF) {
577			/* accepting state */
578			break;
579		}
580
581		if (num >= 1 &&
582		    t1->tok == T_EOF) {
583			/* any other configuration at eof is an error */
584			complain(&t1->place, "Parse error");
585			complain_fail();
586			break;
587		}
588
589		/* otherwise, wait for more input */
590		break;
591	}
592}
593
594static
595void
596token(struct place *p, enum tokens tok, int val)
597{
598	struct token *t;
599
600	t = token_create(p, tok, val);
601
602	tokenarray_add(&tokens, t, NULL);
603	tryreduce();
604}
605
606static
607int
608wordval(struct place *p, char *word)
609{
610	unsigned long val;
611	char *t;
612
613	if (word[0] >= '0' && word[0] <= '9') {
614		errno = 0;
615		val = strtoul(word, &t, 0);
616		if (errno) {
617			complain(p, "Invalid integer constant");
618			complain_fail();
619			return 0;
620		}
621		while (*t == 'U' || *t == 'L') {
622			t++;
623		}
624		if (*t != '\0') {
625			complain(p, "Trailing garbage after integer constant");
626			complain_fail();
627			return 0;
628		}
629		if (val > INT_MAX) {
630			complain(p, "Integer constant too large");
631			complain_fail();
632			return INT_MAX;
633		}
634		return val;
635	}
636
637	/* if it's a symbol, warn and substitute 0. */
638	if (warns.undef) {
639		complain(p, "Warning: value of undefined symbol %s is 0",
640			 word);
641		if (mode.werror) {
642			complain_fail();
643		}
644	}
645	debuglog(p, "Undefined symbol %s; substituting 0", word);
646	return 0;
647}
648
649static
650bool
651check_word(struct place *p, char *expr, size_t pos, size_t *len_ret)
652{
653	size_t len;
654	int val;
655	char tmp;
656
657	if (!strchr(alnum, expr[pos])) {
658		return false;
659	}
660	len = strspn(expr + pos, alnum);
661	tmp = expr[pos + len];
662	expr[pos + len] = '\0';
663	val = wordval(p, expr + pos);
664	expr[pos + len] = tmp;
665	token(p, T_VAL, val);
666	*len_ret = len;
667	return true;
668}
669
670static
671bool
672check_tokens_2(struct place *p, char *expr, size_t pos)
673{
674	unsigned i;
675
676	for (i=0; i<num_tokens_2; i++) {
677		if (expr[pos] == tokens_2[i].c1 &&
678		    expr[pos+1] == tokens_2[i].c2) {
679			token(p, tokens_2[i].tok, 0);
680			return true;
681		}
682	}
683	return false;
684}
685
686static
687bool
688check_tokens_1(struct place *p, char *expr, size_t pos)
689{
690	unsigned i;
691
692	for (i=0; i<num_tokens_1; i++) {
693		if (expr[pos] == tokens_1[i].c1) {
694			token(p, tokens_1[i].tok, 0);
695			return true;
696		}
697	}
698	return false;
699}
700
701static
702void
703tokenize(struct place *p, char *expr)
704{
705	size_t pos, len;
706
707	pos = 0;
708	while (expr[pos] != '\0') {
709		len = strspn(expr+pos, ws);
710		pos += len;
711		place_addcolumns(p, len);
712		/* trailing whitespace is supposed to have been pruned */
713		assert(expr[pos] != '\0');
714		if (check_word(p, expr, pos, &len)) {
715			pos += len;
716			place_addcolumns(p, len);
717			continue;
718		}
719		if (check_tokens_2(p, expr, pos)) {
720			pos += 2;
721			place_addcolumns(p, 2);
722			continue;
723		}
724		if (check_tokens_1(p, expr, pos)) {
725			pos++;
726			place_addcolumns(p, 1);
727			continue;
728		}
729		complain(p, "Invalid character %u in #if-expression",
730			 (unsigned char)expr[pos]);
731		complain_fail();
732		pos++;
733		place_addcolumns(p, 1);
734	}
735	token(p, T_EOF, 0);
736}
737
738bool
739eval(struct place *p, char *expr)
740{
741	struct token *t1, *t2;
742	unsigned num;
743	bool result;
744
745#ifdef DEBUG
746	fprintf(stderr, "eval: %s\n", expr);
747#endif
748	debuglog(p, "eval: %s", expr);
749
750	tokenarray_init(&tokens);
751	tokenize(p, expr);
752
753	result = false;
754	num = tokenarray_num(&tokens);
755	if (num == 2) {
756		t1 = tokenarray_get(&tokens, num-1);
757		t2 = tokenarray_get(&tokens, num-2);
758		if (t2->tok == T_VAL &&
759		    t1->tok == T_EOF) {
760			result = t2->val != 0;
761		}
762	}
763
764	tokenarray_destroyall(&tokens);
765	tokenarray_cleanup(&tokens);
766	return result;
767}
768