1/*
2 * Copyright 2006-2012 Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Stephan A��mus <superstippi@gmx.de>
7 *		John Scipione <jscipione@gmail.com>
8 *		Ingo Weinhold <bonefish@cs.tu-berlin.de>
9 */
10
11#include <ExpressionParser.h>
12
13#include <ctype.h>
14#include <math.h>
15#include <stdio.h>
16#include <stdlib.h>
17#include <string.h>
18
19#include <m_apm.h>
20
21
22static const int32 kMaxDecimalPlaces = 32;
23
24enum {
25	TOKEN_IDENTIFIER			= 0,
26
27	TOKEN_CONSTANT,
28
29	TOKEN_PLUS,
30	TOKEN_MINUS,
31
32	TOKEN_STAR,
33	TOKEN_SLASH,
34	TOKEN_MODULO,
35
36	TOKEN_POWER,
37	TOKEN_FACTORIAL,
38
39	TOKEN_OPENING_BRACKET,
40	TOKEN_CLOSING_BRACKET,
41
42	TOKEN_AND,
43	TOKEN_OR,
44	TOKEN_NOT,
45
46	TOKEN_NONE,
47	TOKEN_END_OF_LINE
48};
49
50struct ExpressionParser::Token {
51	Token()
52		: string(""),
53		  type(TOKEN_NONE),
54		  value(0),
55		  position(0)
56	{
57	}
58
59	Token(const Token& other)
60		: string(other.string),
61		  type(other.type),
62		  value(other.value),
63		  position(other.position)
64	{
65	}
66
67	Token(const char* string, int32 length, int32 position, int32 type)
68		: string(string, length),
69		  type(type),
70		  value(0),
71		  position(position)
72	{
73	}
74
75	Token& operator=(const Token& other)
76	{
77		string = other.string;
78		type = other.type;
79		value = other.value;
80		position = other.position;
81		return *this;
82	}
83
84	BString		string;
85	int32		type;
86	MAPM		value;
87
88	int32		position;
89};
90
91
92class ExpressionParser::Tokenizer {
93 public:
94	Tokenizer()
95		: fString(""),
96		  fCurrentChar(NULL),
97		  fCurrentToken(),
98		  fReuseToken(false),
99		  fHexSupport(false)
100	{
101	}
102
103	void SetSupportHexInput(bool enabled)
104	{
105		fHexSupport = enabled;
106	}
107
108	void SetTo(const char* string)
109	{
110		fString = string;
111		fCurrentChar = fString.String();
112		fCurrentToken = Token();
113		fReuseToken = false;
114	}
115
116	const Token& NextToken()
117	{
118		if (fCurrentToken.type == TOKEN_END_OF_LINE)
119			return fCurrentToken;
120
121		if (fReuseToken) {
122			fReuseToken = false;
123//printf("next token (recycled): '%s'\n", fCurrentToken.string.String());
124			return fCurrentToken;
125		}
126
127		while (*fCurrentChar != 0 && isspace(*fCurrentChar))
128			fCurrentChar++;
129
130		if (*fCurrentChar == 0)
131			return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE);
132
133		bool decimal = *fCurrentChar == '.' || *fCurrentChar == ',';
134
135		if (decimal || isdigit(*fCurrentChar)) {
136			if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x')
137				return _ParseHexNumber();
138
139			BString temp;
140
141			const char* begin = fCurrentChar;
142
143			// optional digits before the comma
144			while (isdigit(*fCurrentChar)) {
145				temp << *fCurrentChar;
146				fCurrentChar++;
147			}
148
149			// optional post comma part
150			// (required if there are no digits before the comma)
151			if (*fCurrentChar == '.' || *fCurrentChar == ',') {
152				temp << '.';
153				fCurrentChar++;
154
155				// optional post comma digits
156				while (isdigit(*fCurrentChar)) {
157					temp << *fCurrentChar;
158					fCurrentChar++;
159				}
160			}
161
162			// optional exponent part
163			if (*fCurrentChar == 'E') {
164				temp << *fCurrentChar;
165				fCurrentChar++;
166
167				// optional exponent sign
168				if (*fCurrentChar == '+' || *fCurrentChar == '-') {
169					temp << *fCurrentChar;
170					fCurrentChar++;
171				}
172
173				// required exponent digits
174				if (!isdigit(*fCurrentChar)) {
175					throw ParseException("missing exponent in constant",
176						fCurrentChar - begin);
177				}
178
179				while (isdigit(*fCurrentChar)) {
180					temp << *fCurrentChar;
181					fCurrentChar++;
182				}
183			}
184
185			int32 length = fCurrentChar - begin;
186			BString test = temp;
187			test << "&_";
188			double value;
189			char t[2];
190			int32 matches = sscanf(test.String(), "%lf&%s", &value, t);
191			if (matches != 2) {
192				throw ParseException("error in constant",
193					_CurrentPos() - length);
194			}
195
196			fCurrentToken = Token(begin, length, _CurrentPos() - length,
197				TOKEN_CONSTANT);
198			fCurrentToken.value = temp.String();
199
200		} else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') {
201			const char* begin = fCurrentChar;
202			while (*fCurrentChar != 0 && (isalpha(*fCurrentChar)
203				|| isdigit(*fCurrentChar))) {
204				fCurrentChar++;
205			}
206			int32 length = fCurrentChar - begin;
207			fCurrentToken = Token(begin, length, _CurrentPos() - length,
208				TOKEN_IDENTIFIER);
209
210		} else if ((unsigned char)fCurrentChar[0] == 0xCF
211			&& (unsigned char)fCurrentChar[1] == 0x80) {
212			// UTF-8 small greek letter PI
213			fCurrentToken = Token(fCurrentChar, 2, _CurrentPos() - 1,
214				TOKEN_IDENTIFIER);
215			fCurrentChar += 2;
216
217		} else {
218			int32 type = TOKEN_NONE;
219
220			switch (*fCurrentChar) {
221				case '+':
222					type = TOKEN_PLUS;
223					break;
224				case '-':
225					type = TOKEN_MINUS;
226					break;
227				case '*':
228					type = TOKEN_STAR;
229					break;
230				case '/':
231				case '\\':
232				case ':':
233					type = TOKEN_SLASH;
234					break;
235
236				case '%':
237					type = TOKEN_MODULO;
238					break;
239				case '^':
240					type = TOKEN_POWER;
241					break;
242				case '!':
243					type = TOKEN_FACTORIAL;
244					break;
245
246				case '(':
247					type = TOKEN_OPENING_BRACKET;
248					break;
249				case ')':
250					type = TOKEN_CLOSING_BRACKET;
251					break;
252
253				case '&':
254					type = TOKEN_AND;
255					break;
256				case '|':
257					type = TOKEN_OR;
258					break;
259				case '~':
260					type = TOKEN_NOT;
261					break;
262
263				case '\n':
264					type = TOKEN_END_OF_LINE;
265					break;
266
267				case 'x':
268					if (!fHexSupport) {
269						type = TOKEN_STAR;
270						break;
271					}
272					// fall through
273
274				default:
275					throw ParseException("unexpected character", _CurrentPos());
276			}
277			fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type);
278			fCurrentChar++;
279		}
280
281//printf("next token: '%s'\n", fCurrentToken.string.String());
282		return fCurrentToken;
283	}
284
285	void RewindToken()
286	{
287		fReuseToken = true;
288	}
289
290 private:
291	static bool _IsHexDigit(char c)
292	{
293		return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
294	}
295
296	Token& _ParseHexNumber()
297	{
298		const char* begin = fCurrentChar;
299		fCurrentChar += 2;
300			// skip "0x"
301
302		if (!_IsHexDigit(*fCurrentChar))
303			throw ParseException("expected hex digit", _CurrentPos());
304
305		fCurrentChar++;
306		while (_IsHexDigit(*fCurrentChar))
307			fCurrentChar++;
308
309		int32 length = fCurrentChar - begin;
310		fCurrentToken = Token(begin, length, _CurrentPos() - length,
311			TOKEN_CONSTANT);
312
313		// MAPM has no conversion from long long, so we need to improvise.
314		uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0);
315		if (value <= 0x7fffffff) {
316			fCurrentToken.value = (long)value;
317		} else {
318			fCurrentToken.value = (int)(value >> 60);
319			fCurrentToken.value *= 1 << 30;
320			fCurrentToken.value += (int)((value >> 30) & 0x3fffffff);
321			fCurrentToken.value *= 1 << 30;
322			fCurrentToken.value += (int)(value& 0x3fffffff);
323		}
324
325		return fCurrentToken;
326	}
327
328	int32 _CurrentPos() const
329	{
330		return fCurrentChar - fString.String();
331	}
332
333	BString		fString;
334	const char*	fCurrentChar;
335	Token		fCurrentToken;
336	bool		fReuseToken;
337	bool		fHexSupport;
338};
339
340
341ExpressionParser::ExpressionParser()
342	:	fTokenizer(new Tokenizer()),
343		fDegreeMode(false)
344{
345}
346
347
348ExpressionParser::~ExpressionParser()
349{
350	delete fTokenizer;
351}
352
353
354bool
355ExpressionParser::DegreeMode()
356{
357	return fDegreeMode;
358}
359
360
361void
362ExpressionParser::SetDegreeMode(bool degrees)
363{
364	fDegreeMode = degrees;
365}
366
367
368void
369ExpressionParser::SetSupportHexInput(bool enabled)
370{
371	fTokenizer->SetSupportHexInput(enabled);
372}
373
374
375BString
376ExpressionParser::Evaluate(const char* expressionString)
377{
378	fTokenizer->SetTo(expressionString);
379
380	MAPM value = _ParseBinary();
381	Token token = fTokenizer->NextToken();
382	if (token.type != TOKEN_END_OF_LINE)
383		throw ParseException("parse error", token.position);
384
385	if (value == 0)
386		return BString("0");
387
388	char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0);
389	if (buffer == NULL)
390		throw ParseException("out of memory", 0);
391
392	// remove surplus zeros
393	int32 lastChar = strlen(buffer) - 1;
394	if (strchr(buffer, '.')) {
395		while (buffer[lastChar] == '0')
396			lastChar--;
397		if (buffer[lastChar] == '.')
398			lastChar--;
399	}
400
401	BString result(buffer, lastChar + 1);
402	free(buffer);
403	return result;
404}
405
406
407int64
408ExpressionParser::EvaluateToInt64(const char* expressionString)
409{
410	fTokenizer->SetTo(expressionString);
411
412	MAPM value = _ParseBinary();
413	Token token = fTokenizer->NextToken();
414	if (token.type != TOKEN_END_OF_LINE)
415		throw ParseException("parse error", token.position);
416
417	char buffer[128];
418	value.toIntegerString(buffer);
419
420	return strtoll(buffer, NULL, 0);
421}
422
423
424double
425ExpressionParser::EvaluateToDouble(const char* expressionString)
426{
427	fTokenizer->SetTo(expressionString);
428
429	MAPM value = _ParseBinary();
430	Token token = fTokenizer->NextToken();
431	if (token.type != TOKEN_END_OF_LINE)
432		throw ParseException("parse error", token.position);
433
434	char buffer[1024];
435	value.toString(buffer, sizeof(buffer) - 4);
436
437	return strtod(buffer, NULL);
438}
439
440
441MAPM
442ExpressionParser::_ParseBinary()
443{
444	return _ParseSum();
445	// binary operation appearantly not supported by m_apm library,
446	// should not be too hard to implement though....
447
448//	double value = _ParseSum();
449//
450//	while (true) {
451//		Token token = fTokenizer->NextToken();
452//		switch (token.type) {
453//			case TOKEN_AND:
454//				value = (uint64)value & (uint64)_ParseSum();
455//				break;
456//			case TOKEN_OR:
457//				value = (uint64)value | (uint64)_ParseSum();
458//				break;
459//
460//			default:
461//				fTokenizer->RewindToken();
462//				return value;
463//		}
464//	}
465}
466
467
468MAPM
469ExpressionParser::_ParseSum()
470{
471	// TODO: check isnan()...
472	MAPM value = _ParseProduct();
473
474	while (true) {
475		Token token = fTokenizer->NextToken();
476		switch (token.type) {
477			case TOKEN_PLUS:
478				value = value + _ParseProduct();
479				break;
480			case TOKEN_MINUS:
481				value = value - _ParseProduct();
482				break;
483
484			default:
485				fTokenizer->RewindToken();
486				return _ParseFactorial(value);
487		}
488	}
489}
490
491
492MAPM
493ExpressionParser::_ParseProduct()
494{
495	// TODO: check isnan()...
496	MAPM value = _ParsePower();
497
498	while (true) {
499		Token token = fTokenizer->NextToken();
500		switch (token.type) {
501			case TOKEN_STAR:
502				value = value * _ParsePower();
503				break;
504			case TOKEN_SLASH: {
505				MAPM rhs = _ParsePower();
506				if (rhs == MAPM(0))
507					throw ParseException("division by zero", token.position);
508				value = value / rhs;
509				break;
510			}
511			case TOKEN_MODULO: {
512				MAPM rhs = _ParsePower();
513				if (rhs == MAPM(0))
514					throw ParseException("modulo by zero", token.position);
515				value = value % rhs;
516				break;
517			}
518
519			default:
520				fTokenizer->RewindToken();
521				return _ParseFactorial(value);
522		}
523	}
524}
525
526
527MAPM
528ExpressionParser::_ParsePower()
529{
530	MAPM value = _ParseUnary();
531
532	while (true) {
533		Token token = fTokenizer->NextToken();
534		if (token.type != TOKEN_POWER) {
535			fTokenizer->RewindToken();
536			return _ParseFactorial(value);
537		}
538		value = value.pow(_ParseUnary());
539	}
540}
541
542
543MAPM
544ExpressionParser::_ParseUnary()
545{
546	Token token = fTokenizer->NextToken();
547	if (token.type == TOKEN_END_OF_LINE)
548		throw ParseException("unexpected end of expression", token.position);
549
550	switch (token.type) {
551		case TOKEN_PLUS:
552			return _ParseUnary();
553		case TOKEN_MINUS:
554			return -_ParseUnary();
555// TODO: Implement !
556//		case TOKEN_NOT:
557//			return ~(uint64)_ParseUnary();
558
559		case TOKEN_IDENTIFIER:
560			return _ParseFunction(token);
561
562		default:
563			fTokenizer->RewindToken();
564			return _ParseAtom();
565	}
566
567	return MAPM(0);
568}
569
570
571struct Function {
572	const char*	name;
573	int			argumentCount;
574	void*		function;
575	MAPM		value;
576};
577
578
579void
580ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount)
581{
582	_EatToken(TOKEN_OPENING_BRACKET);
583
584	for (int32 i = 0; i < argumentCount; i++)
585		values[i] = _ParseBinary();
586
587	_EatToken(TOKEN_CLOSING_BRACKET);
588}
589
590
591MAPM
592ExpressionParser::_ParseFunction(const Token& token)
593{
594	if (strcmp("e", token.string.String()) == 0)
595		return _ParseFactorial(MAPM(MM_E));
596	else if (strcasecmp("pi", token.string.String()) == 0
597		|| ((unsigned char)token.string.String()[0] == 0xCF
598			&& (unsigned char)token.string.String()[1] == 0x80)) {
599		// UTF-8 small greek letter PI
600		return _ParseFactorial(MAPM(MM_PI));
601	}
602
603	// hard coded cases for different count of arguments
604	// supports functions with 3 arguments at most
605
606	MAPM values[3];
607
608	if (strcasecmp("abs", token.string.String()) == 0) {
609		_InitArguments(values, 1);
610		return _ParseFactorial(values[0].abs());
611	} else if (strcasecmp("acos", token.string.String()) == 0) {
612		_InitArguments(values, 1);
613		if (fDegreeMode)
614			values[0] = values[0] * MM_PI / 180;
615
616		if (values[0] < -1 || values[0] > 1)
617			throw ParseException("out of domain", token.position);
618
619		return _ParseFactorial(values[0].acos());
620	} else if (strcasecmp("asin", token.string.String()) == 0) {
621		_InitArguments(values, 1);
622		if (fDegreeMode)
623			values[0] = values[0] * MM_PI / 180;
624
625		if (values[0] < -1 || values[0] > 1)
626			throw ParseException("out of domain", token.position);
627
628		return _ParseFactorial(values[0].asin());
629	} else if (strcasecmp("atan", token.string.String()) == 0) {
630		_InitArguments(values, 1);
631		if (fDegreeMode)
632			values[0] = values[0] * MM_PI / 180;
633
634		return _ParseFactorial(values[0].atan());
635	} else if (strcasecmp("atan2", token.string.String()) == 0) {
636		_InitArguments(values, 2);
637
638		if (fDegreeMode) {
639			values[0] = values[0] * MM_PI / 180;
640			values[1] = values[1] * MM_PI / 180;
641		}
642
643		return _ParseFactorial(values[0].atan2(values[1]));
644	} else if (strcasecmp("cbrt", token.string.String()) == 0) {
645		_InitArguments(values, 1);
646		return _ParseFactorial(values[0].cbrt());
647	} else if (strcasecmp("ceil", token.string.String()) == 0) {
648		_InitArguments(values, 1);
649		return _ParseFactorial(values[0].ceil());
650	} else if (strcasecmp("cos", token.string.String()) == 0) {
651		_InitArguments(values, 1);
652		if (fDegreeMode)
653			values[0] = values[0] * MM_PI / 180;
654
655		return _ParseFactorial(values[0].cos());
656	} else if (strcasecmp("cosh", token.string.String()) == 0) {
657		_InitArguments(values, 1);
658		// This function always uses radians
659		return _ParseFactorial(values[0].cosh());
660	} else if (strcasecmp("exp", token.string.String()) == 0) {
661		_InitArguments(values, 1);
662		return _ParseFactorial(values[0].exp());
663	} else if (strcasecmp("floor", token.string.String()) == 0) {
664		_InitArguments(values, 1);
665		return _ParseFactorial(values[0].floor());
666	} else if (strcasecmp("ln", token.string.String()) == 0) {
667		_InitArguments(values, 1);
668		if (values[0] <= 0)
669			throw ParseException("out of domain", token.position);
670
671		return _ParseFactorial(values[0].log());
672	} else if (strcasecmp("log", token.string.String()) == 0) {
673		_InitArguments(values, 1);
674		if (values[0] <= 0)
675			throw ParseException("out of domain", token.position);
676
677		return _ParseFactorial(values[0].log10());
678	} else if (strcasecmp("pow", token.string.String()) == 0) {
679		_InitArguments(values, 2);
680		return _ParseFactorial(values[0].pow(values[1]));
681	} else if (strcasecmp("sin", token.string.String()) == 0) {
682		_InitArguments(values, 1);
683		if (fDegreeMode)
684			values[0] = values[0] * MM_PI / 180;
685
686		return _ParseFactorial(values[0].sin());
687	} else if (strcasecmp("sinh", token.string.String()) == 0) {
688		_InitArguments(values, 1);
689		// This function always uses radians
690		return _ParseFactorial(values[0].sinh());
691	} else if (strcasecmp("sqrt", token.string.String()) == 0) {
692		_InitArguments(values, 1);
693		if (values[0] < 0)
694			throw ParseException("out of domain", token.position);
695
696		return _ParseFactorial(values[0].sqrt());
697	} else if (strcasecmp("tan", token.string.String()) == 0) {
698		_InitArguments(values, 1);
699		if (fDegreeMode)
700			values[0] = values[0] * MM_PI / 180;
701
702		MAPM divided_by_half_pi = values[0] / MM_HALF_PI;
703		if (divided_by_half_pi.is_integer() && divided_by_half_pi.is_odd())
704			throw ParseException("out of domain", token.position);
705
706		return _ParseFactorial(values[0].tan());
707	} else if (strcasecmp("tanh", token.string.String()) == 0) {
708		_InitArguments(values, 1);
709		// This function always uses radians
710		return _ParseFactorial(values[0].tanh());
711	}
712
713	throw ParseException("unknown identifier", token.position);
714}
715
716
717MAPM
718ExpressionParser::_ParseAtom()
719{
720	Token token = fTokenizer->NextToken();
721	if (token.type == TOKEN_END_OF_LINE)
722		throw ParseException("unexpected end of expression", token.position);
723
724	if (token.type == TOKEN_CONSTANT)
725		return _ParseFactorial(token.value);
726
727	fTokenizer->RewindToken();
728
729	_EatToken(TOKEN_OPENING_BRACKET);
730
731	MAPM value = _ParseBinary();
732
733	_EatToken(TOKEN_CLOSING_BRACKET);
734
735	return _ParseFactorial(value);
736}
737
738
739MAPM
740ExpressionParser::_ParseFactorial(MAPM value)
741{
742	if (fTokenizer->NextToken().type == TOKEN_FACTORIAL) {
743		fTokenizer->RewindToken();
744		_EatToken(TOKEN_FACTORIAL);
745		return value.factorial();
746	}
747
748	fTokenizer->RewindToken();
749	return value;
750}
751
752
753void
754ExpressionParser::_EatToken(int32 type)
755{
756	Token token = fTokenizer->NextToken();
757	if (token.type != type) {
758		BString temp("expected '");
759		temp << (char)type << "' got '" << token.string << "'";
760		throw ParseException(temp.String(), token.position);
761	}
762}
763
764