1/*
2 * Copyright 2006-2014 Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Stephan Aßmus <superstippi@gmx.de>
7 *		Rene Gollent <rene@gollent.com>
8 *		John Scipione <jscipione@gmail.com>
9 *		Ingo Weinhold <bonefish@cs.tu-berlin.de>
10 */
11#ifndef C_LANGUAGE_TOKENIZER
12#define C_LANGUAGE_TOKENIZER
13
14
15#include <String.h>
16
17#include <Variant.h>
18
19
20namespace CLanguage {
21
22
// Token type constants. The enumerators are numbered consecutively, starting
// with TOKEN_NONE == 0, so inserting or reordering entries changes the numeric
// value of everything that follows.
// NOTE(review): presumably these are the values stored in Token::type --
// confirm against the tokenizer implementation.
enum {
	// no token / general token classes
	TOKEN_NONE = 0,
	TOKEN_IDENTIFIER,
	TOKEN_CONSTANT,
	TOKEN_END_OF_LINE,

	// additive operators
	TOKEN_PLUS,
	TOKEN_MINUS,

	// multiplicative operators
	TOKEN_STAR,
	TOKEN_SLASH,
	TOKEN_MODULO,

	// parentheses
	TOKEN_OPENING_PAREN,
	TOKEN_CLOSING_PAREN,

	// square brackets
	TOKEN_OPENING_SQUARE_BRACKET,
	TOKEN_CLOSING_SQUARE_BRACKET,

	// curly braces
	TOKEN_OPENING_CURLY_BRACE,
	TOKEN_CLOSING_CURLY_BRACE,

	// assignment, logical, bitwise and comparison operators
	TOKEN_ASSIGN,
	TOKEN_LOGICAL_AND,
	TOKEN_LOGICAL_OR,
	TOKEN_LOGICAL_NOT,
	TOKEN_BITWISE_AND,
	TOKEN_BITWISE_OR,
	TOKEN_BITWISE_NOT,
	TOKEN_BITWISE_XOR,
	TOKEN_EQ,
	TOKEN_NE,
	TOKEN_GT,
	TOKEN_GE,
	TOKEN_LT,
	TOKEN_LE,

	// punctuation
	TOKEN_BACKSLASH,
	TOKEN_CONDITION,
	TOKEN_COLON,
	TOKEN_SEMICOLON,
	TOKEN_COMMA,
	TOKEN_PERIOD,
	TOKEN_POUND,

	// quote characters
	TOKEN_SINGLE_QUOTE,
	TOKEN_DOUBLE_QUOTE,

	// string literals and comments
	TOKEN_STRING_LITERAL,
	TOKEN_BEGIN_COMMENT_BLOCK,
	TOKEN_END_COMMENT_BLOCK,
	TOKEN_INLINE_COMMENT,

	// member access through a pointer
	TOKEN_MEMBER_PTR
};
78
79
80class ParseException {
81 public:
82	ParseException(const char* message, int32 position)
83		: message(message),
84		  position(position)
85	{
86	}
87
88	ParseException(const ParseException& other)
89		: message(other.message),
90		  position(other.position)
91	{
92	}
93
94	BString	message;
95	int32	position;
96};
97
98
99struct Token {
100								Token();
101								Token(const Token& other);
102								Token(const char* string, int32 length,
103								int32 position, int32 type);
104			Token& 	operator=(const Token& other);
105
106	BString						string;
107	int32						type;
108	BVariant					value;
109	int32						position;
110};
111
112
113class Tokenizer {
114public:
115								Tokenizer();
116
117			void 				SetTo(const char* string);
118
119			const Token& 		NextToken();
120			void 				RewindToken();
121private:
122			bool 				_ParseOperator();
123 			char 				_Peek() const;
124
125	static 	bool 				_IsHexDigit(char c);
126
127			Token& 				_ParseHexOperand();
128			int32 				_CurrentPos() const;
129
130private:
131	BString						fString;
132	const char*					fCurrentChar;
133	Token						fCurrentToken;
134	bool						fReuseToken;
135};
136
137
138}	// namespace CLanguage
139
140
141#endif	// C_LANGUAGE_TOKENIZER
142