1/*
2 * Copyright 2014, Rene Gollent, rene@gollent.com.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7#include "CLanguageFamilySyntaxHighlightInfo.h"
8
9#include <AutoDeleter.h>
10
11#include "CLanguageTokenizer.h"
12#include "LineDataSource.h"
13#include "TeamTypeInformation.h"
14#include "TypeLookupConstraints.h"
15
16
17using namespace CLanguage;
18
19
// Keywords of the C language family that get keyword highlighting.
// IsLanguageKeyword() bisects this table, so the entries must stay in
// ascending byte-wise order (upper case sorts before lower case, and '_'
// sorts before lower case letters).
static const char* kLanguageKeywords[] = {
	"NULL",
	"asm", "auto",
	"bool", "break",
	"case", "catch", "char", "class", "const", "const_cast", "constexpr",
	"continue",
	"default", "delete", "do", "double", "dynamic_cast",
	"else", "enum", "explicit", "extern",
	"false", "float", "for",
	"goto",
	"if", "inline", "int",
	"long",
	"mutable",
	"namespace", "new",
	"operator",
	"private", "protected", "public",
	"register", "reinterpret_cast", "return",
	"short", "signed", "sizeof", "static", "static_cast", "struct", "switch",
	"template", "this", "throw", "true", "try", "typedef", "typeid",
	"typename",
	"union", "unsigned", "using",
	"virtual", "void", "volatile",
	"while"
};
84
85
86static bool IsLanguageKeyword(const Token& token)
87{
88	int lower = 0;
89	int upper = (sizeof(kLanguageKeywords)/sizeof(char*)) - 1;
90
91	while (lower < upper) {
92		int mid = (lower + upper + 1) / 2;
93
94		int cmp = token.string.Compare(kLanguageKeywords[mid]);
95		if (cmp == 0)
96			return true;
97		else if (cmp < 0)
98			upper = mid - 1;
99		else
100			lower = mid;
101	}
102
103	return token.string.Compare(kLanguageKeywords[lower]) == 0;
104}
105
106
107// #pragma mark - CLanguageFamilySyntaxHighlightInfo::SyntaxPair
108
109
110struct CLanguageFamilySyntaxHighlightInfo::SyntaxPair {
111	int32 column;
112	syntax_highlight_type type;
113
114	SyntaxPair(int32 column, syntax_highlight_type type)
115		:
116		column(column),
117		type(type)
118	{
119	}
120};
121
122
123// #pragma mark - CLanguageFamilySyntaxHighlightInfo::LineInfo
124
125
126class CLanguageFamilySyntaxHighlightInfo::LineInfo {
127public:
128	LineInfo(int32 line)
129		:
130		fLine(line),
131		fPairs(5, true)
132	{
133	}
134
135	inline int32 CountPairs() const
136	{
137		return fPairs.CountItems();
138	}
139
140	SyntaxPair* PairAt(int32 index) const
141	{
142		return fPairs.ItemAt(index);
143	}
144
145	bool AddPair(int32 column, syntax_highlight_type type)
146	{
147		SyntaxPair* pair = new(std::nothrow) SyntaxPair(column, type);
148		if (pair == NULL)
149			return false;
150
151		ObjectDeleter<SyntaxPair> pairDeleter(pair);
152		if (!fPairs.AddItem(pair))
153			return false;
154
155		pairDeleter.Detach();
156		return true;
157	}
158
159private:
160	typedef BObjectList<SyntaxPair> SyntaxPairList;
161
162private:
163	int32 fLine;
164	SyntaxPairList fPairs;
165};
166
167
// #pragma mark - CLanguageFamilySyntaxHighlightInfo
169
170
171CLanguageFamilySyntaxHighlightInfo::CLanguageFamilySyntaxHighlightInfo(
172	LineDataSource* source, Tokenizer* tokenizer,
173	TeamTypeInformation* typeInfo)
174	:
175	SyntaxHighlightInfo(),
176	fHighlightSource(source),
177	fTokenizer(tokenizer),
178	fTypeInfo(typeInfo),
179	fLineInfos(10, true)
180{
181	fHighlightSource->AcquireReference();
182}
183
184
185CLanguageFamilySyntaxHighlightInfo::~CLanguageFamilySyntaxHighlightInfo()
186{
187	fHighlightSource->ReleaseReference();
188	delete fTokenizer;
189}
190
191
192int32
193CLanguageFamilySyntaxHighlightInfo::GetLineHighlightRanges(int32 line,
194	int32* _columns, syntax_highlight_type* _types, int32 maxCount)
195{
196	if (line >= fHighlightSource->CountLines())
197		return 0;
198
199	// lazily parse the source's highlight information the first time
200	// it's actually requested. Subsequently it's cached for quick retrieval.
201	if (fLineInfos.CountItems() == 0) {
202		if (_ParseLines() != B_OK)
203			return 0;
204	}
205
206	LineInfo* info = fLineInfos.ItemAt(line);
207	if (info == NULL)
208		return 0;
209
210	int32 count = 0;
211	for (; count < info->CountPairs(); count++) {
212		if (count == maxCount - 1)
213			break;
214
215		SyntaxPair* pair = info->PairAt(count);
216		if (pair == NULL)
217			break;
218
219		_columns[count] = pair->column;
220		_types[count] = pair->type;
221	}
222
223	return count;
224}
225
226
227status_t
228CLanguageFamilySyntaxHighlightInfo::_ParseLines()
229{
230	syntax_highlight_type type = SYNTAX_HIGHLIGHT_NONE;
231
232	for (int32 i = 0; i < fHighlightSource->CountLines(); i++) {
233		const char* line = fHighlightSource->LineAt(i);
234		fTokenizer->SetTo(line);
235		LineInfo* info = NULL;
236
237		status_t error = _ParseLine(i, type, info);
238		if (error != B_OK)
239			return error;
240
241		ObjectDeleter<LineInfo> infoDeleter(info);
242		if (!fLineInfos.AddItem(info))
243			return B_NO_MEMORY;
244
245		infoDeleter.Detach();
246	}
247
248	return B_OK;
249}
250
251
252status_t
253CLanguageFamilySyntaxHighlightInfo::_ParseLine(int32 line,
254	syntax_highlight_type& _lastType, LineInfo*& _info)
255{
256	bool inCommentBlock = (_lastType == SYNTAX_HIGHLIGHT_COMMENT);
257	bool inPreprocessor = false;
258
259	_info = new(std::nothrow) LineInfo(line);
260	if (_info == NULL)
261		return B_NO_MEMORY;
262	ObjectDeleter<LineInfo> infoDeleter(_info);
263	if (inCommentBlock) {
264		if (!_info->AddPair(0, SYNTAX_HIGHLIGHT_COMMENT))
265			return B_NO_MEMORY;
266	}
267
268	try {
269		for (;;) {
270			const Token& token = fTokenizer->NextToken();
271			if (token.type == TOKEN_END_OF_LINE)
272				break;
273
274			if (inCommentBlock) {
275				if (token.type == TOKEN_END_COMMENT_BLOCK)
276					inCommentBlock = false;
277				continue;
278			} else if (inPreprocessor) {
279				fTokenizer->NextToken();
280				inPreprocessor = false;
281			} else if (token.type == TOKEN_INLINE_COMMENT) {
282				if (!_info->AddPair(token.position, SYNTAX_HIGHLIGHT_COMMENT))
283					return B_NO_MEMORY;
284				break;
285			}
286
287			syntax_highlight_type current = _MapTokenToSyntaxType(token);
288			if (_lastType == current)
289				continue;
290
291			_lastType = current;
292			if (!_info->AddPair(token.position, current))
293				return B_NO_MEMORY;
294
295			if (token.type == TOKEN_BEGIN_COMMENT_BLOCK)
296				inCommentBlock = true;
297			else if (token.type == TOKEN_POUND)
298				inPreprocessor = true;
299		}
300	} catch (...) {
301		// if a parse exception was thrown, simply ignore it.
302		// in such a case, we can't guarantee correct highlight
303		// information anyhow, so simply return whatever we started
304		// with.
305	}
306
307	_lastType = inCommentBlock
308		? SYNTAX_HIGHLIGHT_COMMENT : SYNTAX_HIGHLIGHT_NONE;
309	infoDeleter.Detach();
310	return B_OK;
311}
312
313
314syntax_highlight_type
315CLanguageFamilySyntaxHighlightInfo::_MapTokenToSyntaxType(const Token& token)
316{
317	static TypeLookupConstraints constraints;
318
319	switch (token.type) {
320		case TOKEN_IDENTIFIER:
321			if (IsLanguageKeyword(token))
322				return SYNTAX_HIGHLIGHT_KEYWORD;
323			else if (fTypeInfo->TypeExistsByName(token.string, constraints))
324				return SYNTAX_HIGHLIGHT_TYPE;
325			break;
326
327		case TOKEN_CONSTANT:
328			return SYNTAX_HIGHLIGHT_NUMERIC_LITERAL;
329
330		case TOKEN_END_OF_LINE:
331			break;
332
333		case TOKEN_PLUS:
334		case TOKEN_MINUS:
335		case TOKEN_STAR:
336		case TOKEN_SLASH:
337		case TOKEN_MODULO:
338		case TOKEN_OPENING_PAREN:
339		case TOKEN_CLOSING_PAREN:
340		case TOKEN_OPENING_SQUARE_BRACKET:
341		case TOKEN_CLOSING_SQUARE_BRACKET:
342		case TOKEN_OPENING_CURLY_BRACE:
343		case TOKEN_CLOSING_CURLY_BRACE:
344		case TOKEN_LOGICAL_AND:
345		case TOKEN_LOGICAL_OR:
346		case TOKEN_LOGICAL_NOT:
347		case TOKEN_BITWISE_AND:
348		case TOKEN_BITWISE_OR:
349		case TOKEN_BITWISE_NOT:
350		case TOKEN_BITWISE_XOR:
351		case TOKEN_EQ:
352		case TOKEN_NE:
353		case TOKEN_GT:
354		case TOKEN_GE:
355		case TOKEN_LT:
356		case TOKEN_LE:
357		case TOKEN_MEMBER_PTR:
358		case TOKEN_CONDITION:
359		case TOKEN_COLON:
360		case TOKEN_SEMICOLON:
361		case TOKEN_BACKSLASH:
362			return SYNTAX_HIGHLIGHT_OPERATOR;
363
364		case TOKEN_STRING_LITERAL:
365			return SYNTAX_HIGHLIGHT_STRING_LITERAL;
366
367		case TOKEN_POUND:
368			return SYNTAX_HIGHLIGHT_PREPROCESSOR_KEYWORD;
369
370		case TOKEN_BEGIN_COMMENT_BLOCK:
371		case TOKEN_END_COMMENT_BLOCK:
372		case TOKEN_INLINE_COMMENT:
373			return SYNTAX_HIGHLIGHT_COMMENT;
374	}
375
376	return SYNTAX_HIGHLIGHT_NONE;
377}
378